├── README.md ├── data ├── banklist.csv ├── billboard.csv ├── concat_1.csv ├── concat_2.csv ├── concat_3.csv ├── country_timeseries.csv ├── gapminder.tsv ├── pew.csv ├── raw_data_urls.txt ├── scientists.csv ├── survey_person.csv ├── survey_site.csv ├── survey_survey.csv ├── survey_visited.csv ├── tesla_stock_quandl.csv └── weather.csv ├── notebook ├── .ipynb_checkpoints │ ├── 01_done-checkpoint.ipynb │ ├── 02_done-checkpoint.ipynb │ ├── 03_done-checkpoint.ipynb │ ├── 04_done-checkpoint.ipynb │ ├── 05_done-checkpoint.ipynb │ ├── 06_done-checkpoint.ipynb │ ├── 07_done-checkpoint.ipynb │ ├── 08_done-checkpoint.ipynb │ ├── 09_done-checkpoint.ipynb │ ├── 10_done-Copy1-checkpoint.ipynb │ ├── 10_done-checkpoint.ipynb │ ├── 11_done-checkpoint.ipynb │ ├── 12_done-checkpoint.ipynb │ ├── Special-checkpoint.ipynb │ └── hello_jupyter_notebook-checkpoint.ipynb ├── 02_done.ipynb ├── 02_practice.ipynb ├── 03_done.ipynb ├── 03_practice.ipynb ├── 04_done.ipynb ├── 04_practice.ipynb ├── 05_done.ipynb ├── 05_practice.ipynb ├── 06_done.ipynb ├── 06_practice.ipynb ├── 07_done.ipynb ├── 07_practice.ipynb ├── 08_done.ipynb ├── 08_practice.ipynb ├── 09_done.ipynb ├── 09_practice.ipynb ├── 10_done.ipynb ├── 10_practice.ipynb ├── 11_done.ipynb ├── 11_practice.ipynb ├── 12_done.ipynb ├── 12_practice.ipynb ├── Special.ipynb └── hello_jupyter_notebook.ipynb └── output ├── scientist_names_series.csv ├── scientists_df.pickle ├── scientists_df.tsv ├── scientists_df.xlsx ├── scientists_df_no_index.csv ├── scientists_names_series.pickle ├── scientists_names_series_df.xls └── scientists_names_series_df.xlsx /README.md: -------------------------------------------------------------------------------- 1 | # Do it! 판다스 입문 실습 자료입니다. 2 | 3 | 안녕하세요? 이지스퍼블리싱의 담당 편집자 박현규라고 합니다.
4 | 해당 도서의 실습 자료는 방문하신 깃허브의 오른쪽 위에 있는 `Clone or Download` 버튼(초록색 버튼)으로 내려받을 수 있습니다.
5 | 깃허브가 익숙하지 않은 독자 여러분은 [이지스퍼블리싱 홈페이지](http://easyspub.co.kr/30_Menu/DataList/PUB)에 방문하여
6 | 무료 회원가입을 하시고 [자료실]에서 Do it! 판다스 입문을 검색하여 자료를 내려받으세요. 7 | -------------------------------------------------------------------------------- /data/concat_1.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D 2 | a0,b0,c0,d0 3 | a1,b1,c1,d1 4 | a2,b2,c2,d2 5 | a3,b3,c3,d3 6 | -------------------------------------------------------------------------------- /data/concat_2.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D 2 | a4,b4,c4,d4 3 | a5,b5,c5,d5 4 | a6,b6,c6,d6 5 | a7,b7,c7,d7 6 | -------------------------------------------------------------------------------- /data/concat_3.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D 2 | a8,b8,c8,d8 3 | a9,b9,c9,d9 4 | a10,b10,c10,d10 5 | a11,b11,c11,d11 6 | -------------------------------------------------------------------------------- /data/country_timeseries.csv: -------------------------------------------------------------------------------- 1 | Date,Day,Cases_Guinea,Cases_Liberia,Cases_SierraLeone,Cases_Nigeria,Cases_Senegal,Cases_UnitedStates,Cases_Spain,Cases_Mali,Deaths_Guinea,Deaths_Liberia,Deaths_SierraLeone,Deaths_Nigeria,Deaths_Senegal,Deaths_UnitedStates,Deaths_Spain,Deaths_Mali 2 | 1/5/2015,289,2776,,10030,,,,,,1786,,2977,,,,, 3 | 1/4/2015,288,2775,,9780,,,,,,1781,,2943,,,,, 4 | 1/3/2015,287,2769,8166,9722,,,,,,1767,3496,2915,,,,, 5 | 1/2/2015,286,,8157,,,,,,,,3496,,,,,, 6 | 12/31/2014,284,2730,8115,9633,,,,,,1739,3471,2827,,,,, 7 | 12/28/2014,281,2706,8018,9446,,,,,,1708,3423,2758,,,,, 8 | 12/27/2014,280,2695,,9409,,,,,,1697,,2732,,,,, 9 | 12/24/2014,277,2630,7977,9203,,,,,,,3413,2655,,,,, 10 | 12/21/2014,273,2597,,9004,,,,,,1607,,2582,,,,, 11 | 12/20/2014,272,2571,7862,8939,,,,,,1586,3384,2556,,,,, 12 | 12/18/2014,271,,7830,,,,,,,,3376,,,,,, 13 | 12/14/2014,267,2416,,8356,,,,,,1525,,2085,,,,, 14 | 12/9/2014,262,,7797,,,,,,,,3290,,,,,, 15 | 
12/7/2014,260,2292,,7897,20,1,4,1,7,1428,,1768,8,0,1,0,6 16 | 12/3/2014,256,,7719,,,,,,,,3177,,,,,, 17 | 11/30/2014,253,2164,,7312,20,1,4,1,7,1327,,1583,8,0,1,0,6 18 | 11/28/2014,251,,7635,,,,,,,,3145,,,,,, 19 | 11/23/2014,246,2134,,6599,20,1,4,1,7,1260,,1398,8,0,1,0,6 20 | 11/22/2014,245,,7168,,,,,,,,3016,,,,,, 21 | 11/18/2014,241,2047,7082,6190,20,1,4,1,6,1214,2963,1267,8,0,1,0,6 22 | 11/16/2014,239,1971,,6073,20,1,4,1,5,1192,,1250,8,0,1,0,5 23 | 11/15/2014,238,,7069,,,,,,,,2964,,,,,, 24 | 11/11/2014,234,1919,,5586,20,1,4,1,4,1166,,1187,8,0,1,0,3 25 | 11/10/2014,233,,6878,,,,,,,,2812,,,,,, 26 | 11/9/2014,232,1878,,5368,20,1,4,1,1,1142,,1169,8,0,1,0,1 27 | 11/8/2014,231,,6822,,,,,,,,2836,,,,,, 28 | 11/4/2014,227,,6619,4862,20,1,4,1,1,,2766,1130,8,0,1,0,1 29 | 11/3/2014,226,1760,,,,,,,,1054,,,,,,, 30 | 11/2/2014,225,1731,,4759,20,1,4,1,1,1041,,1070,8,0,1,0,1 31 | 10/31/2014,222,,6525,,,,,,,,2697,,,,,, 32 | 10/29/2014,220,1667,,5338,20,1,4,1,1,1018,,1510,8,0,1,0,1 33 | 10/27/2014,218,1906,,5235,20,1,4,1,1,997,,1500,8,0,1,0,1 34 | 10/25/2014,216,,6535,,,,,,,,2413,,,,,, 35 | 10/22/2014,214,,,3896,,,4,1,1,,,1281,,,1,0,1 36 | 10/21/2014,213,1553,,,,,,,,926,,,,,,, 37 | 10/19/2014,211,1540,,3706,20,1,3,1,,904,,1259,8,0,1,0, 38 | 10/18/2014,210,,4665,,,,,,,,2705,,,,,, 39 | 10/14/2014,206,1519,,3410,20,1,3,1,,862,,1200,8,0,0,1, 40 | 10/13/2014,205,,4262,,,,,,,,2484,,,,,, 41 | 10/12/2014,204,1472,,3252,20,1,2,1,,843,,1183,8,0,1,1, 42 | 10/11/2014,203,,4249,,,,,,,,2458,,,,,, 43 | 10/8/2014,200,,,2950,20,1,1,1,,,,930,8,0,1,1, 44 | 10/7/2014,199,1350,4076,,,,,,,778,2316,,,,,, 45 | 10/5/2014,197,1298,,2789,20,1,1,,,768,,879,8,0,0,, 46 | 10/4/2014,196,,3924,,,,,,,,2210,,,,,, 47 | 10/1/2014,193,1199,3834,2437,20,1,1,,,739,2069,623,8,0,0,, 48 | 9/28/2014,190,1157,3696,2304,20,1,,,,710,1998,622,8,0,,, 49 | 9/23/2014,185,1074,3458,2021,20,1,,,,648,1830,605,8,0,,, 50 | 9/21/2014,183,1022,3280,1940,20,1,,,,635,1677,597,8,0,,, 51 | 9/20/2014,182,,,1813,,,,,,,,593,,,,, 52 | 
9/19/2014,181,1008,,,,,,,,632,,,,,,, 53 | 9/17/2014,179,,3022,,,,,,,,1578,,,,,, 54 | 9/14/2014,176,942,2710,1673,,,,,,601,1459,562,,,,, 55 | 9/13/2014,175,936,,1620,21,1,,,,595,1296,562,8,0,,, 56 | 9/10/2014,172,899,,1478,21,1,,,,568,,536,8,,,, 57 | 9/9/2014,171,,2407,,,,,,,,,,,,,, 58 | 9/7/2014,169,861,2081,1424,21,3,,,,557,1137,524,8,0,,, 59 | 9/5/2014,167,812,1871,1261,22,1,,,,517,1089,491,8,,,, 60 | 8/31/2014,162,771,1698,1216,21,1,,,,494,871,476,7,,,, 61 | 8/26/2014,157,648,1378,1026,17,,,,,430,694,422,6,,,, 62 | 8/20/2014,151,607,1082,910,16,,,,,406,624,392,5,,,, 63 | 8/18/2014,149,579,972,907,15,,,,,396,576,374,4,,,, 64 | 8/16/2014,147,543,834,848,15,,,,,394,466,365,4,,,, 65 | 8/13/2014,144,519,786,810,12,,,,,380,413,348,4,,,, 66 | 8/11/2014,142,510,670,783,12,,,,,377,355,334,3,,,, 67 | 8/9/2014,140,506,599,730,13,,,,,373,323,315,2,,,, 68 | 8/6/2014,137,495,554,717,13,,,,,367,294,298,2,,,, 69 | 8/4/2014,135,495,516,691,9,,,,,363,282,286,1,,,, 70 | 8/1/2014,132,485,468,646,4,,,,,358,255,273,1,,,, 71 | 7/30/2014,129,472,391,574,3,,,,,346,227,252,1,,,, 72 | 7/27/2014,126,460,329,533,1,,,,,339,156,233,1,,,, 73 | 7/23/2014,123,427,249,525,0,,,,,319,129,224,0,,,, 74 | 7/20/2014,120,415,224,454,,,,,,314,127,219,,,,, 75 | 7/17/2014,117,410,196,442,,,,,,310,116,206,,,,, 76 | 7/14/2014,114,411,174,397,,,,,,310,106,197,,,,, 77 | 7/12/2014,112,406,172,386,,,,,,304,105,194,,,,, 78 | 7/8/2014,108,409,142,337,,,,,,309,88,142,,,,, 79 | 7/6/2014,106,408,131,305,,,,,,307,84,127,,,,, 80 | 7/2/2014,102,412,115,252,,,,,,305,75,101,,,,, 81 | 6/30/2014,100,413,107,239,,,,,,303,65,99,,,,, 82 | 6/22/2014,92,,51,,,,,,,,34,,,,,, 83 | 6/20/2014,90,390,,158,,,,,,270,,34,,,,, 84 | 6/19/2014,89,,41,,,,,,,,25,,,,,, 85 | 6/18/2014,88,390,,136,,,,,,267,,28,,,,, 86 | 6/17/2014,87,,,97,,,,,,,,49,,,,, 87 | 6/16/2014,86,398,33,,,,,,,264,24,,,,,, 88 | 6/10/2014,80,351,13,89,,,,,,226,24,7,,,,, 89 | 6/5/2014,75,,13,81,,,,,,,,6,,,,, 90 | 6/3/2014,73,344,13,,,,,,,215,12,6,,,,, 91 | 
6/1/2014,71,328,13,79,,,,,,208,12,6,,,,, 92 | 5/28/2014,67,291,13,50,,,,,,193,12,6,,,,, 93 | 5/27/2014,66,281,12,16,,,,,,186,11,5,,,,, 94 | 5/23/2014,62,258,12,0,,,,,,174,11,0,,,,, 95 | 5/12/2014,51,248,12,0,,,,,,171,11,0,,,,, 96 | 5/10/2014,49,233,12,0,,,,,,157,11,0,,,,, 97 | 5/7/2014,46,236,13,0,,,,,,158,11,0,,,,, 98 | 5/5/2014,44,235,13,0,,,,,,157,11,0,,,,, 99 | 5/3/2014,42,231,13,0,,,,,,155,11,0,,,,, 100 | 5/1/2014,40,226,13,0,,,,,,149,11,0,,,,, 101 | 4/26/2014,35,224,,0,,,,,,143,,0,,,,, 102 | 4/24/2014,33,,35,0,,,,,,,,0,,,,, 103 | 4/23/2014,32,218,,0,,,,,,141,,0,,,,, 104 | 4/22/2014,31,,,0,,,,,,,,0,,,,, 105 | 4/21/2014,30,,34,,,,,,,,11,,,,,, 106 | 4/20/2014,29,208,,,,,,,,136,6,,,,,, 107 | 4/17/2014,26,203,27,,,,,,,129,,,,,,, 108 | 4/16/2014,25,197,27,,,,,,,122,13,,,,,, 109 | 4/15/2014,24,,,12,,,,,,,,,,,,, 110 | 4/14/2014,23,168,,,,,,,,108,,,,,,, 111 | 4/11/2014,20,159,26,2,,,,,,106,13,2,,,,, 112 | 4/9/2014,18,158,25,2,,,,,,101,12,2,,,,, 113 | 4/7/2014,16,151,21,2,,,,,,95,10,2,,,,, 114 | 4/4/2014,13,143,18,2,,,,,,86,7,2,,,,, 115 | 4/1/2014,10,127,8,2,,,,,,83,5,2,,,,, 116 | 3/31/2014,9,122,8,2,,,,,,80,4,2,,,,, 117 | 3/29/2014,7,112,7,,,,,,,70,2,,,,,, 118 | 3/28/2014,6,112,3,2,,,,,,70,3,2,,,,, 119 | 3/27/2014,5,103,8,6,,,,,,66,6,5,,,,, 120 | 3/26/2014,4,86,,,,,,,,62,,,,,,, 121 | 3/25/2014,3,86,,,,,,,,60,,,,,,, 122 | 3/24/2014,2,86,,,,,,,,59,,,,,,, 123 | 3/22/2014,0,49,,,,,,,,29,,,,,,, -------------------------------------------------------------------------------- /data/pew.csv: -------------------------------------------------------------------------------- 1 | "religion","<$10k","$10-20k","$20-30k","$30-40k","$40-50k","$50-75k","$75-100k","$100-150k",">150k","Don't know/refused" 2 | "Agnostic",27,34,60,81,76,137,122,109,84,96 3 | "Atheist",12,27,37,52,35,70,73,59,74,76 4 | "Buddhist",27,21,30,34,33,58,62,39,53,54 5 | "Catholic",418,617,732,670,638,1116,949,792,633,1489 6 | "Don’t know/refused",15,14,15,11,10,35,21,17,18,116 7 | "Evangelical 
Prot",575,869,1064,982,881,1486,949,723,414,1529 8 | "Hindu",1,9,7,9,11,34,47,48,54,37 9 | "Historically Black Prot",228,244,236,238,197,223,131,81,78,339 10 | "Jehovah's Witness",20,27,24,24,21,30,15,11,6,37 11 | "Jewish",19,19,25,25,30,95,69,87,151,162 12 | "Mainline Prot",289,495,619,655,651,1107,939,753,634,1328 13 | "Mormon",29,40,48,51,56,112,85,49,42,69 14 | "Muslim",6,7,9,10,9,23,16,8,6,22 15 | "Orthodox",13,17,23,32,32,47,38,42,46,73 16 | "Other Christian",9,7,11,13,13,14,18,14,12,18 17 | "Other Faiths",20,33,40,46,49,63,46,40,41,71 18 | "Other World Religions",5,2,3,4,2,7,3,4,4,8 19 | "Unaffiliated",217,299,374,365,341,528,407,321,258,597 20 | -------------------------------------------------------------------------------- /data/raw_data_urls.txt: -------------------------------------------------------------------------------- 1 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-01.csv 2 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-02.csv 3 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-03.csv 4 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-04.csv 5 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-05.csv 6 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-06.csv 7 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-07.csv 8 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-08.csv 9 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-09.csv 10 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-10.csv 11 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-11.csv 12 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-12.csv 13 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-01.csv 14 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-02.csv 15 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-03.csv 16 | 
https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-04.csv 17 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-05.csv 18 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-06.csv 19 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-07.csv 20 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-08.csv 21 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-09.csv 22 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-10.csv 23 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-11.csv 24 | https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2016-12.csv 25 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-08.csv 26 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-09.csv 27 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-10.csv 28 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-11.csv 29 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2013-12.csv 30 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-01.csv 31 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-02.csv 32 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-03.csv 33 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-04.csv 34 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-05.csv 35 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-06.csv 36 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-07.csv 37 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-08.csv 38 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-09.csv 39 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-10.csv 40 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-11.csv 41 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2014-12.csv 42 | 
https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-01.csv 43 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-02.csv 44 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-03.csv 45 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-04.csv 46 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-05.csv 47 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-06.csv 48 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-07.csv 49 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-08.csv 50 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-09.csv 51 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-10.csv 52 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-11.csv 53 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2015-12.csv 54 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-01.csv 55 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-02.csv 56 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-03.csv 57 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-04.csv 58 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-05.csv 59 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-06.csv 60 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-07.csv 61 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-08.csv 62 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-09.csv 63 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-10.csv 64 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-11.csv 65 | https://s3.amazonaws.com/nyc-tlc/trip+data/green_tripdata_2016-12.csv 66 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-01.csv 67 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-02.csv 68 | 
https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-03.csv 69 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-04.csv 70 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-05.csv 71 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-06.csv 72 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-07.csv 73 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-08.csv 74 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-09.csv 75 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-10.csv 76 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-11.csv 77 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2009-12.csv 78 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-01.csv 79 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-02.csv 80 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-03.csv 81 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-04.csv 82 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-05.csv 83 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-06.csv 84 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-07.csv 85 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-08.csv 86 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-09.csv 87 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-10.csv 88 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-11.csv 89 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2010-12.csv 90 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-01.csv 91 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-02.csv 92 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-03.csv 93 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-04.csv 94 | 
https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-05.csv 95 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-06.csv 96 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-07.csv 97 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-08.csv 98 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-09.csv 99 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-10.csv 100 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-11.csv 101 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2011-12.csv 102 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-01.csv 103 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-02.csv 104 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-03.csv 105 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-04.csv 106 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-05.csv 107 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-06.csv 108 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-07.csv 109 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-08.csv 110 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-09.csv 111 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-10.csv 112 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-11.csv 113 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2012-12.csv 114 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-01.csv 115 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-02.csv 116 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-03.csv 117 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-04.csv 118 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-05.csv 119 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-06.csv 120 | 
https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-07.csv 121 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-08.csv 122 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-09.csv 123 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-10.csv 124 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-11.csv 125 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2013-12.csv 126 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-01.csv 127 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-02.csv 128 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-03.csv 129 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-04.csv 130 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-05.csv 131 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-06.csv 132 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-07.csv 133 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-08.csv 134 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-09.csv 135 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-10.csv 136 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-11.csv 137 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2014-12.csv 138 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-01.csv 139 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-02.csv 140 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-03.csv 141 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-04.csv 142 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-05.csv 143 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-06.csv 144 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-07.csv 145 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-08.csv 146 
| https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-09.csv 147 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-10.csv 148 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-11.csv 149 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-12.csv 150 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-01.csv 151 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-02.csv 152 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-03.csv 153 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-04.csv 154 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-05.csv 155 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-06.csv 156 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-07.csv 157 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-08.csv 158 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-09.csv 159 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-10.csv 160 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-11.csv 161 | https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2016-12.csv 162 | -------------------------------------------------------------------------------- /data/scientists.csv: -------------------------------------------------------------------------------- 1 | Name,Born,Died,Age,Occupation 2 | Rosaline Franklin,1920-07-25,1958-04-16,37,Chemist 3 | William Gosset,1876-06-13,1937-10-16,61,Statistician 4 | Florence Nightingale,1820-05-12,1910-08-13,90,Nurse 5 | Marie Curie,1867-11-07,1934-07-04,66,Chemist 6 | Rachel Carson,1907-05-27,1964-04-14,56,Biologist 7 | John Snow,1813-03-15,1858-06-16,45,Physician 8 | Alan Turing,1912-06-23,1954-06-07,41,Computer Scientist 9 | Johann Gauss,1777-04-30,1855-02-23,77,Mathematician 10 | -------------------------------------------------------------------------------- /data/survey_person.csv: 
-------------------------------------------------------------------------------- 1 | ident,personal,family 2 | dyer,William,Dyer 3 | pb,Frank,Pabodie 4 | lake,Anderson,Lake 5 | roe,Valentina,Roerich 6 | danforth,Frank,Danforth 7 | -------------------------------------------------------------------------------- /data/survey_site.csv: -------------------------------------------------------------------------------- 1 | name,lat,long 2 | DR-1,-49.85,-128.57 3 | DR-3,-47.15,-126.72 4 | MSK-4,-48.87,-123.4 5 | -------------------------------------------------------------------------------- /data/survey_survey.csv: -------------------------------------------------------------------------------- 1 | taken,person,quant,reading 2 | 619,dyer,rad,9.82 3 | 619,dyer,sal,0.13 4 | 622,dyer,rad,7.8 5 | 622,dyer,sal,0.09 6 | 734,pb,rad,8.41 7 | 734,lake,sal,0.05 8 | 734,pb,temp,-21.5 9 | 735,pb,rad,7.22 10 | 735,,sal,0.06 11 | 735,,temp,-26.0 12 | 751,pb,rad,4.35 13 | 751,pb,temp,-18.5 14 | 751,lake,sal,0.1 15 | 752,lake,rad,2.19 16 | 752,lake,sal,0.09 17 | 752,lake,temp,-16.0 18 | 752,roe,sal,41.6 19 | 837,lake,rad,1.46 20 | 837,lake,sal,0.21 21 | 837,roe,sal,22.5 22 | 844,roe,rad,11.25 23 | -------------------------------------------------------------------------------- /data/survey_visited.csv: -------------------------------------------------------------------------------- 1 | ident,site,dated 2 | 619,DR-1,1927-02-08 3 | 622,DR-1,1927-02-10 4 | 734,DR-3,1939-01-07 5 | 735,DR-3,1930-01-12 6 | 751,DR-3,1930-02-26 7 | 752,DR-3, 8 | 837,MSK-4,1932-01-14 9 | 844,DR-1,1932-03-22 10 | -------------------------------------------------------------------------------- /data/weather.csv: -------------------------------------------------------------------------------- 1 | "id","year","month","element","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29","d30","d31" 2 | 
"MX17004",2010,1,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,27.8,NA 3 | "MX17004",2010,1,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,14.5,NA 4 | "MX17004",2010,2,"tmax",NA,27.3,24.1,NA,NA,NA,NA,NA,NA,NA,29.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA 5 | "MX17004",2010,2,"tmin",NA,14.4,14.4,NA,NA,NA,NA,NA,NA,NA,13.4,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,10.7,NA,NA,NA,NA,NA,NA,NA,NA 6 | "MX17004",2010,3,"tmax",NA,NA,NA,NA,32.1,NA,NA,NA,NA,34.5,NA,NA,NA,NA,NA,31.1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 7 | "MX17004",2010,3,"tmin",NA,NA,NA,NA,14.2,NA,NA,NA,NA,16.8,NA,NA,NA,NA,NA,17.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 8 | "MX17004",2010,4,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,36.3,NA,NA,NA,NA 9 | "MX17004",2010,4,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.7,NA,NA,NA,NA 10 | "MX17004",2010,5,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,33.2,NA,NA,NA,NA 11 | "MX17004",2010,5,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18.2,NA,NA,NA,NA 12 | "MX17004",2010,6,"tmax",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,30.1,NA,NA 13 | "MX17004",2010,6,"tmin",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,18,NA,NA 14 | "MX17004",2010,7,"tmax",NA,NA,28.6,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,29.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 15 | "MX17004",2010,7,"tmin",NA,NA,17.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 16 | "MX17004",2010,8,"tmax",NA,NA,NA,NA,29.6,NA,NA,29,NA,NA,NA,NA,29.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,26.4,NA,29.7,NA,NA,NA,28,NA,25.4 17 | 
"MX17004",2010,8,"tmin",NA,NA,NA,NA,15.8,NA,NA,17.3,NA,NA,NA,NA,16.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,15.6,NA,NA,NA,15.3,NA,15.4 18 | "MX17004",2010,10,"tmax",NA,NA,NA,NA,27,NA,28.1,NA,NA,NA,NA,NA,NA,29.5,28.7,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,31.2,NA,NA,NA 19 | "MX17004",2010,10,"tmin",NA,NA,NA,NA,14,NA,12.9,NA,NA,NA,NA,NA,NA,13,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,15,NA,NA,NA 20 | "MX17004",2010,11,"tmax",NA,31.3,NA,27.2,26.3,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,28.1,27.7,NA,NA,NA,NA 21 | "MX17004",2010,11,"tmin",NA,16.3,NA,12,7.9,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,12.1,14.2,NA,NA,NA,NA 22 | "MX17004",2010,12,"tmax",29.9,NA,NA,NA,NA,27.8,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 23 | "MX17004",2010,12,"tmin",13.8,NA,NA,NA,NA,10.5,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 24 | -------------------------------------------------------------------------------- /notebook/.ipynb_checkpoints/08_done-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 직접 해보세요!\n", 8 | "## 자료형을 자유자재로 변환하기 ─ astype 메서드(172쪽)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import pandas as pd\n", 18 | "import seaborn as sns\n", 19 | "\n", 20 | "tips = sns.load_dataset(\"tips\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### 2. 
여러 가지 자료형을 문자열로 변환하기" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "total_bill float64\n", 40 | "tip float64\n", 41 | "sex category\n", 42 | "smoker category\n", 43 | "day category\n", 44 | "time category\n", 45 | "size int64\n", 46 | "sex_str object\n", 47 | "dtype: object\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "tips['sex_str'] = tips['sex'].astype(str)\n", 53 | "print(tips.dtypes)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### 4. 자료형을 변환한 데이터 다시 원래대로 만들기" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "total_bill object\n", 73 | "tip float64\n", 74 | "sex category\n", 75 | "smoker category\n", 76 | "day category\n", 77 | "time category\n", 78 | "size int64\n", 79 | "sex_str object\n", 80 | "dtype: object\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "tips['total_bill'] = tips['total_bill'].astype(str) \n", 86 | "print(tips.dtypes)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "total_bill float64\n", 99 | "tip float64\n", 100 | "sex category\n", 101 | "smoker category\n", 102 | "day category\n", 103 | "time category\n", 104 | "size int64\n", 105 | "sex_str object\n", 106 | "dtype: object\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "tips['total_bill'] = tips['total_bill'].astype(float) \n", 112 | "print(tips.dtypes)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 직접 해보세요!\n", 120 | "## 잘못 입력한 문자열 처리하기 ─ to_numeric 메서드(174쪽)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 
6, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": [ 132 | " total_bill tip sex smoker day time size sex_str\n", 133 | "0 16.99 1.01 Female No Sun Dinner 2 Female\n", 134 | "1 missing 1.66 Male No Sun Dinner 3 Male\n", 135 | "2 21.01 3.50 Male No Sun Dinner 3 Male\n", 136 | "3 missing 3.31 Male No Sun Dinner 2 Male\n", 137 | "4 24.59 3.61 Female No Sun Dinner 4 Female\n", 138 | "5 missing 4.71 Male No Sun Dinner 4 Male\n", 139 | "6 8.77 2.00 Male No Sun Dinner 2 Male\n", 140 | "7 missing 3.12 Male No Sun Dinner 4 Male\n", 141 | "8 15.04 1.96 Male No Sun Dinner 2 Male\n", 142 | "9 14.78 3.23 Male No Sun Dinner 2 Male\n" 143 | ] 144 | }, 145 | { 146 | "name": "stderr", 147 | "output_type": "stream", 148 | "text": [ 149 | "C:\\Users\\phk70\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:543: SettingWithCopyWarning: \n", 150 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 151 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 152 | "\n", 153 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 154 | " self.obj[item] = s\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "tips_sub_miss = tips.head(10)\n", 160 | "tips_sub_miss.loc[[1, 3, 5, 7], 'total_bill'] = 'missing'\n", 161 | "\n", 162 | "print(tips_sub_miss)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 8, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "total_bill object\n", 175 | "tip float64\n", 176 | "sex category\n", 177 | "smoker category\n", 178 | "day category\n", 179 | "time category\n", 180 | "size int64\n", 181 | "sex_str object\n", 182 | "dtype: object\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "print(tips_sub_miss.dtypes)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | 
"execution_count": 9, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "ename": "ValueError", 197 | "evalue": "could not convert string to float: 'missing'", 198 | "output_type": "error", 199 | "traceback": [ 200 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 201 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 202 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtips_sub_miss\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 203 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\util\\_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 175\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 177\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 178\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 179\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_deprecate_kwarg\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 204 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[0;32m 4995\u001b[0m \u001b[1;31m# else, only a single dtype is 
given\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4996\u001b[0m new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,\n\u001b[1;32m-> 4997\u001b[1;33m **kwargs)\n\u001b[0m\u001b[0;32m 4998\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4999\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 205 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[0;32m 3712\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3713\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3714\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'astype'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3715\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3716\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 206 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in 
\u001b[0;36mapply\u001b[1;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[0;32m 3579\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3580\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'mgr'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3581\u001b[1;33m \u001b[0mapplied\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3582\u001b[0m \u001b[0mresult_blocks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3583\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 207 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[0;32m 573\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'raise'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 574\u001b[0m return self._astype(dtype, copy=copy, errors=errors, values=values,\n\u001b[1;32m--> 575\u001b[1;33m **kwargs)\n\u001b[0m\u001b[0;32m 576\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 577\u001b[0m def _astype(self, dtype, copy=False, errors='raise', values=None,\n", 208 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36m_astype\u001b[1;34m(self, dtype, copy, errors, values, klass, mgr, **kwargs)\u001b[0m\n\u001b[0;32m 662\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 663\u001b[0m \u001b[1;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 664\u001b[1;33m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 665\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 666\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 209 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\dtypes\\cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[1;34m(arr, dtype, copy)\u001b[0m\n\u001b[0;32m 728\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 729\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 730\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 731\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 732\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 210 | "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'missing'" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "tips_sub_miss['total_bill'].astype(float)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 10, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "ename": "ValueError", 225 | "evalue": "Unable to parse string \"missing\" at position 1", 226 | "output_type": "error", 227 | "traceback": [ 228 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 229 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 230 | "\u001b[1;32mpandas/_libs/src\\inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n", 231 | "\u001b[1;31mValueError\u001b[0m: Unable to parse string \"missing\"", 232 | "\nDuring handling of the above exception, another exception occurred:\n", 233 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 234 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtips_sub_miss\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 235 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\tools\\numeric.py\u001b[0m in \u001b[0;36mto_numeric\u001b[1;34m(arg, errors, downcast)\u001b[0m\n\u001b[0;32m 131\u001b[0m 
\u001b[0mcoerce_numeric\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'raise'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 132\u001b[0m values = lib.maybe_convert_numeric(values, set(),\n\u001b[1;32m--> 133\u001b[1;33m coerce_numeric=coerce_numeric)\n\u001b[0m\u001b[0;32m 134\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 236 | "\u001b[1;32mpandas/_libs/src\\inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n", 237 | "\u001b[1;31mValueError\u001b[0m: Unable to parse string \"missing\" at position 1" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "pd.to_numeric(tips_sub_miss['total_bill'])" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 11, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "total_bill object\n", 255 | "tip float64\n", 256 | "sex category\n", 257 | "smoker category\n", 258 | "day category\n", 259 | "time category\n", 260 | "size int64\n", 261 | "sex_str object\n", 262 | "dtype: object\n" 263 | ] 264 | }, 265 | { 266 | "name": "stderr", 267 | "output_type": "stream", 268 | "text": [ 269 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 270 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 271 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 272 | "\n", 273 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 274 | " \"\"\"Entry point for launching 
an IPython kernel.\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='ignore')\n", 280 | "\n", 281 | "print(tips_sub_miss.dtypes)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 12, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "name": "stdout", 291 | "output_type": "stream", 292 | "text": [ 293 | "total_bill float64\n", 294 | "tip float64\n", 295 | "sex category\n", 296 | "smoker category\n", 297 | "day category\n", 298 | "time category\n", 299 | "size int64\n", 300 | "sex_str object\n", 301 | "dtype: object\n" 302 | ] 303 | }, 304 | { 305 | "name": "stderr", 306 | "output_type": "stream", 307 | "text": [ 308 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 309 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 310 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 311 | "\n", 312 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 313 | " \"\"\"Entry point for launching an IPython kernel.\n" 314 | ] 315 | } 316 | ], 317 | "source": [ 318 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='coerce')\n", 319 | "\n", 320 | "print(tips_sub_miss.dtypes)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 13, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "text": [ 332 | "total_bill float32\n", 333 | "tip float64\n", 334 | "sex category\n", 335 | "smoker category\n", 336 | "day category\n", 337 | "time category\n", 338 | "size int64\n", 339 | "sex_str object\n", 340 | "dtype: object\n" 341 | ] 342 | }, 343 | { 344 | "name": "stderr", 345 | "output_type": "stream", 346 | "text": [ 347 | 
"C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 348 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 349 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 350 | "\n", 351 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 352 | " \"\"\"Entry point for launching an IPython kernel.\n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='coerce', downcast='float')\n", 358 | "\n", 359 | "print(tips_sub_miss.dtypes)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "# 직접 해보세요!\n", 367 | "## 문자열을 카테고리로 변환하기(179쪽)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 14, 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "name": "stdout", 377 | "output_type": "stream", 378 | "text": [ 379 | "<class 'pandas.core.frame.DataFrame'>\n", 380 | "RangeIndex: 244 entries, 0 to 243\n", 381 | "Data columns (total 8 columns):\n", 382 | "total_bill 244 non-null float64\n", 383 | "tip 244 non-null float64\n", 384 | "sex 244 non-null object\n", 385 | "smoker 244 non-null category\n", 386 | "day 244 non-null category\n", 387 | "time 244 non-null category\n", 388 | "size 244 non-null int64\n", 389 | "sex_str 244 non-null object\n", 390 | "dtypes: category(3), float64(2), int64(1), object(2)\n", 391 | "memory usage: 10.7+ KB\n", 392 | "None\n" 393 | ] 394 | } 395 | ], 396 | "source": [ 397 | "tips['sex'] = tips['sex'].astype('str') \n", 398 | "print(tips.info())" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 15, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "name": "stdout", 408 | "output_type": "stream", 409 | "text": [ 410 | "<class 'pandas.core.frame.DataFrame'>\n", 411 | "RangeIndex: 244 entries, 0 to 243\n", 412 | "Data columns (total 8 columns):\n", 413 | "total_bill 244 non-null 
float64\n", 414 | "tip 244 non-null float64\n", 415 | "sex 244 non-null category\n", 416 | "smoker 244 non-null category\n", 417 | "day 244 non-null category\n", 418 | "time 244 non-null category\n", 419 | "size 244 non-null int64\n", 420 | "sex_str 244 non-null object\n", 421 | "dtypes: category(4), float64(2), int64(1), object(1)\n", 422 | "memory usage: 9.1+ KB\n", 423 | "None\n" 424 | ] 425 | } 426 | ], 427 | "source": [ 428 | "tips['sex'] = tips['sex'].astype('category') \n", 429 | "print(tips.info())" 430 | ] 431 | } 432 | ], 433 | "metadata": { 434 | "kernelspec": { 435 | "display_name": "Python 3", 436 | "language": "python", 437 | "name": "python3" 438 | }, 439 | "language_info": { 440 | "codemirror_mode": { 441 | "name": "ipython", 442 | "version": 3 443 | }, 444 | "file_extension": ".py", 445 | "mimetype": "text/x-python", 446 | "name": "python", 447 | "nbconvert_exporter": "python", 448 | "pygments_lexer": "ipython3", 449 | "version": "3.6.5" 450 | } 451 | }, 452 | "nbformat": 4, 453 | "nbformat_minor": 1 454 | } 455 | -------------------------------------------------------------------------------- /notebook/.ipynb_checkpoints/09_done-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 직접 해보세요!\n", 8 | "## 문자열 추출하기(183쪽)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "word = 'grail'\n", 18 | "sent = 'a scratch'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "g\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "print(word[0])" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | 
"output_type": "stream", 46 | "text": [ 47 | "a\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "print(sent[0])" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "gra\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "print(word[0:3])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "---" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "h\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "print(sent[-1])" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "a\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "print(sent[-9:-8])" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "a\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "print(sent[0:-8])" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "# 알아두면 좋아요!\n", 135 | "## 전체 문자열을 추출할 때 음수를 사용하면 안 됩니다(184쪽)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "scratc\n" 148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "print(sent[2:-1])" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 11, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "scratc\n" 165 | ] 166 | } 167 | ], 168 | 
"source": [ 169 | "print(sent[-7:-1])" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 12, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "9\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "s_len = len(sent)\n", 187 | "print(s_len)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 13, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "scratch\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "print(sent[2:s_len])" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "# 직접 해보세요!\n", 212 | "## 왼쪽이나 오른쪽 범위를 지정하지 않고 문자열 추출하기(185쪽)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 14, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "gra\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "print(word[0:3])" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 15, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "gra\n" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 | "print(word[ :3])" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 16, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "scratch\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "print(sent[2:len(sent)])" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 17, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "scratch\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "print(sent[2: ])" 281 | ] 282 | }, 283 | { 284 | "cell_type": 
"code", 285 | "execution_count": 18, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "name": "stdout", 290 | "output_type": "stream", 291 | "text": [ 292 | "a scratch\n" 293 | ] 294 | } 295 | ], 296 | "source": [ 297 | "print(sent[ : ])" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 19, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "asrth\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "print(sent[::2])" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "# 직접 해보세요!\n", 322 | "## join, splitlines, replace 메서드 실습하기(188쪽)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "### 1. join 메서드" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 20, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "d1 = '40°' \n", 339 | "m1 = \"46'\" \n", 340 | "s1 = '52.837\"' \n", 341 | "u1 = 'N'\n", 342 | "\n", 343 | "d2 = '73°' \n", 344 | "m2 = \"58'\" \n", 345 | "s2 = '26.302\"' \n", 346 | "u2 = 'W'" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 21, 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "name": "stdout", 356 | "output_type": "stream", 357 | "text": [ 358 | "40° 46' 52.837\" N 73° 58' 26.302\" W\n" 359 | ] 360 | } 361 | ], 362 | "source": [ 363 | "coords = ' '.join([d1, m1, s1, u1, d2, m2, s2, u2])\n", 364 | "print(coords)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "### 2. splitlines 메서드" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 22, 377 | "metadata": {}, 378 | "outputs": [ 379 | { 380 | "name": "stdout", 381 | "output_type": "stream", 382 | "text": [ 383 | "Guard: What? 
Ridden on a horse?\n", 384 | "King Arthur: Yes!\n", 385 | "Guard: You're using coconuts!\n", 386 | "King Arthur: What?\n", 387 | "Guard: You've got ... coconut[s] and you're bangin' 'em together. \n", 388 | "\n" 389 | ] 390 | } 391 | ], 392 | "source": [ 393 | "multi_str = \"\"\"Guard: What? Ridden on a horse?\n", 394 | "King Arthur: Yes!\n", 395 | "Guard: You're using coconuts!\n", 396 | "King Arthur: What?\n", 397 | "Guard: You've got ... coconut[s] and you're bangin' 'em together. \n", 398 | "\"\"\" \n", 399 | "print(multi_str)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 23, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "name": "stdout", 409 | "output_type": "stream", 410 | "text": [ 411 | "['Guard: What? Ridden on a horse?', 'King Arthur: Yes!', \"Guard: You're using coconuts!\", 'King Arthur: What?', \"Guard: You've got ... coconut[s] and you're bangin' 'em together. \"]\n" 412 | ] 413 | } 414 | ], 415 | "source": [ 416 | "multi_str_split = multi_str.splitlines() \n", 417 | "print(multi_str_split)" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 24, 423 | "metadata": {}, 424 | "outputs": [ 425 | { 426 | "name": "stdout", 427 | "output_type": "stream", 428 | "text": [ 429 | "['Guard: What? Ridden on a horse?', \"Guard: You're using coconuts!\", \"Guard: You've got ... coconut[s] and you're bangin' 'em together. \"]\n" 430 | ] 431 | } 432 | ], 433 | "source": [ 434 | "guard = multi_str_split[::2] \n", 435 | "print(guard)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "### 4. replace 메서드" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 25, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "name": "stdout", 452 | "output_type": "stream", 453 | "text": [ 454 | "['What? Ridden on a horse?', \"You're using coconuts!\", \"You've got ... coconut[s] and you're bangin' 'em together. 
\"]\n" 455 | ] 456 | } 457 | ], 458 | "source": [ 459 | "guard = multi_str.replace(\"Guard: \", \"\").splitlines()[::2] \n", 460 | "print(guard)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "# 직접 해보세요!\n", 468 | "## 문자열 포매팅하기(190쪽)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 26, 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "name": "stdout", 478 | "output_type": "stream", 479 | "text": [ 480 | "It's just a flesh wound!\n" 481 | ] 482 | } 483 | ], 484 | "source": [ 485 | "var = 'flesh wound' \n", 486 | "s = \"It's just a {}!\"\n", 487 | "\n", 488 | "print(s.format(var))" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 27, 494 | "metadata": {}, 495 | "outputs": [ 496 | { 497 | "name": "stdout", 498 | "output_type": "stream", 499 | "text": [ 500 | "It's just a scratch!\n" 501 | ] 502 | } 503 | ], 504 | "source": [ 505 | "print(s.format('scratch'))" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 28, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "name": "stdout", 515 | "output_type": "stream", 516 | "text": [ 517 | "Black Knight: 'Tis but a scratch.\n", 518 | "King Arthur: A scratch? Your arm's off!\n", 519 | "\n" 520 | ] 521 | } 522 | ], 523 | "source": [ 524 | "s = \"\"\"Black Knight: 'Tis but a {0}.\n", 525 | "King Arthur: A {0}? 
Your arm's off!\n", 526 | "\"\"\" \n", 527 | "print(s.format('scratch'))" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 29, 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "name": "stdout", 537 | "output_type": "stream", 538 | "text": [ 539 | "Hayden Planetarium Coordinates: 40.7815° N, 73.9733° W\n" 540 | ] 541 | } 542 | ], 543 | "source": [ 544 | "s = 'Hayden Planetarium Coordinates: {lat}, {lon}' \n", 545 | "print(s.format(lat='40.7815° N', lon='73.9733° W'))" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "# 직접 해보세요!\n", 553 | "## 숫자 데이터 포매팅하기(191쪽)" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 30, 559 | "metadata": {}, 560 | "outputs": [ 561 | { 562 | "name": "stdout", 563 | "output_type": "stream", 564 | "text": [ 565 | "Some digits of pi: 3.14159265359\n" 566 | ] 567 | } 568 | ], 569 | "source": [ 570 | "print('Some digits of pi: {}'.format(3.14159265359))" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 31, 576 | "metadata": {}, 577 | "outputs": [ 578 | { 579 | "name": "stdout", 580 | "output_type": "stream", 581 | "text": [ 582 | "In 2005, Lu Chao of China recited 67,890 digits of pi\n" 583 | ] 584 | } 585 | ], 586 | "source": [ 587 | "print(\"In 2005, Lu Chao of China recited {:,} digits of pi\".format(67890))" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 32, 593 | "metadata": {}, 594 | "outputs": [ 595 | { 596 | "name": "stdout", 597 | "output_type": "stream", 598 | "text": [ 599 | "I remember 0.0001031 or 0.0103% of what Lu Chao recited\n" 600 | ] 601 | } 602 | ], 603 | "source": [ 604 | "print(\"I remember {0:.4} or {0:.4%} of what Lu Chao recited\".format(7/67890))" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 33, 610 | "metadata": {}, 611 | "outputs": [ 612 | { 613 | "name": "stdout", 614 | "output_type": "stream", 615 | "text": [ 616 | 
"My ID number is 00042\n" 617 | ] 618 | } 619 | ], 620 | "source": [ 621 | "print(\"My ID number is {0:05d}\".format(42))" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "metadata": {}, 627 | "source": [ 628 | "# 직접 해보세요!\n", 629 | "## % 연산자로 포매팅하기(192쪽)" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 34, 635 | "metadata": {}, 636 | "outputs": [ 637 | { 638 | "name": "stdout", 639 | "output_type": "stream", 640 | "text": [ 641 | "I only know 7 digits of pi\n" 642 | ] 643 | } 644 | ], 645 | "source": [ 646 | "s = 'I only know %d digits of pi' % 7 \n", 647 | "print(s)" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": 35, 653 | "metadata": {}, 654 | "outputs": [ 655 | { 656 | "name": "stdout", 657 | "output_type": "stream", 658 | "text": [ 659 | "Some digits of e: 2.72\n" 660 | ] 661 | } 662 | ], 663 | "source": [ 664 | "print('Some digits of %(cont)s: %(value).2f' % {'cont': 'e', 'value': 2.718})" 665 | ] 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "metadata": {}, 670 | "source": [ 671 | "# 알아두면 좋아요!\n", 672 | "## f-strings로 포매팅 사용하기(193쪽)" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 36, 678 | "metadata": {}, 679 | "outputs": [ 680 | { 681 | "name": "stdout", 682 | "output_type": "stream", 683 | "text": [ 684 | "It's just a flesh wound!\n" 685 | ] 686 | } 687 | ], 688 | "source": [ 689 | "var = 'flesh wound' \n", 690 | "s = f\"It's just a {var}!\" \n", 691 | "print(s)" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": 37, 697 | "metadata": {}, 698 | "outputs": [ 699 | { 700 | "name": "stdout", 701 | "output_type": "stream", 702 | "text": [ 703 | "Hayden Planetarium Coordinates: 40.7815°N, 73.9733°W\n" 704 | ] 705 | } 706 | ], 707 | "source": [ 708 | "lat='40.7815°N' \n", 709 | "lon='73.9733°W' \n", 710 | "s = f'Hayden Planetarium Coordinates: {lat}, {lon}' \n", 711 | "print(s)" 712 | ] 713 | }, 714 | { 715 | "cell_type": 
"markdown", 716 | "metadata": {}, 717 | "source": [ 718 | "# 직접 해보세요!\n", 719 | "## 정규식으로 전화번호 패턴 찾기(196쪽)" 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": 38, 725 | "metadata": {}, 726 | "outputs": [], 727 | "source": [ 728 | "import re\n", 729 | "\n", 730 | "tele_num = '1234567890'" 731 | ] 732 | }, 733 | { 734 | "cell_type": "code", 735 | "execution_count": 39, 736 | "metadata": {}, 737 | "outputs": [ 738 | { 739 | "name": "stdout", 740 | "output_type": "stream", 741 | "text": [ 742 | "<class '_sre.SRE_Match'>\n" 743 | ] 744 | } 745 | ], 746 | "source": [ 747 | "m = re.match(pattern='\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d', string=tele_num) \n", 748 | "print(type(m))" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": 40, 754 | "metadata": {}, 755 | "outputs": [ 756 | { 757 | "name": "stdout", 758 | "output_type": "stream", 759 | "text": [ 760 | "<_sre.SRE_Match object; span=(0, 10), match='1234567890'>\n" 761 | ] 762 | } 763 | ], 764 | "source": [ 765 | "print(m)" 766 | ] 767 | }, 768 | { 769 | "cell_type": "code", 770 | "execution_count": 41, 771 | "metadata": {}, 772 | "outputs": [ 773 | { 774 | "name": "stdout", 775 | "output_type": "stream", 776 | "text": [ 777 | "True\n" 778 | ] 779 | } 780 | ], 781 | "source": [ 782 | "print(bool(m))" 783 | ] 784 | }, 785 | { 786 | "cell_type": "code", 787 | "execution_count": 42, 788 | "metadata": {}, 789 | "outputs": [ 790 | { 791 | "name": "stdout", 792 | "output_type": "stream", 793 | "text": [ 794 | "match\n" 795 | ] 796 | } 797 | ], 798 | "source": [ 799 | "if m:\n", 800 | " print('match') \n", 801 | "else:\n", 802 | " print('no match')" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 43, 808 | "metadata": {}, 809 | "outputs": [ 810 | { 811 | "name": "stdout", 812 | "output_type": "stream", 813 | "text": [ 814 | "0\n" 815 | ] 816 | } 817 | ], 818 | "source": [ 819 | "print(m.start())" 820 | ] 821 | }, 822 | { 823 | "cell_type": "code", 824 | "execution_count": 44, 
825 | "metadata": {}, 826 | "outputs": [ 827 | { 828 | "name": "stdout", 829 | "output_type": "stream", 830 | "text": [ 831 | "10\n" 832 | ] 833 | } 834 | ], 835 | "source": [ 836 | "print(m.end())" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": 45, 842 | "metadata": {}, 843 | "outputs": [ 844 | { 845 | "name": "stdout", 846 | "output_type": "stream", 847 | "text": [ 848 | "(0, 10)\n" 849 | ] 850 | } 851 | ], 852 | "source": [ 853 | "print(m.span())" 854 | ] 855 | }, 856 | { 857 | "cell_type": "code", 858 | "execution_count": 46, 859 | "metadata": {}, 860 | "outputs": [ 861 | { 862 | "name": "stdout", 863 | "output_type": "stream", 864 | "text": [ 865 | "1234567890\n" 866 | ] 867 | } 868 | ], 869 | "source": [ 870 | "print(m.group())" 871 | ] 872 | }, 873 | { 874 | "cell_type": "code", 875 | "execution_count": 47, 876 | "metadata": {}, 877 | "outputs": [], 878 | "source": [ 879 | "tele_num_spaces = '123 456 7890'" 880 | ] 881 | }, 882 | { 883 | "cell_type": "code", 884 | "execution_count": 48, 885 | "metadata": {}, 886 | "outputs": [ 887 | { 888 | "name": "stdout", 889 | "output_type": "stream", 890 | "text": [ 891 | "None\n" 892 | ] 893 | } 894 | ], 895 | "source": [ 896 | "m = re.match(pattern='\\d{10}', string=tele_num_spaces) \n", 897 | "print(m)" 898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": 49, 903 | "metadata": {}, 904 | "outputs": [ 905 | { 906 | "name": "stdout", 907 | "output_type": "stream", 908 | "text": [ 909 | "no match\n" 910 | ] 911 | } 912 | ], 913 | "source": [ 914 | "if m:\n", 915 | " print('match') \n", 916 | "else:\n", 917 | " print('no match')" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": 50, 923 | "metadata": {}, 924 | "outputs": [ 925 | { 926 | "name": "stdout", 927 | "output_type": "stream", 928 | "text": [ 929 | "<_sre.SRE_Match object; span=(0, 12), match='123 456 7890'>\n" 930 | ] 931 | } 932 | ], 933 | "source": [ 934 | "p = 
'\\d{3}\\s?\\d{3}\\s?\\d{4}' \n", 935 | "m = re.match(pattern=p, string=tele_num_spaces) \n", 936 | "print(m)" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": 51, 942 | "metadata": {}, 943 | "outputs": [ 944 | { 945 | "name": "stdout", 946 | "output_type": "stream", 947 | "text": [ 948 | "<_sre.SRE_Match object; span=(0, 14), match='(123) 456-7890'>\n" 949 | ] 950 | } 951 | ], 952 | "source": [ 953 | "tele_num_space_paren_dash = '(123) 456-7890' \n", 954 | "p = '\\(?\\d{3}\\)?\\s?\\d{3}\\s?-?\\d{4}' \n", 955 | "m = re.match(pattern=p, string=tele_num_space_paren_dash) \n", 956 | "print(m)" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": 52, 962 | "metadata": {}, 963 | "outputs": [ 964 | { 965 | "name": "stdout", 966 | "output_type": "stream", 967 | "text": [ 968 | "<_sre.SRE_Match object; span=(0, 17), match='+1 (123) 456-7890'>\n" 969 | ] 970 | } 971 | ], 972 | "source": [ 973 | "cnty_tele_num_space_paren_dash = '+1 (123) 456-7890' \n", 974 | "p = '\\+?1\\s?\\(?\\d{3}\\)?\\s?\\d{3}\\s?-?\\d{4}' \n", 975 | "m = re.match(pattern=p, string=cnty_tele_num_space_paren_dash) \n", 976 | "print(m)" 977 | ] 978 | }, 979 | { 980 | "cell_type": "markdown", 981 | "metadata": {}, 982 | "source": [ 983 | "# 알아두면 좋아요!\n", 984 | "## compile 메서드로 정규식 메서드 사용하기(200쪽)" 985 | ] 986 | }, 987 | { 988 | "cell_type": "code", 989 | "execution_count": 54, 990 | "metadata": {}, 991 | "outputs": [ 992 | { 993 | "name": "stdout", 994 | "output_type": "stream", 995 | "text": [ 996 | "<_sre.SRE_Match object; span=(0, 10), match='1234567890'>\n" 997 | ] 998 | } 999 | ], 1000 | "source": [ 1001 | "p = re.compile('\\d{10}') \n", 1002 | "s = '1234567890' \n", 1003 | "m = p.match(s) \n", 1004 | "print(m)" 1005 | ] 1006 | } 1007 | ], 1008 | "metadata": { 1009 | "kernelspec": { 1010 | "display_name": "Python 3", 1011 | "language": "python", 1012 | "name": "python3" 1013 | }, 1014 | "language_info": { 1015 | "codemirror_mode": { 1016 | "name": 
"ipython", 1017 | "version": 3 1018 | }, 1019 | "file_extension": ".py", 1020 | "mimetype": "text/x-python", 1021 | "name": "python", 1022 | "nbconvert_exporter": "python", 1023 | "pygments_lexer": "ipython3", 1024 | "version": "3.6.5" 1025 | } 1026 | }, 1027 | "nbformat": 4, 1028 | "nbformat_minor": 1 1029 | } 1030 | -------------------------------------------------------------------------------- /notebook/.ipynb_checkpoints/10_done-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 직접 해보세요!\n", 8 | "## 제곱 함수와 n 제곱 함수 만들기(202쪽)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "def my_sq(x):\n", 18 | " return x ** 2" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "def my_exp(x, n):\n", 28 | " return x ** n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "16\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "print(my_sq(4))" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "16\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "print(my_exp(2, 4))" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "# 직접 해보세요!\n", 70 | "## 시리즈와 데이터프레임에 apply 메서드 사용하기(203쪽)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### 1. 
시리즈와 apply 메서드" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | " a b\n", 90 | "0 10 20\n", 91 | "1 20 30\n", 92 | "2 30 40\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "import pandas as pd\n", 98 | "\n", 99 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]}) \n", 100 | "\n", 101 | "print(df)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "0 100\n", 114 | "1 400\n", 115 | "2 900\n", 116 | "Name: a, dtype: int64\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "print(df['a'] ** 2)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "0 100\n", 134 | "1 400\n", 135 | "2 900\n", 136 | "Name: a, dtype: int64\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "sq = df['a'].apply(my_sq) \n", 142 | "print(sq)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 8, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "0 100\n", 155 | "1 400\n", 156 | "2 900\n", 157 | "Name: a, dtype: int64\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "ex = df['a'].apply(my_exp, n=2) \n", 163 | "print(ex)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 9, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "0 1000\n", 176 | "1 8000\n", 177 | "2 27000\n", 178 | "Name: a, dtype: int64\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "ex = df['a'].apply(my_exp, n=3) \n", 184 | "print(ex)" 185 | ] 186 | }, 187 | { 188 | 
"cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "### 5. 데이터 프레임과 apply 메서드" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 5, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | " a b\n", 204 | "0 10 20\n", 205 | "1 20 30\n", 206 | "2 30 40\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]}) \n", 212 | "print(df)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 6, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "def print_me(x): \n", 222 | " print(x)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 7, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | "0 10\n", 235 | "1 20\n", 236 | "2 30\n", 237 | "Name: a, dtype: int64\n", 238 | "0 20\n", 239 | "1 30\n", 240 | "2 40\n", 241 | "Name: b, dtype: int64\n", 242 | "a None\n", 243 | "b None\n", 244 | "dtype: object\n" 245 | ] 246 | } 247 | ], 248 | "source": [ 249 | "print(df.apply(print_me, axis=0))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 8, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "name": "stdout", 259 | "output_type": "stream", 260 | "text": [ 261 | "0 10\n", 262 | "1 20\n", 263 | "2 30\n", 264 | "Name: a, dtype: int64\n" 265 | ] 266 | } 267 | ], 268 | "source": [ 269 | "print(df['a'])" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 9, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "0 20\n", 282 | "1 30\n", 283 | "2 40\n", 284 | "Name: b, dtype: int64\n" 285 | ] 286 | } 287 | ], 288 | "source": [ 289 | "print(df['b'])" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 10, 295 | "metadata": {}, 296 | 
"outputs": [], 297 | "source": [ 298 | "def avg_3(x, y, z):\n", 299 | " return (x + y + z) / 3" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 11, 305 | "metadata": {}, 306 | "outputs": [ 307 | { 308 | "ename": "TypeError", 309 | "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')", 310 | "output_type": "error", 311 | "traceback": [ 312 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 313 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", 314 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 315 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[0;32m 6002\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6003\u001b[0m kwds=kwds)\n\u001b[1;32m-> 6004\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6005\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6006\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 316 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 
316\u001b[0m *self.args, **self.kwds)\n\u001b[0;32m 317\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 318\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFrameRowApply\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 319\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 320\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_broadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 317 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 140\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 141\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 142\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 143\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 144\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 318 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 246\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[1;31m# compute the result using the series generator\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 248\u001b[1;33m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 249\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 250\u001b[0m \u001b[1;31m# wrap results\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 319 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 275\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 277\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 278\u001b[0m \u001b[0mkeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 279\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 320 | "\u001b[1;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "print(df.apply(avg_3))" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 12, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": 
"stream", 336 | "text": [ 337 | "a 20.0\n", 338 | "b 30.0\n", 339 | "dtype: float64\n" 340 | ] 341 | } 342 | ], 343 | "source": [ 344 | "def avg_3_apply(col):\n", 345 | " x = col[0] \n", 346 | " y = col[1] \n", 347 | " z = col[2] \n", 348 | " return (x + y + z) / 3\n", 349 | "\n", 350 | "\n", 351 | "print(df.apply(avg_3_apply))" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 23, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "def avg_3_apply(col):\n", 361 | " sum = 0\n", 362 | " for item in col:\n", 363 | " sum += item\n", 364 | " return sum / df.shape[0]" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 31, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "def avg_2_apply(row):\n", 374 | " sum = 0\n", 375 | " for item in row:\n", 376 | " sum += item\n", 377 | " return sum / df.shape[1]" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 32, 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "0 15.0\n", 390 | "1 25.0\n", 391 | "2 35.0\n", 392 | "dtype: float64\n" 393 | ] 394 | } 395 | ], 396 | "source": [ 397 | "print(df.apply(avg_2_apply, axis = 1))" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "# 직접 해보세요!\n", 405 | "## 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기(208쪽)" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "### 1. 
데이터프레임의 누락값 처리하기 ― 열 방향" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 33, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "import seaborn as sns\n", 422 | "\n", 423 | "titanic = sns.load_dataset(\"titanic\")" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 34, 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "name": "stdout", 433 | "output_type": "stream", 434 | "text": [ 435 | "\n", 436 | "RangeIndex: 891 entries, 0 to 890\n", 437 | "Data columns (total 15 columns):\n", 438 | "survived 891 non-null int64\n", 439 | "pclass 891 non-null int64\n", 440 | "sex 891 non-null object\n", 441 | "age 714 non-null float64\n", 442 | "sibsp 891 non-null int64\n", 443 | "parch 891 non-null int64\n", 444 | "fare 891 non-null float64\n", 445 | "embarked 889 non-null object\n", 446 | "class 891 non-null category\n", 447 | "who 891 non-null object\n", 448 | "adult_male 891 non-null bool\n", 449 | "deck 203 non-null category\n", 450 | "embark_town 889 non-null object\n", 451 | "alive 891 non-null object\n", 452 | "alone 891 non-null bool\n", 453 | "dtypes: bool(2), category(2), float64(2), int64(4), object(5)\n", 454 | "memory usage: 80.6+ KB\n", 455 | "None\n" 456 | ] 457 | } 458 | ], 459 | "source": [ 460 | "print(titanic.info())" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 10, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "import numpy as np\n", 470 | "\n", 471 | "def count_missing(vec):\n", 472 | " null_vec = pd.isnull(vec)\n", 473 | " null_count = np.sum(null_vec)\n", 474 | " return null_count" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 41, 480 | "metadata": {}, 481 | "outputs": [ 482 | { 483 | "name": "stdout", 484 | "output_type": "stream", 485 | "text": [ 486 | "survived 0\n", 487 | "pclass 0\n", 488 | "sex 0\n", 489 | "age 177\n", 490 | "sibsp 0\n", 491 | "parch 0\n", 492 | "fare 0\n", 493 | "embarked 
2\n", 494 | "class 0\n", 495 | "who 0\n", 496 | "adult_male 0\n", 497 | "deck 688\n", 498 | "embark_town 2\n", 499 | "alive 0\n", 500 | "alone 0\n", 501 | "dtype: int64\n" 502 | ] 503 | } 504 | ], 505 | "source": [ 506 | "cmis_col = titanic.apply(count_missing)\n", 507 | "print(cmis_col)" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 43, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "def prop_missing(vec):\n", 517 | " num = count_missing(vec)\n", 518 | " dem = vec.size\n", 519 | " return num / dem" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 45, 525 | "metadata": {}, 526 | "outputs": [ 527 | { 528 | "name": "stdout", 529 | "output_type": "stream", 530 | "text": [ 531 | "survived 0.000000\n", 532 | "pclass 0.000000\n", 533 | "sex 0.000000\n", 534 | "age 0.198653\n", 535 | "sibsp 0.000000\n", 536 | "parch 0.000000\n", 537 | "fare 0.000000\n", 538 | "embarked 0.002245\n", 539 | "class 0.000000\n", 540 | "who 0.000000\n", 541 | "adult_male 0.000000\n", 542 | "deck 0.772166\n", 543 | "embark_town 0.002245\n", 544 | "alive 0.000000\n", 545 | "alone 0.000000\n", 546 | "dtype: float64\n" 547 | ] 548 | } 549 | ], 550 | "source": [ 551 | "pmis_col = titanic.apply(prop_missing)\n", 552 | "print(pmis_col)" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 46, 558 | "metadata": {}, 559 | "outputs": [], 560 | "source": [ 561 | "def prop_complete(vec):\n", 562 | " return 1 - prop_missing(vec)" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "### 8. 
데이터프레임의 누락값을 처리하기 ― 행 방향" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 48, 575 | "metadata": {}, 576 | "outputs": [ 577 | { 578 | "name": "stdout", 579 | "output_type": "stream", 580 | "text": [ 581 | "0 1\n", 582 | "1 0\n", 583 | "2 1\n", 584 | "3 0\n", 585 | "4 1\n", 586 | "dtype: int64\n" 587 | ] 588 | } 589 | ], 590 | "source": [ 591 | "cmis_row = titanic.apply(count_missing, axis=1)\n", 592 | "pmis_row = titanic.apply(prop_missing, axis=1)\n", 593 | "pcom_row = titanic.apply(prop_complete, axis=1)\n", 594 | "\n", 595 | "print(cmis_row.head())" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 49, 601 | "metadata": {}, 602 | "outputs": [ 603 | { 604 | "name": "stdout", 605 | "output_type": "stream", 606 | "text": [ 607 | "0 0.066667\n", 608 | "1 0.000000\n", 609 | "2 0.066667\n", 610 | "3 0.000000\n", 611 | "4 0.066667\n", 612 | "dtype: float64\n" 613 | ] 614 | } 615 | ], 616 | "source": [ 617 | "print(pmis_row.head())" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": 50, 623 | "metadata": {}, 624 | "outputs": [ 625 | { 626 | "name": "stdout", 627 | "output_type": "stream", 628 | "text": [ 629 | "0 0.933333\n", 630 | "1 1.000000\n", 631 | "2 0.933333\n", 632 | "3 1.000000\n", 633 | "4 0.933333\n", 634 | "dtype: float64\n" 635 | ] 636 | } 637 | ], 638 | "source": [ 639 | "print(pcom_row.head())" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 51, 645 | "metadata": {}, 646 | "outputs": [ 647 | { 648 | "name": "stdout", 649 | "output_type": "stream", 650 | "text": [ 651 | " survived pclass sex age sibsp parch fare embarked class \\\n", 652 | "0 0 3 male 22.0 1 0 7.2500 S Third \n", 653 | "1 1 1 female 38.0 1 0 71.2833 C First \n", 654 | "2 1 3 female 26.0 0 0 7.9250 S Third \n", 655 | "3 1 1 female 35.0 1 0 53.1000 S First \n", 656 | "4 0 3 male 35.0 0 0 8.0500 S Third \n", 657 | "\n", 658 | " who adult_male deck embark_town alive alone num_missing 
\n", 659 | "0 man True NaN Southampton no False 1 \n", 660 | "1 woman False C Cherbourg yes False 0 \n", 661 | "2 woman False NaN Southampton yes True 1 \n", 662 | "3 woman False C Southampton yes False 0 \n", 663 | "4 man True NaN Southampton no True 1 \n" 664 | ] 665 | } 666 | ], 667 | "source": [ 668 | "titanic['num_missing'] = titanic.apply(count_missing, axis=1)\n", 669 | "\n", 670 | "print(titanic.head())" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 53, 676 | "metadata": {}, 677 | "outputs": [ 678 | { 679 | "name": "stdout", 680 | "output_type": "stream", 681 | "text": [ 682 | " survived pclass sex age sibsp parch fare embarked class \\\n", 683 | "186 1 3 female NaN 1 0 15.5000 Q Third \n", 684 | "274 1 3 female NaN 0 0 7.7500 Q Third \n", 685 | "410 0 3 male NaN 0 0 7.8958 S Third \n", 686 | "547 1 2 male NaN 0 0 13.8625 C Second \n", 687 | "601 0 3 male NaN 0 0 7.8958 S Third \n", 688 | "578 0 3 female NaN 1 0 14.4583 C Third \n", 689 | "76 0 3 male NaN 0 0 7.8958 S Third \n", 690 | "560 0 3 male NaN 0 0 7.7500 Q Third \n", 691 | "511 0 3 male NaN 0 0 8.0500 S Third \n", 692 | "495 0 3 male NaN 0 0 14.4583 C Third \n", 693 | "\n", 694 | " who adult_male deck embark_town alive alone num_missing \n", 695 | "186 woman False NaN Queenstown yes False 2 \n", 696 | "274 woman False NaN Queenstown yes True 2 \n", 697 | "410 man True NaN Southampton no True 2 \n", 698 | "547 man True NaN Cherbourg yes True 2 \n", 699 | "601 man True NaN Southampton no True 2 \n", 700 | "578 woman False NaN Cherbourg no False 2 \n", 701 | "76 man True NaN Southampton no True 2 \n", 702 | "560 man True NaN Queenstown no True 2 \n", 703 | "511 man True NaN Southampton no True 2 \n", 704 | "495 man True NaN Cherbourg no True 2 \n" 705 | ] 706 | } 707 | ], 708 | "source": [ 709 | "print(titanic.loc[titanic.num_missing > 1, :].sample(10))" 710 | ] 711 | } 712 | ], 713 | "metadata": { 714 | "kernelspec": { 715 | "display_name": "Python 3", 716 | "language": 
"python", 717 | "name": "python3" 718 | }, 719 | "language_info": { 720 | "codemirror_mode": { 721 | "name": "ipython", 722 | "version": 3 723 | }, 724 | "file_extension": ".py", 725 | "mimetype": "text/x-python", 726 | "name": "python", 727 | "nbconvert_exporter": "python", 728 | "pygments_lexer": "ipython3", 729 | "version": "3.6.5" 730 | } 731 | }, 732 | "nbformat": 4, 733 | "nbformat_minor": 1 734 | } 735 | -------------------------------------------------------------------------------- /notebook/.ipynb_checkpoints/Special-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 직접 해보세요!\n", 8 | "## 코드의 성능을 향상시켜 실행 시간 측정하기 ― timeit" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import pandas as pd\n", 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]})" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def avg_2_apply(row):\n", 37 | " x = row[0]\n", 38 | " y = row[1]\n", 39 | " if(x == 20):\n", 40 | " return np.nan\n", 41 | " else:\n", 42 | " return (x + y)/2" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### 2. 판다스 데이터프레임 ― 실행 시간 측정" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "511 µs ± 5.98 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each)\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "%%timeit\n", 67 | "df.apply(avg_2_apply, axis = 1)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### 3. 넘파이로 벡터화한 함수 사용하기 ― 실행 시간 측정" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "@np.vectorize\n", 84 | "def v_avg_2mod(x, y):\n", 85 | " if(x == 20):\n", 86 | " return (np.NaN)\n", 87 | " else:\n", 88 | " return (x + y) / 2" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 6, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "36 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "%%timeit\n", 106 | "v_avg_2mod(df['a'], df['b'])" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "### 5. numba 라이브러리로 벡터화한 함수 사용하기 ― 실행 시간 측정" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 7, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "import numba\n", 123 | "\n", 124 | "@numba.vectorize\n", 125 | "def v_avg_2_numba(x, y):\n", 126 | " if(x == 20):\n", 127 | " return (np.NaN)\n", 128 | " else:\n", 129 | " return (x + y) / 2" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 8, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "4.46 µs ± 47.9 ns per loop (mean ± std. dev. 
of 7 runs, 100000 loops each)\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "%%timeit\n", 147 | "v_avg_2_numba(df['a'].values, df['b'].values)" 148 | ] 149 | } 150 | ], 151 | "metadata": { 152 | "kernelspec": { 153 | "display_name": "Python 3", 154 | "language": "python", 155 | "name": "python3" 156 | }, 157 | "language_info": { 158 | "codemirror_mode": { 159 | "name": "ipython", 160 | "version": 3 161 | }, 162 | "file_extension": ".py", 163 | "mimetype": "text/x-python", 164 | "name": "python", 165 | "nbconvert_exporter": "python", 166 | "pygments_lexer": "ipython3", 167 | "version": "3.6.5" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 2 172 | } 173 | -------------------------------------------------------------------------------- /notebook/.ipynb_checkpoints/hello_jupyter_notebook-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.6.5" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 2 32 | } 33 | -------------------------------------------------------------------------------- /notebook/02_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 갭마인더 데이터 집합 불러오기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 
| }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 불러온 데이터 집합 살펴보기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 열 단위로 데이터 추출하기" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# loc 속성으로 행 단위 데이터 추출하기" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# tail과 loc는 조금 달라요!" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# iloc 속성으로 행 단위 데이터 추출하기" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# 파이썬 슬라이싱 구문을 조합하여 원하는 데이터 추출하기" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# iloc 속성과 range 메서드로 원하는 데이터 추출하기" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 열 지정값에 파이썬 슬라이싱을 사용하여 원하는 데이터 추출하기" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | 
"metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "# loc, iloc 자유자재로 사용하기" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "# 그룹화한 데이터의 평균 구하기" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "# 그룹화한 데이터의 개수 세어보기" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "# 그래프 만들기" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.6.5" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 2 207 | } 208 | -------------------------------------------------------------------------------- /notebook/03_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 시리즈 만들기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | 
"metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 데이터프레임 만들기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 데이터프레임에서 시리즈 선택하기" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# index, values, keys 사용하기" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 시리즈의 mean, min, max, std 메서드 사용하기" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# 시리즈와 불린 추출 사용하기" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# 시리즈와 브로드캐스팅" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# 데이터프레임과 불린 추출" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 데이터프레임과 브로드캐스팅" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 
125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "# 열의 자료형 바꾸기와 새로운 열 추가하기" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "# 시리즈, 데이터프레임의 데이터 섞어보기" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "# 데이터프레임의 열 삭제하기" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "# 피클 형식으로 저장하기" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "# CSV 불러오기" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [] 198 | } 199 | ], 200 | "metadata": { 201 | "kernelspec": { 202 | "display_name": "Python 3", 203 | "language": "python", 204 | "name": "python3" 205 | }, 206 | "language_info": { 207 | "codemirror_mode": { 208 | "name": "ipython", 209 | "version": 3 210 | }, 211 | "file_extension": ".py", 212 | "mimetype": "text/x-python", 213 | "name": "python", 214 | "nbconvert_exporter": "python", 215 | "pygments_lexer": "ipython3", 216 | "version": "3.6.5" 217 | } 218 | }, 219 | "nbformat": 4, 220 | "nbformat_minor": 2 221 | } 222 | -------------------------------------------------------------------------------- /notebook/04_practice.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 앤스콤 데이터 집합 불러오기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# matplotlib 라이브러리로 간단한 그래프 그리기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 한 번에 4개의 그래프 그리기" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 기초 그래프 그리기 - 히스토그램, 산점도, 박스 그래프" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 단변량 그래프 그리기 - 히스토그램" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# 다변량 그래프 그리기" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# 데이터프레임과 시리즈로 그래프 그리기" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# 알아두면 좋아요" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 
110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | "codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": "ipython3", 132 | "version": "3.6.5" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 2 137 | } 138 | -------------------------------------------------------------------------------- /notebook/05_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# concat 메서드 사용하기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 데이터프레임에 시리즈 연결하기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 행 1개로 구성된 데이터프레임 생성하여 연결하기" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 다양한 방법으로 데이터 연결하기" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 공통 열과 공통 인덱스만 연결하기" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 
69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# merge 메서드 사용하기" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | } 87 | ], 88 | "metadata": { 89 | "kernelspec": { 90 | "display_name": "Python 3", 91 | "language": "python", 92 | "name": "python3" 93 | }, 94 | "language_info": { 95 | "codemirror_mode": { 96 | "name": "ipython", 97 | "version": 3 98 | }, 99 | "file_extension": ".py", 100 | "mimetype": "text/x-python", 101 | "name": "python", 102 | "nbconvert_exporter": "python", 103 | "pygments_lexer": "ipython3", 104 | "version": "3.6.5" 105 | } 106 | }, 107 | "nbformat": 4, 108 | "nbformat_minor": 2 109 | } 110 | -------------------------------------------------------------------------------- /notebook/06_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 누락값 확인하기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 누락값을 포함한 데이터를 불러올 때" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 데이터 집합을 연결할 때 누락값이 발생하는 경우" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 데이터를 입력할 때 누락값이 발생하는 경우" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | 
"source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 인덱스를 다시 만들 때 누락값이 발생하는 경우" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# 누락값의 개수 구하기" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# 누락값을 다른 값으로 변경하기" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# 누락값 삭제하기" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 누락값이 포함된 데이터 계산하기" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.6.5" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 2 151 | } 152 | -------------------------------------------------------------------------------- /notebook/07_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1개의 열만 고정하고 나머지 열을 행으로 바꾸기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 2개 이상의 열을 고정하고 나머지 열을 행으로 바꾸기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "scrolled": true 35 | }, 36 | "source": [ 37 | "# ebola 데이터 집합 살펴보기" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "scrolled": true 45 | }, 46 | "outputs": [], 47 | "source": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "# 열 이름 나누고 데이터 프레임에 추가하기" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "# concat 메서드를 응용하여 데이터프레임에 열 추가하기" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "# 기상 데이터의 여러 열을 하나로 정리하기" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# 빌보드 차트의 중복 데이터 처리하기" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "# 뉴욕 택시 데이터 준비" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 
| "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "# 반복문으로 데이터 준비하기" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.6.5" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 2 155 | } 156 | -------------------------------------------------------------------------------- /notebook/08_done.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 직접 해보세요!\n", 8 | "## 자료형을 자유자재로 변환하기 ─ astype 메서드(172쪽)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import pandas as pd\n", 18 | "import seaborn as sns\n", 19 | "\n", 20 | "tips = sns.load_dataset(\"tips\")" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### 2. 
여러 가지 자료형을 문자열로 변환하기" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "total_bill float64\n", 40 | "tip float64\n", 41 | "sex category\n", 42 | "smoker category\n", 43 | "day category\n", 44 | "time category\n", 45 | "size int64\n", 46 | "sex_str object\n", 47 | "dtype: object\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "tips['sex_str'] = tips['sex'].astype(str)\n", 53 | "print(tips.dtypes)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### 4. 자료형을 변환한 데이터 다시 원래대로 만들기" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "total_bill object\n", 73 | "tip float64\n", 74 | "sex category\n", 75 | "smoker category\n", 76 | "day category\n", 77 | "time category\n", 78 | "size int64\n", 79 | "sex_str object\n", 80 | "dtype: object\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "tips['total_bill'] = tips['total_bill'].astype(str) \n", 86 | "print(tips.dtypes)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "total_bill float64\n", 99 | "tip float64\n", 100 | "sex category\n", 101 | "smoker category\n", 102 | "day category\n", 103 | "time category\n", 104 | "size int64\n", 105 | "sex_str object\n", 106 | "dtype: object\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "tips['total_bill'] = tips['total_bill'].astype(float) \n", 112 | "print(tips.dtypes)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 직접 해보세요!\n", 120 | "## 잘못 입력한 문자열 처리하기 ─ to_numeric 메서드(174쪽)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 
6, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": [ 132 | " total_bill tip sex smoker day time size sex_str\n", 133 | "0 16.99 1.01 Female No Sun Dinner 2 Female\n", 134 | "1 missing 1.66 Male No Sun Dinner 3 Male\n", 135 | "2 21.01 3.50 Male No Sun Dinner 3 Male\n", 136 | "3 missing 3.31 Male No Sun Dinner 2 Male\n", 137 | "4 24.59 3.61 Female No Sun Dinner 4 Female\n", 138 | "5 missing 4.71 Male No Sun Dinner 4 Male\n", 139 | "6 8.77 2.00 Male No Sun Dinner 2 Male\n", 140 | "7 missing 3.12 Male No Sun Dinner 4 Male\n", 141 | "8 15.04 1.96 Male No Sun Dinner 2 Male\n", 142 | "9 14.78 3.23 Male No Sun Dinner 2 Male\n" 143 | ] 144 | }, 145 | { 146 | "name": "stderr", 147 | "output_type": "stream", 148 | "text": [ 149 | "C:\\Users\\phk70\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:543: SettingWithCopyWarning: \n", 150 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 151 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 152 | "\n", 153 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 154 | " self.obj[item] = s\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "tips_sub_miss = tips.head(10)\n", 160 | "tips_sub_miss.loc[[1, 3, 5, 7], 'total_bill'] = 'missing'\n", 161 | "\n", 162 | "print(tips_sub_miss)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 8, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "total_bill object\n", 175 | "tip float64\n", 176 | "sex category\n", 177 | "smoker category\n", 178 | "day category\n", 179 | "time category\n", 180 | "size int64\n", 181 | "sex_str object\n", 182 | "dtype: object\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "print(tips_sub_miss.dtypes)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | 
"execution_count": 9, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "ename": "ValueError", 197 | "evalue": "could not convert string to float: 'missing'", 198 | "output_type": "error", 199 | "traceback": [ 200 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 201 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 202 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtips_sub_miss\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 203 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\util\\_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 175\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 177\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 178\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 179\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_deprecate_kwarg\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 204 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, copy, errors, **kwargs)\u001b[0m\n\u001b[0;32m 4995\u001b[0m \u001b[1;31m# else, only a single dtype is 
given\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4996\u001b[0m new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,\n\u001b[1;32m-> 4997\u001b[1;33m **kwargs)\n\u001b[0m\u001b[0;32m 4998\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4999\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 205 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, **kwargs)\u001b[0m\n\u001b[0;32m 3712\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3713\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3714\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'astype'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3715\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3716\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 206 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in 
\u001b[0;36mapply\u001b[1;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[0;32m 3579\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3580\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'mgr'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3581\u001b[1;33m \u001b[0mapplied\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3582\u001b[0m \u001b[0mresult_blocks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3583\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 207 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mastype\u001b[1;34m(self, dtype, copy, errors, values, **kwargs)\u001b[0m\n\u001b[0;32m 573\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'raise'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 574\u001b[0m return self._astype(dtype, copy=copy, errors=errors, values=values,\n\u001b[1;32m--> 575\u001b[1;33m **kwargs)\n\u001b[0m\u001b[0;32m 576\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 577\u001b[0m def _astype(self, dtype, copy=False, errors='raise', values=None,\n", 208 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36m_astype\u001b[1;34m(self, dtype, copy, errors, values, klass, mgr, **kwargs)\u001b[0m\n\u001b[0;32m 662\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 663\u001b[0m \u001b[1;31m# _astype_nansafe works fine with 1-d only\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 664\u001b[1;33m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mastype_nansafe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 665\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 666\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 209 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\dtypes\\cast.py\u001b[0m in \u001b[0;36mastype_nansafe\u001b[1;34m(arr, dtype, copy)\u001b[0m\n\u001b[0;32m 728\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 729\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 730\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 731\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mview\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 732\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 210 | "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'missing'" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "tips_sub_miss['total_bill'].astype(float)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 10, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "ename": "ValueError", 225 | "evalue": "Unable to parse string \"missing\" at position 1", 226 | "output_type": "error", 227 | "traceback": [ 228 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 229 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 230 | "\u001b[1;32mpandas/_libs/src\\inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n", 231 | "\u001b[1;31mValueError\u001b[0m: Unable to parse string \"missing\"", 232 | "\nDuring handling of the above exception, another exception occurred:\n", 233 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 234 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtips_sub_miss\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'total_bill'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 235 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\tools\\numeric.py\u001b[0m in \u001b[0;36mto_numeric\u001b[1;34m(arg, errors, downcast)\u001b[0m\n\u001b[0;32m 131\u001b[0m 
\u001b[0mcoerce_numeric\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'raise'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 132\u001b[0m values = lib.maybe_convert_numeric(values, set(),\n\u001b[1;32m--> 133\u001b[1;33m coerce_numeric=coerce_numeric)\n\u001b[0m\u001b[0;32m 134\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 135\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 236 | "\u001b[1;32mpandas/_libs/src\\inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.maybe_convert_numeric\u001b[1;34m()\u001b[0m\n", 237 | "\u001b[1;31mValueError\u001b[0m: Unable to parse string \"missing\" at position 1" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "pd.to_numeric(tips_sub_miss['total_bill'])" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 11, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stdout", 252 | "output_type": "stream", 253 | "text": [ 254 | "total_bill object\n", 255 | "tip float64\n", 256 | "sex category\n", 257 | "smoker category\n", 258 | "day category\n", 259 | "time category\n", 260 | "size int64\n", 261 | "sex_str object\n", 262 | "dtype: object\n" 263 | ] 264 | }, 265 | { 266 | "name": "stderr", 267 | "output_type": "stream", 268 | "text": [ 269 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 270 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 271 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 272 | "\n", 273 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 274 | " \"\"\"Entry point for launching 
an IPython kernel.\n" 275 | ] 276 | } 277 | ], 278 | "source": [ 279 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='ignore')\n", 280 | "\n", 281 | "print(tips_sub_miss.dtypes)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 12, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "name": "stdout", 291 | "output_type": "stream", 292 | "text": [ 293 | "total_bill float64\n", 294 | "tip float64\n", 295 | "sex category\n", 296 | "smoker category\n", 297 | "day category\n", 298 | "time category\n", 299 | "size int64\n", 300 | "sex_str object\n", 301 | "dtype: object\n" 302 | ] 303 | }, 304 | { 305 | "name": "stderr", 306 | "output_type": "stream", 307 | "text": [ 308 | "C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 309 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 310 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 311 | "\n", 312 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 313 | " \"\"\"Entry point for launching an IPython kernel.\n" 314 | ] 315 | } 316 | ], 317 | "source": [ 318 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='coerce')\n", 319 | "\n", 320 | "print(tips_sub_miss.dtypes)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 13, 326 | "metadata": {}, 327 | "outputs": [ 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "text": [ 332 | "total_bill float32\n", 333 | "tip float64\n", 334 | "sex category\n", 335 | "smoker category\n", 336 | "day category\n", 337 | "time category\n", 338 | "size int64\n", 339 | "sex_str object\n", 340 | "dtype: object\n" 341 | ] 342 | }, 343 | { 344 | "name": "stderr", 345 | "output_type": "stream", 346 | "text": [ 347 | 
"C:\\Users\\fermat39\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", 348 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 349 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 350 | "\n", 351 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 352 | " \"\"\"Entry point for launching an IPython kernel.\n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "tips_sub_miss['total_bill'] = pd.to_numeric( tips_sub_miss['total_bill'], errors='coerce', downcast='float')\n", 358 | "\n", 359 | "print(tips_sub_miss.dtypes)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "# 직접 해보세요!\n", 367 | "## 문자열을 카테고리로 변환하기(179쪽)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 14, 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "name": "stdout", 377 | "output_type": "stream", 378 | "text": [ 379 | "<class 'pandas.core.frame.DataFrame'>\n", 380 | "RangeIndex: 244 entries, 0 to 243\n", 381 | "Data columns (total 8 columns):\n", 382 | "total_bill 244 non-null float64\n", 383 | "tip 244 non-null float64\n", 384 | "sex 244 non-null object\n", 385 | "smoker 244 non-null category\n", 386 | "day 244 non-null category\n", 387 | "time 244 non-null category\n", 388 | "size 244 non-null int64\n", 389 | "sex_str 244 non-null object\n", 390 | "dtypes: category(3), float64(2), int64(1), object(2)\n", 391 | "memory usage: 10.7+ KB\n", 392 | "None\n" 393 | ] 394 | } 395 | ], 396 | "source": [ 397 | "tips['sex'] = tips['sex'].astype('str') \n", 398 | "print(tips.info())" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 15, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "name": "stdout", 408 | "output_type": "stream", 409 | "text": [ 410 | "<class 'pandas.core.frame.DataFrame'>\n", 411 | "RangeIndex: 244 entries, 0 to 243\n", 412 | "Data columns (total 8 columns):\n", 413 | "total_bill 244 non-null 
float64\n", 414 | "tip 244 non-null float64\n", 415 | "sex 244 non-null category\n", 416 | "smoker 244 non-null category\n", 417 | "day 244 non-null category\n", 418 | "time 244 non-null category\n", 419 | "size 244 non-null int64\n", 420 | "sex_str 244 non-null object\n", 421 | "dtypes: category(4), float64(2), int64(1), object(1)\n", 422 | "memory usage: 9.1+ KB\n", 423 | "None\n" 424 | ] 425 | } 426 | ], 427 | "source": [ 428 | "tips['sex'] = tips['sex'].astype('category') \n", 429 | "print(tips.info())" 430 | ] 431 | } 432 | ], 433 | "metadata": { 434 | "kernelspec": { 435 | "display_name": "Python 3", 436 | "language": "python", 437 | "name": "python3" 438 | }, 439 | "language_info": { 440 | "codemirror_mode": { 441 | "name": "ipython", 442 | "version": 3 443 | }, 444 | "file_extension": ".py", 445 | "mimetype": "text/x-python", 446 | "name": "python", 447 | "nbconvert_exporter": "python", 448 | "pygments_lexer": "ipython3", 449 | "version": "3.6.5" 450 | } 451 | }, 452 | "nbformat": 4, 453 | "nbformat_minor": 1 454 | } 455 | -------------------------------------------------------------------------------- /notebook/08_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# dtypes 속성으로 데이터프레임의 자료형 살펴보기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 여러 가지 자료형을 문자열로 변환하기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 자료형을 변환한 데이터 다시 원래대로 만들기" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | 
"source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 잘못 입력한 문자열 처리하기" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 문자열을 카테고리로 변환하기" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | } 73 | ], 74 | "metadata": { 75 | "kernelspec": { 76 | "display_name": "Python 3", 77 | "language": "python", 78 | "name": "python3" 79 | }, 80 | "language_info": { 81 | "codemirror_mode": { 82 | "name": "ipython", 83 | "version": 3 84 | }, 85 | "file_extension": ".py", 86 | "mimetype": "text/x-python", 87 | "name": "python", 88 | "nbconvert_exporter": "python", 89 | "pygments_lexer": "ipython3", 90 | "version": "3.6.5" 91 | } 92 | }, 93 | "nbformat": 4, 94 | "nbformat_minor": 1 95 | } 96 | -------------------------------------------------------------------------------- /notebook/09_done.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 직접 해보세요!\n", 8 | "## 문자열 추출하기(183쪽)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "word = 'grail'\n", 18 | "sent = 'a scratch'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "g\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "print(word[0])" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "a\n" 48 | ] 49 | } 50 | ], 51 | 
"source": [ 52 | "print(sent[0])" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "gra\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "print(word[0:3])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "---" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "h\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "print(sent[-1])" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "a\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "print(sent[-9:-8])" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "a\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "print(sent[0:-8])" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "# 알아두면 좋아요!\n", 135 | "## 전체 문자열을 추출할 때 음수를 사용하면 안 됩니다(184쪽)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "scratc\n" 148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "print(sent[2:-1])" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 11, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "scratc\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "print(sent[-7:-1])" 170 | ] 171 | }, 172 | { 173 | 
"cell_type": "code", 174 | "execution_count": 12, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "9\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "s_len = len(sent)\n", 187 | "print(s_len)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 13, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "scratch\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "print(sent[2:s_len])" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "# 직접 해보세요!\n", 212 | "## 왼쪽이나 오른쪽 범위를 지정하지 않고 문자열 추출하기(185쪽)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 14, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "name": "stdout", 222 | "output_type": "stream", 223 | "text": [ 224 | "gra\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "print(word[0:3])" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 15, 235 | "metadata": {}, 236 | "outputs": [ 237 | { 238 | "name": "stdout", 239 | "output_type": "stream", 240 | "text": [ 241 | "gra\n" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 | "print(word[ :3])" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 16, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "name": "stdout", 256 | "output_type": "stream", 257 | "text": [ 258 | "scratch\n" 259 | ] 260 | } 261 | ], 262 | "source": [ 263 | "print(sent[2:len(sent)])" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 17, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "scratch\n" 276 | ] 277 | } 278 | ], 279 | "source": [ 280 | "print(sent[2: ])" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 18, 286 | "metadata": {}, 287 | "outputs": 
[ 288 | { 289 | "name": "stdout", 290 | "output_type": "stream", 291 | "text": [ 292 | "a scratch\n" 293 | ] 294 | } 295 | ], 296 | "source": [ 297 | "print(sent[ : ])" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 19, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "asrth\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "print(sent[::2])" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "# 직접 해보세요!\n", 322 | "## join, splitlines, replace 메서드 실습하기(188쪽)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "### 1. join 메서드" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 20, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "d1 = '40°' \n", 339 | "m1 = \"46'\" \n", 340 | "s1 = '52.837\"' \n", 341 | "u1 = 'N'\n", 342 | "\n", 343 | "d2 = '73°' \n", 344 | "m2 = \"58'\" \n", 345 | "s2 = '26.302\"' \n", 346 | "u2 = 'W'" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 21, 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "name": "stdout", 356 | "output_type": "stream", 357 | "text": [ 358 | "40° 46' 52.837\" N 73° 58' 26.302\" W\n" 359 | ] 360 | } 361 | ], 362 | "source": [ 363 | "coords = ' '.join([d1, m1, s1, u1, d2, m2, s2, u2])\n", 364 | "print(coords)" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "### 2. splitlines 메서드" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 22, 377 | "metadata": {}, 378 | "outputs": [ 379 | { 380 | "name": "stdout", 381 | "output_type": "stream", 382 | "text": [ 383 | "Guard: What? Ridden on a horse?\n", 384 | "King Arthur: Yes!\n", 385 | "Guard: You're using coconuts!\n", 386 | "King Arthur: What?\n", 387 | "Guard: You've got ... 
coconut[s] and you're bangin' 'em together. \n", 388 | "\n" 389 | ] 390 | } 391 | ], 392 | "source": [ 393 | "multi_str = \"\"\"Guard: What? Ridden on a horse?\n", 394 | "King Arthur: Yes!\n", 395 | "Guard: You're using coconuts!\n", 396 | "King Arthur: What?\n", 397 | "Guard: You've got ... coconut[s] and you're bangin' 'em together. \n", 398 | "\"\"\" \n", 399 | "print(multi_str)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 23, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "name": "stdout", 409 | "output_type": "stream", 410 | "text": [ 411 | "['Guard: What? Ridden on a horse?', 'King Arthur: Yes!', \"Guard: You're using coconuts!\", 'King Arthur: What?', \"Guard: You've got ... coconut[s] and you're bangin' 'em together. \"]\n" 412 | ] 413 | } 414 | ], 415 | "source": [ 416 | "multi_str_split = multi_str.splitlines() \n", 417 | "print(multi_str_split)" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 24, 423 | "metadata": {}, 424 | "outputs": [ 425 | { 426 | "name": "stdout", 427 | "output_type": "stream", 428 | "text": [ 429 | "['Guard: What? Ridden on a horse?', \"Guard: You're using coconuts!\", \"Guard: You've got ... coconut[s] and you're bangin' 'em together. \"]\n" 430 | ] 431 | } 432 | ], 433 | "source": [ 434 | "guard = multi_str_split[::2] \n", 435 | "print(guard)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "### 4. replace 메서드" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 25, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "name": "stdout", 452 | "output_type": "stream", 453 | "text": [ 454 | "['What? Ridden on a horse?', \"You're using coconuts!\", \"You've got ... coconut[s] and you're bangin' 'em together. 
\"]\n" 455 | ] 456 | } 457 | ], 458 | "source": [ 459 | "guard = multi_str.replace(\"Guard: \", \"\").splitlines()[::2] \n", 460 | "print(guard)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "# 직접 해보세요!\n", 468 | "## 문자열 포매팅하기(190쪽)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 26, 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "name": "stdout", 478 | "output_type": "stream", 479 | "text": [ 480 | "It's just a flesh wound!\n" 481 | ] 482 | } 483 | ], 484 | "source": [ 485 | "var = 'flesh wound' \n", 486 | "s = \"It's just a {}!\"\n", 487 | "\n", 488 | "print(s.format(var))" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 27, 494 | "metadata": {}, 495 | "outputs": [ 496 | { 497 | "name": "stdout", 498 | "output_type": "stream", 499 | "text": [ 500 | "It's just a scratch!\n" 501 | ] 502 | } 503 | ], 504 | "source": [ 505 | "print(s.format('scratch'))" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 28, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "name": "stdout", 515 | "output_type": "stream", 516 | "text": [ 517 | "Black Knight: 'Tis but a scratch.\n", 518 | "King Arthur: A scratch? Your arm's off!\n", 519 | "\n" 520 | ] 521 | } 522 | ], 523 | "source": [ 524 | "s = \"\"\"Black Knight: 'Tis but a {0}.\n", 525 | "King Arthur: A {0}? 
Your arm's off!\n", 526 | "\"\"\" \n", 527 | "print(s.format('scratch'))" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 29, 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "name": "stdout", 537 | "output_type": "stream", 538 | "text": [ 539 | "Hayden Planetarium Coordinates: 40.7815° N, 73.9733° W\n" 540 | ] 541 | } 542 | ], 543 | "source": [ 544 | "s = 'Hayden Planetarium Coordinates: {lat}, {lon}' \n", 545 | "print(s.format(lat='40.7815° N', lon='73.9733° W'))" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "# 직접 해보세요!\n", 553 | "## 숫자 데이터 포매팅하기(191쪽)" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 30, 559 | "metadata": {}, 560 | "outputs": [ 561 | { 562 | "name": "stdout", 563 | "output_type": "stream", 564 | "text": [ 565 | "Some digits of pi: 3.14159265359\n" 566 | ] 567 | } 568 | ], 569 | "source": [ 570 | "print('Some digits of pi: {}'.format(3.14159265359))" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 31, 576 | "metadata": {}, 577 | "outputs": [ 578 | { 579 | "name": "stdout", 580 | "output_type": "stream", 581 | "text": [ 582 | "In 2005, Lu Chao of China recited 67,890 digits of pi\n" 583 | ] 584 | } 585 | ], 586 | "source": [ 587 | "print(\"In 2005, Lu Chao of China recited {:,} digits of pi\".format(67890))" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 32, 593 | "metadata": {}, 594 | "outputs": [ 595 | { 596 | "name": "stdout", 597 | "output_type": "stream", 598 | "text": [ 599 | "I remember 0.0001031 or 0.0103% of what Lu Chao recited\n" 600 | ] 601 | } 602 | ], 603 | "source": [ 604 | "print(\"I remember {0:.4} or {0:.4%} of what Lu Chao recited\".format(7/67890))" 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": 33, 610 | "metadata": {}, 611 | "outputs": [ 612 | { 613 | "name": "stdout", 614 | "output_type": "stream", 615 | "text": [ 616 | 
"My ID number is 00042\n" 617 | ] 618 | } 619 | ], 620 | "source": [ 621 | "print(\"My ID number is {0:05d}\".format(42))" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "metadata": {}, 627 | "source": [ 628 | "# 직접 해보세요!\n", 629 | "## % 연산자로 포매팅하기(192쪽)" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 34, 635 | "metadata": {}, 636 | "outputs": [ 637 | { 638 | "name": "stdout", 639 | "output_type": "stream", 640 | "text": [ 641 | "I only know 7 digits of pi\n" 642 | ] 643 | } 644 | ], 645 | "source": [ 646 | "s = 'I only know %d digits of pi' % 7 \n", 647 | "print(s)" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": 35, 653 | "metadata": {}, 654 | "outputs": [ 655 | { 656 | "name": "stdout", 657 | "output_type": "stream", 658 | "text": [ 659 | "Some digits of e: 2.72\n" 660 | ] 661 | } 662 | ], 663 | "source": [ 664 | "print('Some digits of %(cont)s: %(value).2f' % {'cont': 'e', 'value': 2.718})" 665 | ] 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "metadata": {}, 670 | "source": [ 671 | "# 알아두면 좋아요!\n", 672 | "## f-strings로 포매팅 사용하기(193쪽)" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 36, 678 | "metadata": {}, 679 | "outputs": [ 680 | { 681 | "name": "stdout", 682 | "output_type": "stream", 683 | "text": [ 684 | "It's just a flesh wound!\n" 685 | ] 686 | } 687 | ], 688 | "source": [ 689 | "var = 'flesh wound' \n", 690 | "s = f\"It's just a {var}!\" \n", 691 | "print(s)" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": 37, 697 | "metadata": {}, 698 | "outputs": [ 699 | { 700 | "name": "stdout", 701 | "output_type": "stream", 702 | "text": [ 703 | "Hayden Planetarium Coordinates: 40.7815°N, 73.9733°W\n" 704 | ] 705 | } 706 | ], 707 | "source": [ 708 | "lat='40.7815°N' \n", 709 | "lon='73.9733°W' \n", 710 | "s = f'Hayden Planetarium Coordinates: {lat}, {lon}' \n", 711 | "print(s)" 712 | ] 713 | }, 714 | { 715 | "cell_type": 
"markdown", 716 | "metadata": {}, 717 | "source": [ 718 | "# 직접 해보세요!\n", 719 | "## 정규식으로 전화번호 패턴 찾기(196쪽)" 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": 38, 725 | "metadata": {}, 726 | "outputs": [], 727 | "source": [ 728 | "import re\n", 729 | "\n", 730 | "tele_num = '1234567890'" 731 | ] 732 | }, 733 | { 734 | "cell_type": "code", 735 | "execution_count": 39, 736 | "metadata": {}, 737 | "outputs": [ 738 | { 739 | "name": "stdout", 740 | "output_type": "stream", 741 | "text": [ 742 | "<class '_sre.SRE_Match'>\n" 743 | ] 744 | } 745 | ], 746 | "source": [ 747 | "m = re.match(pattern='\\d\\d\\d\\d\\d\\d\\d\\d\\d\\d', string=tele_num) \n", 748 | "print(type(m))" 749 | ] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "execution_count": 40, 754 | "metadata": {}, 755 | "outputs": [ 756 | { 757 | "name": "stdout", 758 | "output_type": "stream", 759 | "text": [ 760 | "<_sre.SRE_Match object; span=(0, 10), match='1234567890'>\n" 761 | ] 762 | } 763 | ], 764 | "source": [ 765 | "print(m)" 766 | ] 767 | }, 768 | { 769 | "cell_type": "code", 770 | "execution_count": 41, 771 | "metadata": {}, 772 | "outputs": [ 773 | { 774 | "name": "stdout", 775 | "output_type": "stream", 776 | "text": [ 777 | "True\n" 778 | ] 779 | } 780 | ], 781 | "source": [ 782 | "print(bool(m))" 783 | ] 784 | }, 785 | { 786 | "cell_type": "code", 787 | "execution_count": 42, 788 | "metadata": {}, 789 | "outputs": [ 790 | { 791 | "name": "stdout", 792 | "output_type": "stream", 793 | "text": [ 794 | "match\n" 795 | ] 796 | } 797 | ], 798 | "source": [ 799 | "if m:\n", 800 | " print('match') \n", 801 | "else:\n", 802 | " print('no match')" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 43, 808 | "metadata": {}, 809 | "outputs": [ 810 | { 811 | "name": "stdout", 812 | "output_type": "stream", 813 | "text": [ 814 | "0\n" 815 | ] 816 | } 817 | ], 818 | "source": [ 819 | "print(m.start())" 820 | ] 821 | }, 822 | { 823 | "cell_type": "code", 824 | "execution_count": 44, 
825 | "metadata": {}, 826 | "outputs": [ 827 | { 828 | "name": "stdout", 829 | "output_type": "stream", 830 | "text": [ 831 | "10\n" 832 | ] 833 | } 834 | ], 835 | "source": [ 836 | "print(m.end())" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": 45, 842 | "metadata": {}, 843 | "outputs": [ 844 | { 845 | "name": "stdout", 846 | "output_type": "stream", 847 | "text": [ 848 | "(0, 10)\n" 849 | ] 850 | } 851 | ], 852 | "source": [ 853 | "print(m.span())" 854 | ] 855 | }, 856 | { 857 | "cell_type": "code", 858 | "execution_count": 46, 859 | "metadata": {}, 860 | "outputs": [ 861 | { 862 | "name": "stdout", 863 | "output_type": "stream", 864 | "text": [ 865 | "1234567890\n" 866 | ] 867 | } 868 | ], 869 | "source": [ 870 | "print(m.group())" 871 | ] 872 | }, 873 | { 874 | "cell_type": "code", 875 | "execution_count": 47, 876 | "metadata": {}, 877 | "outputs": [], 878 | "source": [ 879 | "tele_num_spaces = '123 456 7890'" 880 | ] 881 | }, 882 | { 883 | "cell_type": "code", 884 | "execution_count": 48, 885 | "metadata": {}, 886 | "outputs": [ 887 | { 888 | "name": "stdout", 889 | "output_type": "stream", 890 | "text": [ 891 | "None\n" 892 | ] 893 | } 894 | ], 895 | "source": [ 896 | "m = re.match(pattern='\\d{10}', string=tele_num_spaces) \n", 897 | "print(m)" 898 | ] 899 | }, 900 | { 901 | "cell_type": "code", 902 | "execution_count": 49, 903 | "metadata": {}, 904 | "outputs": [ 905 | { 906 | "name": "stdout", 907 | "output_type": "stream", 908 | "text": [ 909 | "no match\n" 910 | ] 911 | } 912 | ], 913 | "source": [ 914 | "if m:\n", 915 | " print('match') \n", 916 | "else:\n", 917 | " print('no match')" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": 50, 923 | "metadata": {}, 924 | "outputs": [ 925 | { 926 | "name": "stdout", 927 | "output_type": "stream", 928 | "text": [ 929 | "<_sre.SRE_Match object; span=(0, 12), match='123 456 7890'>\n" 930 | ] 931 | } 932 | ], 933 | "source": [ 934 | "p = 
'\\d{3}\\s?\\d{3}\\s?\\d{4}' \n", 935 | "m = re.match(pattern=p, string=tele_num_spaces) \n", 936 | "print(m)" 937 | ] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "execution_count": 51, 942 | "metadata": {}, 943 | "outputs": [ 944 | { 945 | "name": "stdout", 946 | "output_type": "stream", 947 | "text": [ 948 | "<_sre.SRE_Match object; span=(0, 14), match='(123) 456-7890'>\n" 949 | ] 950 | } 951 | ], 952 | "source": [ 953 | "tele_num_space_paren_dash = '(123) 456-7890' \n", 954 | "p = '\\(?\\d{3}\\)?\\s?\\d{3}\\s?-?\\d{4}' \n", 955 | "m = re.match(pattern=p, string=tele_num_space_paren_dash) \n", 956 | "print(m)" 957 | ] 958 | }, 959 | { 960 | "cell_type": "code", 961 | "execution_count": 52, 962 | "metadata": {}, 963 | "outputs": [ 964 | { 965 | "name": "stdout", 966 | "output_type": "stream", 967 | "text": [ 968 | "<_sre.SRE_Match object; span=(0, 17), match='+1 (123) 456-7890'>\n" 969 | ] 970 | } 971 | ], 972 | "source": [ 973 | "cnty_tele_num_space_paren_dash = '+1 (123) 456-7890' \n", 974 | "p = '\\+?1\\s?\\(?\\d{3}\\)?\\s?\\d{3}\\s?-?\\d{4}' \n", 975 | "m = re.match(pattern=p, string=cnty_tele_num_space_paren_dash) \n", 976 | "print(m)" 977 | ] 978 | }, 979 | { 980 | "cell_type": "markdown", 981 | "metadata": {}, 982 | "source": [ 983 | "# 알아두면 좋아요!\n", 984 | "## compile 메서드로 정규식 메서드 사용하기(200쪽)" 985 | ] 986 | }, 987 | { 988 | "cell_type": "code", 989 | "execution_count": 54, 990 | "metadata": {}, 991 | "outputs": [ 992 | { 993 | "name": "stdout", 994 | "output_type": "stream", 995 | "text": [ 996 | "<_sre.SRE_Match object; span=(0, 10), match='1234567890'>\n" 997 | ] 998 | } 999 | ], 1000 | "source": [ 1001 | "p = re.compile('\\d{10}') \n", 1002 | "s = '1234567890' \n", 1003 | "m = p.match(s) \n", 1004 | "print(m)" 1005 | ] 1006 | } 1007 | ], 1008 | "metadata": { 1009 | "kernelspec": { 1010 | "display_name": "Python 3", 1011 | "language": "python", 1012 | "name": "python3" 1013 | }, 1014 | "language_info": { 1015 | "codemirror_mode": { 1016 | "name": 
"ipython", 1017 | "version": 3 1018 | }, 1019 | "file_extension": ".py", 1020 | "mimetype": "text/x-python", 1021 | "name": "python", 1022 | "nbconvert_exporter": "python", 1023 | "pygments_lexer": "ipython3", 1024 | "version": "3.6.5" 1025 | } 1026 | }, 1027 | "nbformat": 4, 1028 | "nbformat_minor": 1 1029 | } 1030 | -------------------------------------------------------------------------------- /notebook/09_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 문자열 추출하기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 전체 문자열 추출하기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 왼쪽이나 오른쪽 범위를 지정하지 않고 문자열 추출하기" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# join, splitlines, replace 메서드 실습하기" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 문자열 포매팅 실습하기" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# 수치값 포매팅 실습" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": 
[] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# % 연산자로 포매팅하기" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# f-strings로 포매팅 사용하기" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 정규식으로 전화번호 패턴 찾기" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "# compile 메서드로 정규식 메서드 사용하기" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.6.5" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 1 165 | } 166 | -------------------------------------------------------------------------------- /notebook/10_done.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 직접 해보세요!\n", 8 | "## 제곱 함수와 n 제곱 함수 만들기(202쪽)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | 
"outputs": [], 16 | "source": [ 17 | "def my_sq(x):\n", 18 | " return x ** 2" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "def my_exp(x, n):\n", 28 | " return x ** n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "16\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "print(my_sq(4))" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "16\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "print(my_exp(2, 4))" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "# 직접 해보세요!\n", 70 | "## 시리즈와 데이터프레임에 apply 메서드 사용하기(203쪽)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### 1. 
시리즈와 apply 메서드" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | " a b\n", 90 | "0 10 20\n", 91 | "1 20 30\n", 92 | "2 30 40\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "import pandas as pd\n", 98 | "\n", 99 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]}) \n", 100 | "\n", 101 | "print(df)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "0 100\n", 114 | "1 400\n", 115 | "2 900\n", 116 | "Name: a, dtype: int64\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "print(df['a'] ** 2)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "0 100\n", 134 | "1 400\n", 135 | "2 900\n", 136 | "Name: a, dtype: int64\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "sq = df['a'].apply(my_sq) \n", 142 | "print(sq)" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 8, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "0 100\n", 155 | "1 400\n", 156 | "2 900\n", 157 | "Name: a, dtype: int64\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "ex = df['a'].apply(my_exp, n=2) \n", 163 | "print(ex)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 9, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "0 1000\n", 176 | "1 8000\n", 177 | "2 27000\n", 178 | "Name: a, dtype: int64\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "ex = df['a'].apply(my_exp, n=3) \n", 184 | "print(ex)" 185 | ] 186 | }, 187 | { 188 | 
"cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "### 5. 데이터 프레임과 apply 메서드" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 5, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | " a b\n", 204 | "0 10 20\n", 205 | "1 20 30\n", 206 | "2 30 40\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]}) \n", 212 | "print(df)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 6, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "def print_me(x): \n", 222 | " print(x)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 7, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "name": "stdout", 232 | "output_type": "stream", 233 | "text": [ 234 | "0 10\n", 235 | "1 20\n", 236 | "2 30\n", 237 | "Name: a, dtype: int64\n", 238 | "0 20\n", 239 | "1 30\n", 240 | "2 40\n", 241 | "Name: b, dtype: int64\n", 242 | "a None\n", 243 | "b None\n", 244 | "dtype: object\n" 245 | ] 246 | } 247 | ], 248 | "source": [ 249 | "print(df.apply(print_me, axis=0))" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 8, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "name": "stdout", 259 | "output_type": "stream", 260 | "text": [ 261 | "0 10\n", 262 | "1 20\n", 263 | "2 30\n", 264 | "Name: a, dtype: int64\n" 265 | ] 266 | } 267 | ], 268 | "source": [ 269 | "print(df['a'])" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 9, 275 | "metadata": {}, 276 | "outputs": [ 277 | { 278 | "name": "stdout", 279 | "output_type": "stream", 280 | "text": [ 281 | "0 20\n", 282 | "1 30\n", 283 | "2 40\n", 284 | "Name: b, dtype: int64\n" 285 | ] 286 | } 287 | ], 288 | "source": [ 289 | "print(df['b'])" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 10, 295 | "metadata": {}, 296 | 
"outputs": [], 297 | "source": [ 298 | "def avg_3(x, y, z):\n", 299 | " return (x + y + z) / 3" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 11, 305 | "metadata": {}, 306 | "outputs": [ 307 | { 308 | "ename": "TypeError", 309 | "evalue": "(\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')", 310 | "output_type": "error", 311 | "traceback": [ 312 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 313 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", 314 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mavg_3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 315 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)\u001b[0m\n\u001b[0;32m 6002\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6003\u001b[0m kwds=kwds)\n\u001b[1;32m-> 6004\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6005\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6006\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 316 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 
316\u001b[0m *self.args, **self.kwds)\n\u001b[0;32m 317\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 318\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFrameRowApply\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 319\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 320\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_broadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 317 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 140\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 141\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 142\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 143\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 144\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 318 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 246\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[1;31m# compute the result using the series generator\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 248\u001b[1;33m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 249\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 250\u001b[0m \u001b[1;31m# wrap results\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 319 | "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 275\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 277\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 278\u001b[0m \u001b[0mkeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 279\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 320 | "\u001b[1;31mTypeError\u001b[0m: (\"avg_3() missing 2 required positional arguments: 'y' and 'z'\", 'occurred at index a')" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "print(df.apply(avg_3))" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 12, 331 | "metadata": {}, 332 | "outputs": [ 333 | { 334 | "name": "stdout", 335 | "output_type": 
"stream", 336 | "text": [ 337 | "a 20.0\n", 338 | "b 30.0\n", 339 | "dtype: float64\n" 340 | ] 341 | } 342 | ], 343 | "source": [ 344 | "def avg_3_apply(col):\n", 345 | " x = col[0] \n", 346 | " y = col[1] \n", 347 | " z = col[2] \n", 348 | " return (x + y + z) / 3\n", 349 | "\n", 350 | "\n", 351 | "print(df.apply(avg_3_apply))" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 23, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "def avg_3_apply(col):\n", 361 | " sum = 0\n", 362 | " for item in col:\n", 363 | " sum += item\n", 364 | " return sum / df.shape[0]" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 31, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "def avg_2_apply(row):\n", 374 | " sum = 0\n", 375 | " for item in row:\n", 376 | " sum += item\n", 377 | " return sum / df.shape[1]" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 32, 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "0 15.0\n", 390 | "1 25.0\n", 391 | "2 35.0\n", 392 | "dtype: float64\n" 393 | ] 394 | } 395 | ], 396 | "source": [ 397 | "print(df.apply(avg_2_apply, axis = 1))" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "# 직접 해보세요!\n", 405 | "## 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기(208쪽)" 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "### 1. 
데이터프레임의 누락값 처리하기 ― 열 방향" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 33, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [ 421 | "import seaborn as sns\n", 422 | "\n", 423 | "titanic = sns.load_dataset(\"titanic\")" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 34, 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "name": "stdout", 433 | "output_type": "stream", 434 | "text": [ 435 | "\n", 436 | "RangeIndex: 891 entries, 0 to 890\n", 437 | "Data columns (total 15 columns):\n", 438 | "survived 891 non-null int64\n", 439 | "pclass 891 non-null int64\n", 440 | "sex 891 non-null object\n", 441 | "age 714 non-null float64\n", 442 | "sibsp 891 non-null int64\n", 443 | "parch 891 non-null int64\n", 444 | "fare 891 non-null float64\n", 445 | "embarked 889 non-null object\n", 446 | "class 891 non-null category\n", 447 | "who 891 non-null object\n", 448 | "adult_male 891 non-null bool\n", 449 | "deck 203 non-null category\n", 450 | "embark_town 889 non-null object\n", 451 | "alive 891 non-null object\n", 452 | "alone 891 non-null bool\n", 453 | "dtypes: bool(2), category(2), float64(2), int64(4), object(5)\n", 454 | "memory usage: 80.6+ KB\n", 455 | "None\n" 456 | ] 457 | } 458 | ], 459 | "source": [ 460 | "print(titanic.info())" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 10, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [ 469 | "import numpy as np\n", 470 | "\n", 471 | "def count_missing(vec):\n", 472 | " null_vec = pd.isnull(vec)\n", 473 | " null_count = np.sum(null_vec)\n", 474 | " return null_count" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 41, 480 | "metadata": {}, 481 | "outputs": [ 482 | { 483 | "name": "stdout", 484 | "output_type": "stream", 485 | "text": [ 486 | "survived 0\n", 487 | "pclass 0\n", 488 | "sex 0\n", 489 | "age 177\n", 490 | "sibsp 0\n", 491 | "parch 0\n", 492 | "fare 0\n", 493 | "embarked 
2\n", 494 | "class 0\n", 495 | "who 0\n", 496 | "adult_male 0\n", 497 | "deck 688\n", 498 | "embark_town 2\n", 499 | "alive 0\n", 500 | "alone 0\n", 501 | "dtype: int64\n" 502 | ] 503 | } 504 | ], 505 | "source": [ 506 | "cmis_col = titanic.apply(count_missing)\n", 507 | "print(cmis_col)" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 43, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "def prop_missing(vec):\n", 517 | " num = count_missing(vec)\n", 518 | " dem = vec.size\n", 519 | " return num / dem" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 45, 525 | "metadata": {}, 526 | "outputs": [ 527 | { 528 | "name": "stdout", 529 | "output_type": "stream", 530 | "text": [ 531 | "survived 0.000000\n", 532 | "pclass 0.000000\n", 533 | "sex 0.000000\n", 534 | "age 0.198653\n", 535 | "sibsp 0.000000\n", 536 | "parch 0.000000\n", 537 | "fare 0.000000\n", 538 | "embarked 0.002245\n", 539 | "class 0.000000\n", 540 | "who 0.000000\n", 541 | "adult_male 0.000000\n", 542 | "deck 0.772166\n", 543 | "embark_town 0.002245\n", 544 | "alive 0.000000\n", 545 | "alone 0.000000\n", 546 | "dtype: float64\n" 547 | ] 548 | } 549 | ], 550 | "source": [ 551 | "pmis_col = titanic.apply(prop_missing)\n", 552 | "print(pmis_col)" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 46, 558 | "metadata": {}, 559 | "outputs": [], 560 | "source": [ 561 | "def prop_complete(vec):\n", 562 | " return 1 - prop_missing(vec)" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": {}, 568 | "source": [ 569 | "### 8. 
데이터프레임의 누락값을 처리하기 ― 행 방향" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 48, 575 | "metadata": {}, 576 | "outputs": [ 577 | { 578 | "name": "stdout", 579 | "output_type": "stream", 580 | "text": [ 581 | "0 1\n", 582 | "1 0\n", 583 | "2 1\n", 584 | "3 0\n", 585 | "4 1\n", 586 | "dtype: int64\n" 587 | ] 588 | } 589 | ], 590 | "source": [ 591 | "cmis_row = titanic.apply(count_missing, axis=1)\n", 592 | "pmis_row = titanic.apply(prop_missing, axis=1)\n", 593 | "pcom_row = titanic.apply(prop_complete, axis=1)\n", 594 | "\n", 595 | "print(cmis_row.head())" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 49, 601 | "metadata": {}, 602 | "outputs": [ 603 | { 604 | "name": "stdout", 605 | "output_type": "stream", 606 | "text": [ 607 | "0 0.066667\n", 608 | "1 0.000000\n", 609 | "2 0.066667\n", 610 | "3 0.000000\n", 611 | "4 0.066667\n", 612 | "dtype: float64\n" 613 | ] 614 | } 615 | ], 616 | "source": [ 617 | "print(pmis_row.head())" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": 50, 623 | "metadata": {}, 624 | "outputs": [ 625 | { 626 | "name": "stdout", 627 | "output_type": "stream", 628 | "text": [ 629 | "0 0.933333\n", 630 | "1 1.000000\n", 631 | "2 0.933333\n", 632 | "3 1.000000\n", 633 | "4 0.933333\n", 634 | "dtype: float64\n" 635 | ] 636 | } 637 | ], 638 | "source": [ 639 | "print(pcom_row.head())" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 51, 645 | "metadata": {}, 646 | "outputs": [ 647 | { 648 | "name": "stdout", 649 | "output_type": "stream", 650 | "text": [ 651 | " survived pclass sex age sibsp parch fare embarked class \\\n", 652 | "0 0 3 male 22.0 1 0 7.2500 S Third \n", 653 | "1 1 1 female 38.0 1 0 71.2833 C First \n", 654 | "2 1 3 female 26.0 0 0 7.9250 S Third \n", 655 | "3 1 1 female 35.0 1 0 53.1000 S First \n", 656 | "4 0 3 male 35.0 0 0 8.0500 S Third \n", 657 | "\n", 658 | " who adult_male deck embark_town alive alone num_missing 
\n", 659 | "0 man True NaN Southampton no False 1 \n", 660 | "1 woman False C Cherbourg yes False 0 \n", 661 | "2 woman False NaN Southampton yes True 1 \n", 662 | "3 woman False C Southampton yes False 0 \n", 663 | "4 man True NaN Southampton no True 1 \n" 664 | ] 665 | } 666 | ], 667 | "source": [ 668 | "titanic['num_missing'] = titanic.apply(count_missing, axis=1)\n", 669 | "\n", 670 | "print(titanic.head())" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 53, 676 | "metadata": {}, 677 | "outputs": [ 678 | { 679 | "name": "stdout", 680 | "output_type": "stream", 681 | "text": [ 682 | " survived pclass sex age sibsp parch fare embarked class \\\n", 683 | "186 1 3 female NaN 1 0 15.5000 Q Third \n", 684 | "274 1 3 female NaN 0 0 7.7500 Q Third \n", 685 | "410 0 3 male NaN 0 0 7.8958 S Third \n", 686 | "547 1 2 male NaN 0 0 13.8625 C Second \n", 687 | "601 0 3 male NaN 0 0 7.8958 S Third \n", 688 | "578 0 3 female NaN 1 0 14.4583 C Third \n", 689 | "76 0 3 male NaN 0 0 7.8958 S Third \n", 690 | "560 0 3 male NaN 0 0 7.7500 Q Third \n", 691 | "511 0 3 male NaN 0 0 8.0500 S Third \n", 692 | "495 0 3 male NaN 0 0 14.4583 C Third \n", 693 | "\n", 694 | " who adult_male deck embark_town alive alone num_missing \n", 695 | "186 woman False NaN Queenstown yes False 2 \n", 696 | "274 woman False NaN Queenstown yes True 2 \n", 697 | "410 man True NaN Southampton no True 2 \n", 698 | "547 man True NaN Cherbourg yes True 2 \n", 699 | "601 man True NaN Southampton no True 2 \n", 700 | "578 woman False NaN Cherbourg no False 2 \n", 701 | "76 man True NaN Southampton no True 2 \n", 702 | "560 man True NaN Queenstown no True 2 \n", 703 | "511 man True NaN Southampton no True 2 \n", 704 | "495 man True NaN Cherbourg no True 2 \n" 705 | ] 706 | } 707 | ], 708 | "source": [ 709 | "print(titanic.loc[titanic.num_missing > 1, :].sample(10))" 710 | ] 711 | } 712 | ], 713 | "metadata": { 714 | "kernelspec": { 715 | "display_name": "Python 3", 716 | "language": 
"python", 717 | "name": "python3" 718 | }, 719 | "language_info": { 720 | "codemirror_mode": { 721 | "name": "ipython", 722 | "version": 3 723 | }, 724 | "file_extension": ".py", 725 | "mimetype": "text/x-python", 726 | "name": "python", 727 | "nbconvert_exporter": "python", 728 | "pygments_lexer": "ipython3", 729 | "version": "3.6.5" 730 | } 731 | }, 732 | "nbformat": 4, 733 | "nbformat_minor": 1 734 | } 735 | -------------------------------------------------------------------------------- /notebook/10_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 제곱 함수와 n 제곱 함수 만들기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 시리즈와 apply 메서드" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 데이터 프레임과 apply 메서드" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기 - 열 방향" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 데이터프레임의 누락값을 처리한 다음 apply 메서드 사용하기 - 행 방뱡" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | } 73 | ], 74 | "metadata": { 75 | "kernelspec": { 76 | "display_name": "Python 3", 77 | "language": 
"python", 78 | "name": "python3" 79 | }, 80 | "language_info": { 81 | "codemirror_mode": { 82 | "name": "ipython", 83 | "version": 3 84 | }, 85 | "file_extension": ".py", 86 | "mimetype": "text/x-python", 87 | "name": "python", 88 | "nbconvert_exporter": "python", 89 | "pygments_lexer": "ipython3", 90 | "version": "3.6.5" 91 | } 92 | }, 93 | "nbformat": 4, 94 | "nbformat_minor": 1 95 | } 96 | -------------------------------------------------------------------------------- /notebook/11_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# groupby 메서드로 평균값 구하기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 분할-반영-결합 과정 살펴보기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 평균값을 구하는 사용자 함수와 groupby 메서드" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# 두 개의 인잣값을 받아 처리하는 사용자 함수와 groupby 메서드" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 집계 메서드를 리스트, 딕셔너리에 담아 전달하기" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# 표준 점수 계산하기" 78 | ] 79 | }, 80 | { 81 | 
"cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "# 누락값을 평균값으로 처리하기" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# 데이터 필터링 사용하기 ─ filter 메서드" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 그룹 오브젝트 저장하여 살펴보기" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "# 그룹 오브젝트의 평균 구하기" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "# 그룹 오브젝트에서 데이터 추출하고 반복하기" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "# 그룹 오브젝트 계산하고 살펴보기" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [] 170 | } 171 | ], 172 | "metadata": { 173 | "kernelspec": { 174 | "display_name": "Python 3", 175 | "language": "python", 176 | "name": "python3" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 
185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.6.5" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 1 193 | } 194 | -------------------------------------------------------------------------------- /notebook/12_practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# datetime 오브젝트 사용하기" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# 문자열을 datetime 오브젝트로 변환하기" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "# 시간 데이터를 잘라내고 싶어요" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "# datetime 오브젝트로 변환하려는 열을 지정하여 데이터 집합 불러오기" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# datetime 오브젝트에서 날짜 정보 추출하기" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "# dt 접근자로 시간 데이터 정리하기" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | 
"source": [ 91 | "# 에볼라 최초 발생일 계산해보기" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# 파산한 은행의 개수 계산하기" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "# 테슬라 주식 데이터로 시간 계산하기" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "# datetime 오브젝트를 인덱스로 설정하여 데이터 추출하기" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "# 시간 간격을 인덱스로 설정하여 데이터 추출하기" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "# 시간 범위 생성하여 인덱스로 지정하기" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "# 시간 범위의 주기 설정하기" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "# 에볼라 발병 시간 비교하기" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": 
[], 197 | "source": [] 198 | } 199 | ], 200 | "metadata": { 201 | "kernelspec": { 202 | "display_name": "Python 3", 203 | "language": "python", 204 | "name": "python3" 205 | }, 206 | "language_info": { 207 | "codemirror_mode": { 208 | "name": "ipython", 209 | "version": 3 210 | }, 211 | "file_extension": ".py", 212 | "mimetype": "text/x-python", 213 | "name": "python", 214 | "nbconvert_exporter": "python", 215 | "pygments_lexer": "ipython3", 216 | "version": "3.6.5" 217 | } 218 | }, 219 | "nbformat": 4, 220 | "nbformat_minor": 1 221 | } 222 | -------------------------------------------------------------------------------- /notebook/Special.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 직접 해보세요!\n", 8 | "## 코드의 성능을 향상시켜 실행 시간 측정하기 ― timeit" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import pandas as pd\n", 18 | "import numpy as np" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "df = pd.DataFrame({'a': [10, 20, 30], 'b': [20, 30, 40]})" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def avg_2_apply(row):\n", 37 | " x = row[0]\n", 38 | " y = row[1]\n", 39 | " if(x == 20):\n", 40 | " return np.nan\n", 41 | " else:\n", 42 | " return (x + y)/2" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### 2. 판다스 데이터프레임 ― 실행 시간 측정" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "511 µs ± 5.98 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each)\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "%%timeit\n", 67 | "df.apply(avg_2_apply, axis = 1)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### 3. 넘파이로 벡터화한 함수 사용하기 ― 실행 시간 측정" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "@np.vectorize\n", 84 | "def v_avg_2mod(x, y):\n", 85 | " if(x == 20):\n", 86 | " return (np.NaN)\n", 87 | " else:\n", 88 | " return (x + y) / 2" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 6, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "36 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "%%timeit\n", 106 | "v_avg_2mod(df['a'], df['b'])" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "### 5. numba 라이브러리로 벡터화한 함수 사용하기 ― 실행 시간 측정" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 7, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "import numba\n", 123 | "\n", 124 | "@numba.vectorize\n", 125 | "def v_avg_2_numba(x, y):\n", 126 | " if(x == 20):\n", 127 | " return (np.NaN)\n", 128 | " else:\n", 129 | " return (x + y) / 2" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 8, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "4.46 µs ± 47.9 ns per loop (mean ± std. dev. 
of 7 runs, 100000 loops each)\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "%%timeit\n", 147 | "v_avg_2_numba(df['a'].values, df['b'].values)" 148 | ] 149 | } 150 | ], 151 | "metadata": { 152 | "kernelspec": { 153 | "display_name": "Python 3", 154 | "language": "python", 155 | "name": "python3" 156 | }, 157 | "language_info": { 158 | "codemirror_mode": { 159 | "name": "ipython", 160 | "version": 3 161 | }, 162 | "file_extension": ".py", 163 | "mimetype": "text/x-python", 164 | "name": "python", 165 | "nbconvert_exporter": "python", 166 | "pygments_lexer": "ipython3", 167 | "version": "3.6.5" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 2 172 | } 173 | -------------------------------------------------------------------------------- /notebook/hello_jupyter_notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.6.5" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 2 32 | } 33 | -------------------------------------------------------------------------------- /output/scientist_names_series.csv: -------------------------------------------------------------------------------- 1 | 0,Rosaline Franklin 2 | 1,William Gosset 3 | 2,Florence Nightingale 4 | 3,Marie Curie 5 | 4,Rachel Carson 6 | 5,John Snow 7 | 6,Alan Turing 8 | 7,Johann Gauss 9 | -------------------------------------------------------------------------------- /output/scientists_df.pickle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_df.pickle -------------------------------------------------------------------------------- /output/scientists_df.tsv: -------------------------------------------------------------------------------- 1 | Name Born Died Age Occupation born_dt died_dt age_days_dt 2 | 0 Rosaline Franklin 1920-07-25 1958-04-16 66 Chemist 1920-07-25 1958-04-16 13779 days 00:00:00.000000000 3 | 1 William Gosset 1876-06-13 1937-10-16 56 Statistician 1876-06-13 1937-10-16 22404 days 00:00:00.000000000 4 | 2 Florence Nightingale 1820-05-12 1910-08-13 41 Nurse 1820-05-12 1910-08-13 32964 days 00:00:00.000000000 5 | 3 Marie Curie 1867-11-07 1934-07-04 77 Chemist 1867-11-07 1934-07-04 24345 days 00:00:00.000000000 6 | 4 Rachel Carson 1907-05-27 1964-04-14 90 Biologist 1907-05-27 1964-04-14 20777 days 00:00:00.000000000 7 | 5 John Snow 1813-03-15 1858-06-16 45 Physician 1813-03-15 1858-06-16 16529 days 00:00:00.000000000 8 | 6 Alan Turing 1912-06-23 1954-06-07 37 Computer Scientist 1912-06-23 1954-06-07 15324 days 00:00:00.000000000 9 | 7 Johann Gauss 1777-04-30 1855-02-23 61 Mathematician 1777-04-30 1855-02-23 28422 days 00:00:00.000000000 10 | -------------------------------------------------------------------------------- /output/scientists_df.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_df.xlsx -------------------------------------------------------------------------------- /output/scientists_df_no_index.csv: -------------------------------------------------------------------------------- 1 | Name,Born,Died,Age,Occupation,born_dt,died_dt,age_days_dt 2 | Rosaline Franklin,1920-07-25,1958-04-16,66,Chemist,1920-07-25,1958-04-16,13779 days 
00:00:00.000000000 3 | William Gosset,1876-06-13,1937-10-16,56,Statistician,1876-06-13,1937-10-16,22404 days 00:00:00.000000000 4 | Florence Nightingale,1820-05-12,1910-08-13,41,Nurse,1820-05-12,1910-08-13,32964 days 00:00:00.000000000 5 | Marie Curie,1867-11-07,1934-07-04,77,Chemist,1867-11-07,1934-07-04,24345 days 00:00:00.000000000 6 | Rachel Carson,1907-05-27,1964-04-14,90,Biologist,1907-05-27,1964-04-14,20777 days 00:00:00.000000000 7 | John Snow,1813-03-15,1858-06-16,45,Physician,1813-03-15,1858-06-16,16529 days 00:00:00.000000000 8 | Alan Turing,1912-06-23,1954-06-07,37,Computer Scientist,1912-06-23,1954-06-07,15324 days 00:00:00.000000000 9 | Johann Gauss,1777-04-30,1855-02-23,61,Mathematician,1777-04-30,1855-02-23,28422 days 00:00:00.000000000 10 | -------------------------------------------------------------------------------- /output/scientists_names_series.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_names_series.pickle -------------------------------------------------------------------------------- /output/scientists_names_series_df.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_names_series_df.xls -------------------------------------------------------------------------------- /output/scientists_names_series_df.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/easysIT/doit_pandas/4b9d91839f0cc0b4a79ae35d14f6387431fd3c00/output/scientists_names_series_df.xlsx --------------------------------------------------------------------------------