├── .github └── workflows │ ├── issue-response.yml │ └── respond_to_issue.py ├── README.md ├── part1 ├── ch1 │ └── ch1_python.ipynb ├── ch2 │ └── ch2_pandas.ipynb └── ch3 │ ├── ch3_ex_type1.ipynb │ ├── delivery_time.csv │ ├── school_data.csv │ ├── school_data_science.csv │ ├── school_data_social.csv │ ├── type1_data1.csv │ └── type1_data2.csv ├── part2 ├── ch2 │ ├── ch2_classification.ipynb │ ├── test.csv │ └── train.csv ├── ch3 │ └── ch3_metrics.ipynb ├── ch4 │ ├── ch4_regression.ipynb │ ├── test.csv │ └── train.csv ├── ch5 │ ├── ch5_multi_class_classification.ipynb │ ├── test.csv │ └── train.csv ├── ch6 │ ├── ch6_ex_classification.ipynb │ ├── creditcard_test.csv │ ├── creditcard_train.csv │ ├── diabetes_test.csv │ ├── diabetes_train.csv │ ├── hr_test.csv │ └── hr_train.csv ├── ch7 │ ├── ch7_ex_multi_class_classification.ipynb │ ├── drug_test.csv │ ├── drug_train.csv │ ├── glass_test.csv │ ├── glass_train.csv │ ├── score_test.csv │ └── score_train.csv └── ch8 │ ├── car_test.csv │ ├── car_train.csv │ ├── ch8_ex_regression.ipynb │ ├── flight_test.csv │ ├── flight_train.csv │ ├── laptop_test.csv │ └── laptop_train.csv ├── part3 ├── ch1 │ └── ch1_hypothesis_testing.ipynb ├── ch2 │ ├── ch2_anova.ipynb │ ├── fertilizer.csv │ └── tree.csv ├── ch3 │ └── ch3_chi_square.ipynb ├── ch4 │ ├── ch4_linear_regression.ipynb │ └── study.csv ├── ch5 │ ├── ch5_logistic_regression.ipynb │ └── health_survey.csv └── ch6 │ ├── ch6_ex_type3.ipynb │ ├── customer_travel.csv │ ├── math.csv │ └── tomato2.csv └── part4 ├── ch2 ├── X_test.csv ├── X_train.csv ├── members.csv ├── p2_type1.ipynb ├── p2_type2.ipynb └── y_train.csv ├── ch3 ├── members.csv ├── p3_type1.ipynb ├── p3_type2.ipynb ├── test.csv ├── train.csv └── year.csv ├── ch4 ├── data4-1.csv ├── data4-2.csv ├── data4-3.csv ├── p4_type1.ipynb ├── p4_type2.ipynb ├── test.csv └── train.csv ├── ch5 ├── data5-1.csv ├── data5-2.csv ├── data5-3.csv ├── p5_type1.ipynb ├── p5_type2.ipynb ├── test.csv └── train.csv ├── ch6 ├── data6-1-1.csv ├── 
data6-1-2.csv ├── data6-1-3.csv ├── data6-3-2.csv ├── energy_test.csv ├── energy_train.csv ├── p6_type1.ipynb ├── p6_type2.ipynb └── p6_type3.ipynb ├── ch7 ├── air_quality.csv ├── clam.csv ├── mart_test.csv ├── mart_train.csv ├── p7_type1.ipynb ├── p7_type2.ipynb ├── p7_type3.ipynb ├── stock_market.csv ├── student_assessment.csv └── system_cpu.csv └── ch8 ├── chem.csv ├── churn.csv ├── churn_test.csv ├── churn_train.csv ├── drinks.csv ├── p8_type1.ipynb ├── p8_type2.ipynb ├── p8_type3.ipynb ├── piq.csv └── tourist.csv /.github/workflows/issue-response.yml: -------------------------------------------------------------------------------- 1 | name: Issue Response Bot 2 | 3 | on: 4 | issues: 5 | types: [opened] 6 | issue_comment: 7 | types: [created] 8 | 9 | permissions: 10 | issues: write # grants write access to issues and their comments 11 | contents: read # read access to the repository, if needed 12 | 13 | jobs: 14 | respond: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Check out repository 19 | uses: actions/checkout@v4 20 | 21 | - name: Set up Python 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: '3.x' 25 | 26 | - name: Install dependencies 27 | run: | 28 | 29 | pip install openai==0.28.0 requests 30 | 31 | - name: Generate Response 32 | env: 33 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 34 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 35 | GITHUB_REPOSITORY: ${{ github.repository }} 36 | ISSUE_NUMBER: ${{ github.event.issue.number }} 37 | COMMENT_BODY: ${{ github.event.comment.body || github.event.issue.body }} # issue_comment events carry both payloads: prefer the new comment, fall back to the issue body on 'opened' 38 | COMMENT_AUTHOR: ${{ github.event.comment.user.login || github.event.issue.user.login }} # same precedence: comment author first, issue author as fallback 39 | run: | 40 | python .github/workflows/respond_to_issue.py 41 | -------------------------------------------------------------------------------- /.github/workflows/respond_to_issue.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | import requests 4 | 5 | # GitHub environment variables 6 | GITHUB_TOKEN = 
os.getenv('GITHUB_TOKEN') 7 | REPO_NAME = os.getenv('GITHUB_REPOSITORY') 8 | ISSUE_NUMBER = os.getenv('ISSUE_NUMBER') 9 | COMMENT_BODY = os.getenv('COMMENT_BODY') 10 | COMMENT_AUTHOR = os.getenv('COMMENT_AUTHOR') 11 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') 12 | 13 | # OpenAI API 키 설정 14 | openai.api_key = OPENAI_API_KEY 15 | 16 | # 이슈의 제목, 본문, 그리고 모든 댓글 가져오기 17 | def get_issue_and_comments(): 18 | issue_url = f"https://api.github.com/repos/{REPO_NAME}/issues/{ISSUE_NUMBER}" 19 | comments_url = f"https://api.github.com/repos/{REPO_NAME}/issues/{ISSUE_NUMBER}/comments" 20 | headers = {"Authorization": f"token {GITHUB_TOKEN}"} 21 | 22 | # 이슈 정보 가져오기 23 | issue_response = requests.get(issue_url, headers=headers) 24 | print(f"Issue API response status: {issue_response.status_code}") 25 | 26 | if issue_response.status_code == 200: 27 | issue_data = issue_response.json() 28 | issue_title = issue_data.get('title', '') 29 | issue_body = issue_data.get('body', '') 30 | else: 31 | print("Failed to fetch issue data from GitHub API.") 32 | return None, None, None 33 | 34 | # 이슈의 모든 댓글 가져오기 35 | comments_response = requests.get(comments_url, headers=headers) 36 | print(f"Comments API response status: {comments_response.status_code}") 37 | 38 | if comments_response.status_code == 200: 39 | comments_data = comments_response.json() 40 | comments = [f"{comment['user']['login']} said: {comment['body']}" for comment in comments_data] 41 | else: 42 | print("Failed to fetch comments from GitHub API.") 43 | return issue_title, issue_body, None 44 | 45 | return issue_title, issue_body, comments 46 | 47 | # ChatGPT API 호출 48 | def get_chatgpt_response(title, body, comments, new_comment): 49 | try: 50 | # 프롬프트 생성: 제목, 본문, 기존 댓글, 새 댓글 51 | prompt = f"Title: {title}\n\nBody: {body}\n\nComments:\n" 52 | if comments: 53 | prompt += "\n".join(comments) 54 | if new_comment: 55 | prompt += f"\n\nNew comment from {COMMENT_AUTHOR}: {new_comment}\n\nRespond to the new comment:" 56 | else: 57 | 
prompt += "\n\nRespond to the issue:" 58 | 59 | response = openai.ChatCompletion.create( 60 | model="gpt-3.5-turbo", 61 | messages=[ 62 | {"role": "system", "content": "You are a helpful assistant."}, 63 | {"role": "user", "content": prompt} 64 | ], 65 | max_tokens=2024 # 필요에 따라 조정 66 | ) 67 | return response.choices[0].message['content'].strip() 68 | except Exception as e: 69 | print(f"Error calling OpenAI API: {e}") 70 | return None 71 | 72 | # 이슈 댓글에 답글 추가 73 | def comment_on_issue(response): 74 | if not response: 75 | print("No response to post.") 76 | return 77 | 78 | url = f"https://api.github.com/repos/{REPO_NAME}/issues/{ISSUE_NUMBER}/comments" 79 | headers = {"Authorization": f"token {GITHUB_TOKEN}"} 80 | data = {"body": response} 81 | response = requests.post(url, json=data, headers=headers) 82 | 83 | if response.status_code == 201: 84 | print("Response posted successfully.") 85 | else: 86 | print(f"Failed to post response. Status code: {response.status_code}, Response: {response.text}") 87 | 88 | def main(): 89 | issue_title, issue_body, comments = get_issue_and_comments() 90 | if issue_title and issue_body: # 제목과 본문이 모두 있을 때만 응답 생성 91 | response = get_chatgpt_response(issue_title, issue_body, comments, COMMENT_BODY) 92 | print(f"Generated response: {response}") # 디버깅용 출력 93 | comment_on_issue(response) 94 | else: 95 | print("No issue title or body found.") 96 | 97 | if __name__ == "__main__": 98 | main() 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [퇴근후딴짓] 빅데이터 분석기사 실기 - 길벗 시나공 시리즈 2 | 3 | [![Python](https://img.shields.io/badge/Python-3.10.12-blue)]() 4 | [![Pandas](https://img.shields.io/badge/Pandas-2.0.3-orange)]() 5 | [![Statsmodels](https://img.shields.io/badge/Statsmodels-0.14.1-green)]() 6 | [![SciPy](https://img.shields.io/badge/SciPy-1.11.4-blue)]() 7 | 
[![Scikit-learn](https://img.shields.io/badge/Scikit_learn-1.2.2-black)]() 8 | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/) 9 | 10 | ## 🌱 도서 링크 11 | - [교보문고](https://product.kyobobook.co.kr/detail/S000214299800), [yes24](https://www.yes24.com/Product/Goods/133311973) 12 | 13 | 14 | ## 🌱 정오표: 15 | - https://bit.ly/3YahBcW 16 | - 시험환경 업데이트 9회 준비 기준 (판다스, 사이킷런 등이 코랩과 동일한 버전으로 업데이트 되었어요) 17 | 18 | ## 🌱 목차 19 | - Intro. 시험 응시 전략, 시험 환경 소개, 코드 및 데이터 불러오기, 자주하는 질문 등 20 | - PART1. 작업형1 (파이썬, 판다스, 연습문제) 21 | - PART2. 작업형2 (이진분류, 다중분류, 회귀, 평가지표, 연습문제) 22 | - PART3. 작업형3 (가설검정, 분산 분석, 카이제곱, 회귀, 로지스틱 회귀, 연습문제) 23 | - PART4. 기출유형 (예시문제, 2회 ~ 8회까지) 24 | 25 | 26 | ## 🌱 예제코드 바로 실행하는 방법 27 | - 노트북 선택(part/chapter) -> 구글 코랩에서 실행하기 -> Drive로 복사 -> 실행 28 | ![guide_colab](https://github.com/user-attachments/assets/840d2a4f-a725-4320-9c84-c76d37f910d7) 29 | 30 | ## 🌱 예제코드 전체 다운로드 방법 31 | - "Code" 버튼 클릭 -> 풀다운 메뉴에서 "Download Zip"을 선택 32 | - 입문자는 "예제코드 바로 실행하는 방법"을 추천합니다. 33 | Screenshot 2024-08-21 at 11 58 59 AM 34 | 35 | 36 | ## 🌱 실습 중 오류가 발생했을 때 37 | - 제공된 최종 노트북 코드와 현재 코드를 비교하여 문제를 파악해보세요. 코드를 복사하여 붙여넣기 한 후 정상적으로 실행되는지 확인 38 | - 문제가 지속될 경우, ChatGPT(https://chat.openai.com/), Claude(https://claude.ai) 를 활용 39 | 40 | 41 | ## 🌱 안내사항 42 | - 아래와 같은 lightgbm 모델에서 학습시 발생하는 워닝은 무시해 주세요. (시험환경에서도 워닝은 무시해도 됨) 43 | ```text 44 | /usr/local/lib/python3.10/dist-packages/dask/dataframe/__init__.py:42: FutureWarning: 45 | Dask dataframe query planning is disabled because dask-expr is not installed. 46 | You can install it with `pip install dask[dataframe]` or `conda install dask`. 47 | This will raise in a future version. 48 | warnings.warn(msg, FutureWarning) 49 | ``` 50 | 51 | ## 🌱 커뮤니티 52 | - 9회 스터디(오징어게임) 초대장 : https://bit.ly/3zFDXK0 53 | - 디스코드 입장 링크: https://discord.gg/V8acvTnHhH 54 | - 학습과 관련해 1:1 질의응답은 진행하지 않습니다. 미션을 수행하고, 멤버간 질의응답을 하는 공간입니다. 55 | 56 | 57 | ## 레포지토리 구조 58 | ```text 59 | . 
60 | ├── README.md 61 | ├── part1 (작업형1) 62 | │   ├── ch1 63 | │   │   └── ch1_python.ipynb (코드) 64 | │   ├── ch2 65 | │   │   └── ch2_pandas.ipynb (코드) 66 | │   └── ch3 67 | │   ├── ch3_ex_type1.ipynb (코드) 68 | │   ├── delivery_time.csv 69 | │   ├── school_data.csv 70 | │   ├── school_data_science.csv 71 | │   ├── school_data_social.csv 72 | │   ├── type1_data1.csv 73 | │   └── type1_data2.csv 74 | ├── part2 (작업형2) 75 | │   ├── ch2 76 | │   │   ├── ch2_classification.ipynb (코드) 77 | │   │   ├── test.csv 78 | │   │   └── train.csv 79 | │   ├── ch3 80 | │   │   └── ch3_metrics.ipynb (코드) 81 | │   ├── ch4 82 | │   │   ├── ch4_regression.ipynb (코드) 83 | │   │   ├── test.csv 84 | │   │   └── train.csv 85 | │   ├── ch5 86 | │   │   ├── ch5_multi_class_classification.ipynb (코드) 87 | │   │   ├── test.csv 88 | │   │   └── train.csv 89 | │   ├── ch6 90 | │   │   ├── ch6_ex_classification.ipynb (코드) 91 | │   │   ├── creditcard_test.csv 92 | │   │   ├── creditcard_train.csv 93 | │   │   ├── diabetes_test.csv 94 | │   │   ├── diabetes_train.csv 95 | │   │   ├── hr_test.csv 96 | │   │   └── hr_train.csv 97 | │   ├── ch7 98 | │   │   ├── ch7_ex_multi_class_classification.ipynb (코드) 99 | │   │   ├── drug_test.csv 100 | │   │   ├── drug_train.csv 101 | │   │   ├── glass_test.csv 102 | │   │   ├── glass_train.csv 103 | │   │   ├── score_test.csv 104 | │   │   └── score_train.csv 105 | │   └── ch8 106 | │   ├── car_test.csv 107 | │   ├── car_train.csv 108 | │   ├── ch8_ex_regression.ipynb (코드) 109 | │   ├── flight_test.csv 110 | │   ├── flight_train.csv 111 | │   ├── laptop_test.csv 112 | │   └── laptop_train.csv 113 | ├── part3 (작업형3) 114 | │   ├── ch1 115 | │   │   └── ch1_hypothesis_testing.ipynb (코드) 116 | │   ├── ch2 117 | │   │   ├── ch2_anova.ipynb (코드) 118 | │   │   ├── fertilizer.csv 119 | │   │   └── tree.csv 120 | │   ├── ch3 121 | │   │   └── ch3_chi_square.ipynb (코드) 122 | │   ├── ch4 123 | │   │   ├── ch4_linear_regression.ipynb (코드) 124 | │   │   └── study.csv 125 | │ 
  ├── ch5 126 | │   │   ├── ch5_logistic_regression.ipynb (코드) 127 | │   │   └── health_survey.csv 128 | │   └── ch6 129 | │   ├── ch6_ex_type3.ipynb (코드) 130 | │   ├── customer_travel.csv 131 | │   ├── math.csv 132 | │   └── tomato2.csv 133 | └── part4 (기출유형) 134 | ├── ch2 135 | │   ├── X_test.csv 136 | │   ├── X_train.csv 137 | │   ├── members.csv 138 | │   ├── p2_type1.ipynb (작업형1 코드) 139 | │   ├── p2_type2.ipynb (작업형2 코드) 140 | │   └── y_train.csv 141 | ├── ch3 142 | │   ├── members.csv 143 | │   ├── p3_type1.ipynb (작업형1 코드) 144 | │   ├── p3_type2.ipynb (작업형2 코드) 145 | │   ├── test.csv 146 | │   ├── train.csv 147 | │   └── year.csv 148 | ├── ch4 149 | │   ├── data4-1.csv 150 | │   ├── data4-2.csv 151 | │   ├── data4-3.csv 152 | │   ├── p4_type1.ipynb (작업형1 코드) 153 | │   ├── p4_type2.ipynb (작업형2 코드) 154 | │   ├── test.csv 155 | │   └── train.csv 156 | ├── ch5 157 | │   ├── data5-1.csv 158 | │   ├── data5-2.csv 159 | │   ├── data5-3.csv 160 | │   ├── p5_type1.ipynb (작업형1 코드) 161 | │   ├── p5_type2.ipynb (작업형2 코드) 162 | │   ├── test.csv 163 | │   └── train.csv 164 | ├── ch6 165 | │   ├── data6-1-1.csv 166 | │   ├── data6-1-2.csv 167 | │   ├── data6-1-3.csv 168 | │   ├── data6-3-2.csv 169 | │   ├── energy_test.csv 170 | │   ├── energy_train.csv 171 | │   ├── p6_type1.ipynb (작업형1 코드) 172 | │   ├── p6_type2.ipynb (작업형2 코드) 173 | │   └── p6_type3.ipynb (작업형3 코드) 174 | ├── ch7 175 | │   ├── air_quality.csv 176 | │   ├── clam.csv 177 | │   ├── mart_test.csv 178 | │   ├── mart_train.csv 179 | │   ├── p7_type1.ipynb (작업형1 코드) 180 | │   ├── p7_type2.ipynb (작업형2 코드) 181 | │   ├── p7_type3.ipynb (작업형3 코드) 182 | │   ├── stock_market.csv 183 | │   ├── student_assessment.csv 184 | │   └── system_cpu.csv 185 | └── ch8 186 | ├── chem.csv 187 | ├── churn.csv 188 | ├── churn_test.csv 189 | ├── churn_train.csv 190 | ├── drinks.csv 191 |    ├── p8_type1.ipynb (작업형1 코드) 192 |    ├── p8_type2.ipynb (작업형2 코드) 193 |    ├── p8_type3.ipynb (작업형3 코드) 194 | ├── piq.csv 195 | └── tourist.csv 196 
| ``` 197 | 198 | 이 레포지토리에 실린 모든 내용의 저작권은 저자에게 있으며, 저자의 허락 없이 이 코드의 일부 또는 전부를 복제, 배포할 수 없습니다. 199 | -------------------------------------------------------------------------------- /part1/ch3/school_data.csv: -------------------------------------------------------------------------------- 1 | 이름,수학,영어,국어,수학교사,영어교사,국어교사 2 | 강아지,66,61,26,김선생,장선생,최선생 3 | 고양이,92,48,80,김선생,장선생,이선생 4 | 토끼,98,7,6,김선생,장선생,최선생 5 | 사자,17,99,14,김선생,유선생,최선생 6 | 호랑이,83,92,75,박선생,장선생,이선생 7 | 곰,57,52,54,김선생,유선생,이선생 8 | 원숭이,86,97,71,김선생,장선생,이선생 9 | 기린,97,85,1,김선생,유선생,최선생 10 | 코끼리,96,94,43,김선생,유선생,이선생 11 | 판다,47,27,58,박선생,장선생,이선생 12 | 늑대,73,34,55,박선생,장선생,최선생 13 | 여우,32,97,25,박선생,유선생,이선생 14 | 펭귄,46,76,50,김선생,장선생,이선생 15 | 하이에나,96,40,84,김선생,장선생,최선생 16 | 코알라,25,3,56,박선생,장선생,이선생 17 | 강치,83,69,49,박선생,유선생,이선생 18 | 햄스터,78,64,12,김선생,장선생,최선생 19 | 뱀,36,75,18,김선생,장선생,이선생 20 | 독수리,96,34,81,박선생,장선생,이선생 21 | 침팬지,80,58,1,박선생,장선생,이선생 22 | 하마,68,10,51,박선생,유선생,이선생 23 | 두더지,49,22,44,김선생,유선생,최선생 24 | 물소,55,77,48,박선생,장선생,이선생 25 | 캥거루,67,18,56,김선생,장선생,최선생 26 | 참새,2,100,91,김선생,유선생,최선생 27 | 타조,84,15,49,박선생,유선생,이선생 28 | 개구리,39,27,86,박선생,장선생,이선생 29 | 펠리칸,66,30,3,김선생,유선생,이선생 30 | 돌고래,84,52,67,박선생,유선생,최선생 31 | 매,47,70,11,김선생,유선생,이선생 32 | -------------------------------------------------------------------------------- /part1/ch3/school_data_science.csv: -------------------------------------------------------------------------------- 1 | 이름,과학,과학교사 2 | 강아지,66,황선생 3 | 고양이,92,임선생 4 | 토끼,98,황선생 5 | 사자,17,임선생 6 | 호랑이,83,임선생 7 | 곰,57,임선생 8 | 원숭이,86,황선생 9 | 기린,97,황선생 10 | 코끼리,96,황선생 11 | 판다,47,황선생 12 | 늑대,73,임선생 13 | 여우,32,임선생 14 | 펭귄,46,황선생 15 | 하이에나,96,황선생 16 | 코알라,25,임선생 17 | 강치,83,황선생 18 | 햄스터,78,임선생 19 | 뱀,36,황선생 20 | 독수리,96,황선생 21 | 침팬지,80,임선생 22 | 하마,68,황선생 23 | 두더지,49,임선생 24 | 물소,55,황선생 25 | 캥거루,67,임선생 26 | 참새,2,황선생 27 | 타조,84,황선생 28 | 개구리,39,황선생 29 | 펠리칸,66,황선생 30 | 돌고래,84,임선생 31 | 매,47,황선생 32 | -------------------------------------------------------------------------------- /part1/ch3/school_data_social.csv: 
-------------------------------------------------------------------------------- 1 | 이름,사회,사회교사 2 | 기린,47,오선생 3 | 매,61,우선생 4 | 곰,48,우선생 5 | 개구리,7,오선생 6 | 코끼리,99,오선생 7 | 펠리칸,92,오선생 8 | 펭귄,52,오선생 9 | 두더지,97,우선생 10 | 여우,85,오선생 11 | 캥거루,94,우선생 12 | 독수리,27,우선생 13 | 호랑이,34,오선생 14 | 사자,97,우선생 15 | 참새,76,우선생 16 | 햄스터,40,우선생 17 | 코알라,3,오선생 18 | 하마,69,우선생 19 | 판다,64,우선생 20 | 강치,75,우선생 21 | 타조,34,오선생 22 | 강아지,58,오선생 23 | 고양이,10,오선생 24 | 물소,22,우선생 25 | 늑대,77,오선생 26 | 침팬지,18,오선생 27 | 뱀,100,우선생 28 | 원숭이,15,우선생 29 | 돌고래,27,우선생 30 | 토끼,30,오선생 31 | 하이에나,52,우선생 32 | -------------------------------------------------------------------------------- /part1/ch3/type1_data1.csv: -------------------------------------------------------------------------------- 1 | id,age,city,f1,f2,f3,f4,f5,subscribed,views 2 | id01,2,서울,,0,gold,ENFJ,91.29779092,2024-07-16,6820 3 | id02,9,서울,70,1,,ENFJ,60.33982554,2024-05-12,2534 4 | id03,27,서울,61,1,gold,ISTJ,17.25298557,2024-03-16,7312 5 | id04,75,서울,,2,,INFP,52.66707799,2024-07-21,493 6 | id05,24,서울,85,2,,ISFJ,29.26986926,2024-03-07,1338 7 | id06,22,서울,57,0,vip,INTP,20.1294441,2024-09-12,21550 8 | id07,36.3,서울,60,1,,ISFJ,9.796377581,2024-01-11,61 9 | id08,38,서울,101,1,silver,INFJ,83.68538032,2024-03-06,3260 10 | id09,3.3,서울,35,2,,ESFJ,17.25298557,2024-03-21,2764 11 | id10,95,서울,74,1,gold,ISFP,98.42989897,2024-04-03,9992 12 | id100,47,경기,53,0,vip,ESFP,33.30899901,2024-02-21,15535 13 | id11,40,서울,68,0,gold,ENFP,98.42989897,2024-10-29,6752 14 | id12,20,서울,,0,,ESTP,91.29779092,2024-11-30,1367 15 | id13,15,서울,68,0,gold,ESFJ,83.68538032,2024-12-30,5643 16 | id14,77,서울,50,1,gold,ENTJ,67.8863732,2024-09-19,5700 17 | id15,22,서울,67,1,gold,ENTP,9.796377581,2024-05-26,7676 18 | id16,68,서울,85,0,gold,ESFP,16.2838541,2024-07-25,9472 19 | id17,74,서울,,1,gold,ISTP,67.8863732,2024-10-26,9441 20 | id18,41,서울,87,2,gold,ISFJ,80.13828012,2024-03-03,7933 21 | id19,53,서울,,0,gold,ISFP,83.68538032,2024-12-24,5287 22 | id20,11,서울,51,1,,INTJ,91.29779092,2024-07-16, 23 | 
id21,90,부산,,1,gold,ISFP,29.26986926,2024-05-03,9690 24 | id22,-6.3,부산,72,1,gold,ENFP,52.66707799,2024-02-09,6147 25 | id23,34,부산,75,1,gold,ISTP,69.73031281,2024-05-21,6236 26 | id24,80,부산,44,0,gold,INFJ,73.58639712,2024-09-11,5976 27 | id25,34,부산,,0,gold,ESTP,60.33982554,2024-07-12,8954 28 | id26,55,부산,57,1,gold,ENFP,83.68538032,2024-05-01,5857 29 | id27,37,부산,60,0,silver,ESTP,73.58639712,2024-10-13,4255 30 | id28,38,부산,34,1,gold,ENTP,80.13828012,2024-10-31,5068 31 | id29,-13.5,부산,47,2,gold,ENTP,67.8863732,2024-08-28,6793 32 | id30,16,부산,,0,,ESTJ,17.25298557,2024-05-28,240 33 | id31,86,부산,77,0,gold,ESFJ,73.58639712,2024-02-11,8014 34 | id32,25,부산,64,0,vip,ISFJ,13.04992129,2024-05-24,17421 35 | id33,47,부산,94,0,silver,ENFJ,17.25298557,2024-04-02,3880 36 | id34,65,부산,,1,silver,INFP,48.43118381,2024-02-01,3163 37 | id35,30,부산,,2,silver,ESTJ,33.30899901,2024-06-10,3084 38 | id36,68,부산,77,1,gold,INTP,13.04992129,2024-07-20,9713 39 | id37,100,부산,,0,silver,ESTP,33.30899901,2024-07-08,4068 40 | id38,87,부산,,1,,ESTP,83.68538032,2024-06-21,1048 41 | id39,56,부산,50,0,,INFJ,33.30899901,2024-12-22, 42 | id40,56,대구,75,0,gold,ENFP,17.25298557,2024-01-22,8481 43 | id41,81,대구,55,0,gold,ENFJ,37.11373918,2024-10-04,8640 44 | id42,65,대구,48,2,gold,ESTP,33.30899901,2024-02-09,5999 45 | id43,23,대구,60,0,silver,ISTP,29.26986926,2024-05-18,3878 46 | id44,44,대구,,0,,INTP,16.2838541,2024-11-10,546 47 | id45,97,대구,88,0,gold,ENFJ,13.04992129,2024-06-21,8317 48 | id46,93,대구,,0,gold,ESTJ,67.8863732,2024-05-23,9711 49 | id47,34.6,대구,75,1,gold,ESTJ,90.49699927,2024-05-28,8628 50 | id48,18,대구,,0,,ENFP,20.1294441,2024-03-25, 51 | id49,75,대구,88,0,gold,INTP,37.11373918,2024-03-31,9737 52 | id50,86,대구,78,1,,ENFP,60.33982554,2024-12-05,1935 53 | id51,36,대구,,0,gold,ISTJ,83.68538032,2024-08-20,7217 54 | id52,97,대구,82,1,gold,ISFJ,90.49699927,2024-05-20,8518 55 | id53,52,대구,50,0,gold,ESTP,20.1294441,2024-09-09,7012 56 | id54,53,대구,,1,gold,ENFJ,69.73031281,2024-06-21,5872 57 | 
id55,75,대구,63,2,gold,ENTP,13.04992129,2024-02-06,6042 58 | id56,59,대구,,1,vip,ESTJ,73.58639712,2024-04-05,19589 59 | id57,3,대구,111,0,silver,ISFJ,29.26986926,2024-01-12,4421 60 | id58,0,대구,100,2,,ESTP,33.30899901,2024-04-18,1928 61 | id59,64,대구,,1,silver,ESFJ,20.1294441,2024-06-23,4994 62 | id60,56,경기,,0,gold,ESFP,52.66707799,2024-11-24,6794 63 | id61,87,경기,62,2,,INTP,69.73031281,2024-02-03,218 64 | id62,52,경기,,0,,INTP,60.33982554,2024-04-10,2100 65 | id63,88,경기,86,1,silver,ISFJ,73.58639712,2024-12-01,4053 66 | id64,43,경기,62,2,gold,ESFP,73.58639712,2024-02-22,5995 67 | id65,26.5,경기,,0,silver,ISFP,91.29779092,2024-01-10,3336 68 | id66,87,경기,,1,gold,ISFP,17.25298557,2024-08-05,8471 69 | id67,66,경기,52,1,,ISFJ,73.58639712,2024-06-17,1159 70 | id68,35,경기,45,2,gold,ISFP,67.8863732,2024-07-29,8599 71 | id69,75,경기,85,0,,ESTJ,69.73031281,2024-11-14,2708 72 | id70,-9,경기,96,1,silver,ISTP,48.43118381,2024-11-17,4442 73 | id71,35,경기,84,2,gold,ISFP,52.66707799,2024-07-15,8087 74 | id72,8,경기,97,0,,ESTJ,97.38103419,2024-01-30,602 75 | id73,90,경기,,1,,ISFJ,73.58639712,2024-08-12,512 76 | id74,45,경기,98,0,gold,ESTP,52.66707799,2024-05-27,7739 77 | id75,63,경기,47,0,gold,ESTP,20.1294441,2024-06-12,6779 78 | id76,71,경기,12,0,,ENTJ,83.68538032,2024-07-28,2872 79 | id77,77,경기,31,0,,INFP,98.42989897,2024-01-16,1518 80 | id78,92,경기,96,1,gold,INTJ,69.73031281,2024-10-27,7565 81 | id79,30,경기,,0,gold,INTJ,80.13828012,2024-08-14,8777 82 | id80,67,경기,60,0,silver,ISFP,83.68538032,2024-01-14,4381 83 | id81,86,경기,50,1,,ISFJ,37.11373918,2024-09-14,244 84 | id82,48,경기,,0,,ENTJ,37.11373918,2024-10-17, 85 | id83,73,경기,50,1,vip,ENTP,80.13828012,2024-09-26,19139 86 | id84,66,경기,44,0,gold,INTP,83.68538032,2024-12-19,5650 87 | id85,83.6,경기,55,0,gold,INFJ,80.13828012,2024-09-24,6719 88 | id86,2,경기,,0,,ESTP,29.26986926,2024-02-16,2155 89 | id87,19,경기,,1,gold,ISFP,97.38103419,2024-08-30,6516 90 | id88,89,경기,75,0,,ESTJ,60.33982554,2024-01-06,2713 91 | id89,34,경기,66,1,gold,ENTJ,33.30899901,2024-10-14,6119 92 | 
id90,54,경기,,0,silver,ENTP,29.26986926,2024-04-03,3818 93 | id91,6,경기,72,0,gold,INTP,9.796377581,2024-08-23,8988 94 | id92,97,경기,78,1,gold,INFP,97.38103419,2024-05-08,9625 95 | id93,21.8,경기,57,0,,ISFP,73.58639712,2024-06-07,42 96 | id94,84,경기,,1,silver,ESTJ,90.49699927,2024-08-16,3774 97 | id95,77,경기,43,1,gold,INTJ,91.29779092,2024-05-21,8697 98 | id96,92,경기,53,1,silver,ENTJ,52.66707799,2024-05-06,4336 99 | id97,100,경기,,0,gold,INFP,67.8863732,2024-03-18,6687 100 | id98,39,경기,58,2,,INFP,98.42989897,2024-10-02,865 101 | id99,1,경기,47,0,gold,ESFJ,97.38103419,2024-12-02,6090 102 | id100,47,경기,53,0,vip,ESFP,33.30899901,2024-02-21,15535 103 | id68,35,경기,45,2,gold,ISFP,67.8863732,2024-07-29,8599 104 | id101,36,경기,65,0,silver,ISFP,0,2025-01-29,5735 105 | id102,43,대구,34,0,,ISFP,0,2025-01-30,8765 106 | id103,21,부산,53,0,vip,ISFP,0,2025-02-17,13543 107 | id104,21,서울,13,0,silver,ESFJ,0,2025-02-17,4556 108 | id105,21,경기,24,0,gold,ESFJ,0,2025-02-17,7654 109 | id106,21,대구,65,0,vip,INFP,0,2025-02-17,4212 110 | id107,21,부산,76,1,silver,ESTJ,0,2025-02-17,2346 111 | id108,21,서울,54,1,gold,ESTJ,0,2025-02-17,2178 112 | id109,21,경기,78,1,vip,ESTJ,0,2025-02-17,6790 113 | id110,22,대구,45,2,silver,ESTJ,0,2025-02-17,2112 114 | id111,22,부산,65,2,gold,ENTP,0,2025-02-17,7894 115 | id112,22,서울,45,2,vip,ENTP,0,2025-02-17,9877 116 | id113,22,경기,34,2,silver,ENTP,0,2025-02-17,10346 117 | id114,22,대구,23,0,gold,INTP,0,2025-02-17,9747 118 | id115,23,부산,65,0,vip,ENTP,0,2025-02-17,5628 119 | id116,23,서울,12,1,silver,INFP,0,2025-02-17,1267 120 | id117,23,대구,65,2,gold,INFP,0,2025-02-17,6543 121 | id118,24,부산,94,1,vip,ESFJ,0,2025-02-17,2356 -------------------------------------------------------------------------------- /part1/ch3/type1_data2.csv: -------------------------------------------------------------------------------- 1 | 
year,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199 2 | 2000,137,74,114,140,80,150,16,133,178,181,106,176,163,81,4,195,111,80,166,161,117,141,134,146,10,191,68,11,200,95,74,104,18,107,68,114,43,4,14,59,173,124,108,18,175,30,174,28,144,100,162,116,71,9,138,162,83,112,66,1,111,35,8,180,191,7,108,174,180,189,83,139,109,73,24,197,28,148,47,194,26,106,103,196,115,200,145,33,182,70,158,112,196,41,27,50,34,19,120,33,103,26,43,140,184,159,16,111,34,191,192,168,93,53,95,141,184,85,68,81,164,129,27,119,128,172,103,105,100,162,55,139,193,114,128,57,165,38,39,175,184,119,98,73,23,62,67,180,163,145,68,48,75,187,17,199,74,48,35,54,117,173,76,51,1,40,81,5,165,175,176,72,103,7,175,178,172,152,104,195,15,153,44,111,176,55,39,40,80,85,124,94,118,12,50,191,137,174,56,128 3 | 
2001,176,87,64,110,128,16,8,4,123,87,190,146,53,52,21,55,75,131,76,181,72,82,121,182,97,162,86,179,68,36,77,146,155,13,133,134,28,14,108,4,194,197,153,96,16,53,172,125,57,50,184,122,3,3,168,32,99,189,197,27,7,188,120,181,23,172,56,45,68,191,109,14,66,101,58,1,33,72,74,55,74,87,62,112,14,47,68,15,172,173,196,190,79,117,137,141,171,105,186,128,159,194,29,8,147,24,199,120,94,94,171,158,115,141,92,13,86,54,182,117,193,186,171,198,136,38,77,35,93,80,88,74,199,9,85,78,176,25,137,54,94,181,35,156,82,175,74,15,16,52,122,162,106,36,17,190,135,128,169,78,160,137,147,188,130,146,97,7,37,25,133,134,162,155,19,112,170,154,170,103,197,8,55,131,48,117,112,68,57,117,59,22,3,108,17,104,101,161,156,43 4 | 2002,128,132,123,112,134,160,180,47,115,155,50,152,13,137,89,193,156,122,137,88,173,173,57,59,114,99,77,27,41,4,191,172,11,165,45,39,38,181,178,23,151,76,188,19,110,28,39,89,88,100,174,12,116,142,70,141,91,112,72,147,195,142,89,34,179,3,101,171,144,33,122,196,142,145,88,159,167,44,84,173,52,31,123,187,99,187,5,95,193,101,186,30,176,134,14,164,9,122,29,126,38,137,110,130,153,167,14,22,185,54,162,124,100,155,98,3,18,193,199,168,126,121,152,174,131,61,148,16,171,64,169,95,109,45,139,86,179,17,186,151,88,100,147,74,31,200,91,16,32,142,137,162,74,31,20,131,77,106,96,187,102,166,85,179,178,37,39,158,186,17,36,122,6,125,190,156,90,116,68,146,39,136,123,54,186,103,154,46,22,87,160,111,116,75,48,51,81,101,194,73 5 | 
2003,78,45,26,50,177,119,47,72,163,125,5,22,162,177,8,85,199,191,13,162,113,176,9,65,151,51,154,67,146,28,141,150,69,33,159,16,79,190,180,20,29,65,187,41,25,133,70,161,62,194,113,109,175,113,20,7,132,141,126,41,105,43,74,190,12,113,144,126,3,27,25,49,39,102,74,133,124,136,200,154,186,45,36,87,147,30,60,28,132,103,15,85,131,24,100,128,84,173,44,48,189,9,102,62,108,65,122,192,86,15,184,200,70,198,13,88,126,189,123,113,60,17,36,96,197,101,65,57,31,70,154,169,174,75,113,55,85,48,107,13,16,115,96,127,111,120,78,24,144,120,146,84,54,187,134,67,1,177,95,133,107,126,156,60,20,51,70,190,119,144,188,32,135,38,50,149,79,87,109,116,72,176,79,47,26,34,147,186,151,15,163,88,52,79,192,83,5,75,196,119 6 | 2004,1,162,124,12,1,126,58,123,186,161,69,39,56,122,112,134,15,17,123,29,101,124,161,140,80,116,189,67,147,72,159,18,147,36,116,47,122,126,32,180,63,149,167,151,4,108,193,71,185,16,186,156,92,12,141,119,65,179,192,100,198,174,183,92,130,15,60,45,122,132,119,88,127,124,139,131,17,47,5,80,16,37,133,198,182,100,191,188,76,33,111,185,96,24,17,62,60,64,33,151,26,38,43,60,109,109,165,53,111,135,6,83,87,125,132,49,156,93,58,128,176,11,81,1,200,90,183,157,180,37,19,193,1,97,88,200,115,115,181,54,17,139,57,182,80,81,141,22,119,164,114,196,20,129,117,191,117,150,148,12,73,150,65,52,132,62,21,3,19,192,154,164,129,37,31,158,163,177,128,23,34,157,94,86,75,151,43,122,17,157,100,93,86,12,115,72,29,30,88,150 7 | -------------------------------------------------------------------------------- /part2/ch3/ch3_metrics.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyMCK1VEa3NmQxvmuxyJe94e"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 
실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part2/ch3/ch3_metrics.ipynb)"],"metadata":{"id":"zpGaMMdZUki7"}},{"cell_type":"markdown","source":["## 이진분류 평가지표"],"metadata":{"id":"xkQxClqpjqya"}},{"cell_type":"code","source":["# 이진분류 데이터\n","import pandas as pd\n","y_true = pd.DataFrame([1, 1, 1, 0, 0, 1, 1, 1, 1, 0]) #실제값\n","y_pred = pd.DataFrame([1, 0, 1, 1, 0, 0, 0, 1, 1, 0]) #예측값\n","\n","y_true_str = pd.DataFrame(['A', 'A', 'A', 'B', 'B', 'A', 'A', 'A', 'A', 'B']) #실제값\n","y_pred_str = pd.DataFrame(['A', 'B', 'A', 'A', 'B', 'B', 'B', 'A', 'A', 'B']) #예측값"],"metadata":{"id":"qVxYWt82irzy"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"pvCf5mUdihRO","executionInfo":{"status":"ok","timestamp":1711787831407,"user_tz":-540,"elapsed":496,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"840ef927-4cdc-4ef0-c5e6-11b35fef5703"},"outputs":[{"output_type":"stream","name":"stdout","text":["정확도: 0.6\n","정확도: 0.6\n"]}],"source":["# 정확도(Accuracy)\n","from sklearn.metrics import accuracy_score\n","accuracy = accuracy_score(y_true, y_pred)\n","print(\"정확도:\", accuracy)\n","\n","accuracy = accuracy_score(y_true_str, y_pred_str)\n","print(\"정확도:\", accuracy)"]},{"cell_type":"code","source":["# 정밀도(Precision)\n","from sklearn.metrics import precision_score\n","precision = precision_score(y_true, y_pred)\n","print(\"정밀도:\", precision)\n","\n","precision = precision_score(y_true_str, y_pred_str, pos_label='A')\n","print(\"정밀도:\", precision)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kYd9ae01jPb4","executionInfo":{"status":"ok","timestamp":1711787831407,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"c4d8207b-c032-419a-b290-4f8a2a07c583"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["정밀도: 
0.8\n","정밀도: 0.8\n"]}]},{"cell_type":"code","source":["# 재현율(Recall)\n","from sklearn.metrics import recall_score\n","recall = recall_score(y_true, y_pred)\n","print(\"재현율:\", recall)\n","\n","recall = recall_score(y_true_str, y_pred_str, pos_label='A')\n","print(\"재현율:\", recall)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_m3LRdF7jPgD","executionInfo":{"status":"ok","timestamp":1711787831709,"user_tz":-540,"elapsed":305,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"d36cafd1-7e82-411d-d9df-e120360da41b"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["재현율: 0.5714285714285714\n","재현율: 0.5714285714285714\n"]}]},{"cell_type":"code","source":["# F1 스코어(F1 Score)\n","from sklearn.metrics import f1_score\n","f1 = f1_score(y_true, y_pred)\n","print(\"F1 스코어:\", f1)\n","\n","f1 = f1_score(y_true_str, y_pred_str, pos_label='A')\n","print(\"F1 스코어:\", f1)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nhbdwRnWjPil","executionInfo":{"status":"ok","timestamp":1711787832084,"user_tz":-540,"elapsed":377,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"366c1a7f-28cb-4788-e731-d077c73faaa4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["F1 스코어: 0.6666666666666666\n","F1 스코어: 0.6666666666666666\n"]}]},{"cell_type":"code","source":["# ROC-AUC\n","from sklearn.metrics import roc_auc_score\n","# 실제값 (0: 음성, 1: 양성)\n","y_true = pd.DataFrame([0, 1, 0, 1, 1, 0, 0, 0, 1, 1])\n","# 예측값 중 양성(1) 확률\n","y_pred_proba = pd.DataFrame([0.4, 0.9, 0.1, 0.3, 0.8, 0.6, 0.4, 0.2, 0.7, 0.6])\n","\n","roc_auc = roc_auc_score(y_true, y_pred_proba)\n","print(\"ROC-AUC:\", roc_auc)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"A8ymIAM3jPk-","executionInfo":{"status":"ok","timestamp":1711787832084,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"3f23d2cf-832c-4094-c51e-7ee94abc917c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["ROC-AUC: 0.86\n"]}]},{"cell_type":"code","source":["# 실제값\n","y_true_str = pd.DataFrame(['A', 'B', 'A', 'B', 'B', 'A', 'A', 'A', 'B', 'B'])\n","\n","# 예측값 중 B 확률\n","y_pred_proba_str = pd.DataFrame([0.4, 0.9, 0.1, 0.3, 0.8, 0.6, 0.4, 0.2, 0.7, 0.6])\n","roc_auc = roc_auc_score(y_true_str, y_pred_proba_str)\n","\n","print(\"ROC-AUC:\", roc_auc)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kDHt8BmA82ml","executionInfo":{"status":"ok","timestamp":1711787905418,"user_tz":-540,"elapsed":274,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"ac1ed1b7-2eba-4738-8b79-ae73274cbe97"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["ROC-AUC: 0.86\n"]}]},{"cell_type":"code","source":["# 참고 (실제값을 0과 1로 변경)\n","from sklearn.metrics import roc_auc_score\n","\n","y_true_str = pd.DataFrame(['A', 'B', 'A', 'B', 'B', 'A', 'A', 'A', 'B', 'B'])\n","y_pred_proba_str = pd.DataFrame([0.4, 0.9, 0.1, 0.3, 0.8, 0.6, 0.4, 0.2, 0.7, 0.6])\n","\n","# 'A'를 0, 'B'를 1로 변환\n","y_true_binary = (y_true_str == 'B').astype(int)\n","\n","roc_auc = roc_auc_score(y_true_binary, y_pred_proba)\n","print(\"ROC-AUC:\", roc_auc)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EGq3WO9rgRGB","executionInfo":{"status":"ok","timestamp":1711787947340,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"4b5bcb0e-bc33-4f98-8c79-89c88f300441"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["ROC-AUC: 0.86\n"]}]},{"cell_type":"markdown","source":["## 다중분류 평가지표"],"metadata":{"id":"CCrgqDtQlWLe"}},{"cell_type":"code","source":["# 다중분류 데이터\n","y_true = pd.DataFrame([1, 2, 3, 3, 2, 1, 3, 3, 2, 1]) # 실제값\n","y_pred = pd.DataFrame([1, 2, 1, 3, 2, 1, 1, 2, 2, 
1]) # 예측값\n","\n","y_true_str = pd.DataFrame(['A', 'B', 'C', 'C', 'B', 'A', 'C', 'C', 'B', 'A']) # 실제값\n","y_pred_str = pd.DataFrame(['A', 'B', 'A', 'C', 'B', 'A', 'A', 'B', 'B', 'A']) # 예측값"],"metadata":{"id":"CQhA_jUQjHxW"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# 정확도(Accuracy)\n","from sklearn.metrics import accuracy_score\n","accuracy = accuracy_score(y_true, y_pred)\n","print(\"정확도:\", accuracy)\n","\n","accuracy = accuracy_score(y_true_str, y_pred_str)\n","print(\"정확도:\", accuracy)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lQGHo-DdlYIp","executionInfo":{"status":"ok","timestamp":1711787832397,"user_tz":-540,"elapsed":316,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"080ebd81-b958-4584-e85e-73a7c4e30c72"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["정확도: 0.7\n","정확도: 0.7\n"]}]},{"cell_type":"code","source":["# 정밀도(Precision)\n","from sklearn.metrics import precision_score\n","precision = precision_score(y_true, y_pred, average='macro') # average= micro, macro, weighted\n","print(\"정밀도:\", precision)\n","\n","precision = precision_score(y_true_str, y_pred_str, average='macro')\n","print(\"정밀도:\", precision)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4mSLzqU3lzTr","executionInfo":{"status":"ok","timestamp":1711787832709,"user_tz":-540,"elapsed":314,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"c816bb9e-d408-457e-bdeb-fc450ac65730"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["정밀도: 0.7833333333333333\n","정밀도: 0.7833333333333333\n"]}]},{"cell_type":"code","source":["# 재현율(Recall)\n","from sklearn.metrics import recall_score\n","recall = recall_score(y_true, y_pred, average='macro') # average= micro, macro, weighted\n","print(\"재현율:\", recall)\n","\n","recall = recall_score(y_true_str, y_pred_str, average='macro')\n","print(\"재현율:\", 
recall)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"E_n4u8wUl3nS","executionInfo":{"status":"ok","timestamp":1711787832709,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"ea47f782-ef01-4c05-d05e-bf2018516108"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["재현율: 0.75\n","재현율: 0.75\n"]}]},{"cell_type":"code","source":["# F1 스코어(F1 Score)\n","from sklearn.metrics import f1_score\n","f1 = f1_score(y_true, y_pred, average='macro') # average= micro, macro, weighted\n","print(\"F1 스코어:\", f1)\n","\n","f1 = f1_score(y_true_str, y_pred_str, average='macro')\n","print(\"F1 스코어:\", f1)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fPNIshTOmzhA","executionInfo":{"status":"ok","timestamp":1711787833028,"user_tz":-540,"elapsed":322,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"6eb5b091-836d-430c-fb6b-9464563d3084"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["F1 스코어: 0.669047619047619\n","F1 스코어: 0.669047619047619\n"]}]},{"cell_type":"markdown","source":["## 회귀 평가지표"],"metadata":{"id":"CPpvxE1gox0C"}},{"cell_type":"code","source":["# 회귀 데이터\n","import pandas as pd\n","y_true = pd.DataFrame([1, 2, 5, 2, 4, 4, 7, 9]) # 실제값\n","y_pred = pd.DataFrame([1.14, 2.53, 4.87, 3.08, 4.21, 5.53, 7.51, 10.32]) # 예측값"],"metadata":{"id":"gxaFPg9urVDH"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# MSE(Mean Squared Error)\n","from sklearn.metrics import mean_squared_error\n","mse = mean_squared_error(y_true, y_pred)\n","print(\"MSE:\", mse)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JkPY1sH_ofQc","executionInfo":{"status":"ok","timestamp":1711787833028,"user_tz":-540,"elapsed":8,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"c71cb29e-e6d4-4240-bf3e-abd9cb7aa6d1"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MSE: 0.7339125000000001\n"]}]},{"cell_type":"code","source":["# MAE(Mean Absolute Error)\n","from sklearn.metrics import mean_absolute_error\n","mae = mean_absolute_error(y_true, y_pred)\n","print(\"MAE:\", mae)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"foCZLZSso-Sg","executionInfo":{"status":"ok","timestamp":1711787833028,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"f7492a39-e940-4b34-a869-a40ecd1a78db"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MAE: 0.68125\n"]}]},{"cell_type":"code","source":["# 결정 계수(R-squared)\n","from sklearn.metrics import r2_score\n","r2 = r2_score(y_true, y_pred)\n","print(\"결정 계수:\", r2)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"pijoRvYdo-aD","executionInfo":{"status":"ok","timestamp":1711787833372,"user_tz":-540,"elapsed":349,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"f9172311-68a4-49ee-e5af-8ca8f04b7f88"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["결정 계수: 0.8859941747572815\n"]}]},{"cell_type":"code","source":["# RMSE(Root Mean Squared Error)\n","from sklearn.metrics import mean_squared_error\n","mse = mean_squared_error(y_true, y_pred)\n","rmse = mse ** 0.5\n","print(\"RMSE:\", rmse)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0tsm8n_5o-hT","executionInfo":{"status":"ok","timestamp":1711787833372,"user_tz":-540,"elapsed":10,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"dcc7eec9-af0c-4196-9d5b-2ed4634046af"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["RMSE: 0.8566869323154171\n"]}]},{"cell_type":"code","source":["# MSLE(Mean Squared Log 
Error)\n","from sklearn.metrics import mean_squared_log_error\n","msle = mean_squared_log_error(y_true, y_pred)\n","print(\"MSLE:\", msle)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"RHG3TIHqq1M0","executionInfo":{"status":"ok","timestamp":1711787833372,"user_tz":-540,"elapsed":9,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"e76deb4f-0b93-448f-8f8a-9f29330dab89"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MSLE: 0.027278486182156975\n"]}]},{"cell_type":"code","source":["# RMSLE(Root Mean Squared Log Error)\n","from sklearn.metrics import mean_squared_log_error\n","rmsle = mean_squared_log_error(y_true, y_pred) ** 0.5\n","print(\"RMSLE:\", rmsle)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wsAnyeb1qGyt","executionInfo":{"status":"ok","timestamp":1711787833372,"user_tz":-540,"elapsed":8,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"a5bab992-0e65-4a2f-a405-f522f4042a09"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["RMSLE: 0.1651619998127807\n"]}]},{"cell_type":"code","source":["# MAPE(Mean Absolute Percentage Error)\n","mape = (abs((y_true - y_pred) / y_true)).mean() * 100\n","print(\"MAPE:\", mape)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cZVmLXZJ_pxE","executionInfo":{"status":"ok","timestamp":1711787833705,"user_tz":-540,"elapsed":9,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"20a3d800-3cb7-4410-eda2-3d0c6fefb827"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MAPE: 0 20.319048\n","dtype: float64\n"]}]},{"cell_type":"code","source":["epsilon = 1e-10\n","mape = (abs((y_true - y_pred) / (y_true + epsilon))).mean() * 100\n","print(\"MAPE:\", 
mape)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"i9gT--uOCW-b","executionInfo":{"status":"ok","timestamp":1711787833705,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"1dfc4141-2fe8-4f2a-f528-3a937db8ad0a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MAPE: 0 20.319048\n","dtype: float64\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"RGg7hqRkCZJb"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /part2/ch6/diabetes_test.csv: -------------------------------------------------------------------------------- 1 | Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age 2 | 3,102,74,0,0,29.5,0.121,32 3 | 5,104,74,0,0,28.8,0.153,48 4 | 4,95,70,32,0,32.1,0.612,24 5 | 1,88,62,24,44,29.9,0.422,23 6 | 2,68,70,32,66,25.0,0.187,25 7 | 4,173,70,14,168,29.7,0.361,33 8 | 6,111,64,39,0,34.2,0.26,24 9 | 4,136,70,0,0,31.2,1.182,22 10 | 9,112,82,32,175,34.2,0.26,36 11 | 2,93,64,32,160,38.0,0.674,23 12 | 3,111,56,39,0,30.1,0.557,30 13 | 6,117,96,0,0,28.7,0.157,30 14 | 1,112,72,30,176,34.4,0.528,25 15 | 0,167,0,0,0,32.3,0.839,30 16 | 7,129,68,49,125,38.5,0.439,43 17 | 10,101,76,48,180,32.9,0.171,63 18 | 5,158,70,0,0,29.8,0.207,63 19 | 6,99,60,19,54,26.9,0.497,32 20 | 0,177,60,29,478,34.6,1.072,21 21 | 5,139,80,35,160,31.6,0.361,25 22 | 7,106,92,18,0,22.7,0.235,48 23 | 13,158,114,0,0,42.3,0.257,44 24 | 1,95,66,13,38,19.6,0.334,25 25 | 4,197,70,39,744,36.7,2.329,31 26 | 3,130,64,0,0,23.1,0.314,22 27 | 0,93,100,39,72,43.4,1.021,35 28 | 1,136,74,50,204,37.4,0.399,24 29 | 5,117,86,30,105,39.1,0.251,42 30 | 9,170,74,31,0,44.0,0.403,43 31 | 10,162,84,0,0,27.7,0.182,54 32 | 4,116,72,12,87,22.1,0.463,37 33 | 3,173,84,33,474,35.7,0.258,22 34 | 0,146,70,0,0,37.9,0.334,28 35 | 0,95,80,45,92,36.5,0.33,26 36 | 6,147,80,0,0,29.5,0.178,50 37 | 
0,95,64,39,105,44.6,0.366,22 38 | 2,114,68,22,0,28.7,0.092,25 39 | 3,61,82,28,0,34.4,0.243,46 40 | 1,99,58,10,0,25.4,0.551,21 41 | 0,120,74,18,63,30.5,0.285,26 42 | 4,146,78,0,0,38.5,0.52,67 43 | 9,156,86,28,155,34.3,1.189,42 44 | 7,147,76,0,0,39.4,0.257,43 45 | 6,80,66,30,0,26.2,0.313,41 46 | 6,195,70,0,0,30.9,0.328,31 47 | 1,117,60,23,106,33.8,0.466,27 48 | 7,62,78,0,0,32.6,0.391,41 49 | 0,165,76,43,255,47.9,0.259,26 50 | 1,97,70,40,0,38.1,0.218,30 51 | 5,166,76,0,0,45.7,0.34,27 52 | 4,99,72,17,0,25.6,0.294,28 53 | 11,135,0,0,0,52.3,0.578,40 54 | 4,123,62,0,0,32.0,0.226,35 55 | 12,100,84,33,105,30.0,0.488,46 56 | 3,180,64,25,70,34.0,0.271,26 57 | 0,126,84,29,215,30.7,0.52,24 58 | 7,133,84,0,0,40.2,0.696,37 59 | 2,90,70,17,0,27.3,0.085,22 60 | 11,120,80,37,150,42.3,0.785,48 61 | 0,137,40,35,168,43.1,2.288,33 62 | 1,135,54,0,0,26.7,0.687,62 63 | 8,126,74,38,75,25.9,0.162,39 64 | 3,99,80,11,64,19.3,0.284,30 65 | 5,116,74,29,0,32.3,0.66,35 66 | 1,97,68,21,0,27.2,1.095,22 67 | 10,129,62,36,0,41.2,0.441,38 68 | 4,132,0,0,0,32.9,0.302,23 69 | 7,168,88,42,321,38.2,0.787,40 70 | 3,111,62,0,0,22.6,0.142,21 71 | 4,151,90,38,0,29.7,0.294,36 72 | 2,100,66,20,90,32.9,0.867,28 73 | 1,124,60,32,0,35.8,0.514,21 74 | 10,111,70,27,0,27.5,0.141,40 75 | 6,119,50,22,176,27.1,1.318,33 76 | 5,132,80,0,0,26.8,0.186,69 77 | 2,56,56,28,45,24.2,0.332,22 78 | 1,114,66,36,200,38.1,0.289,21 79 | 4,146,85,27,100,28.9,0.189,27 80 | 10,101,86,37,0,45.6,1.136,38 81 | 5,155,84,44,545,38.7,0.619,34 82 | 1,80,74,11,60,30.0,0.527,22 83 | 1,90,62,18,59,25.1,1.268,25 84 | 8,133,72,0,0,32.9,0.27,39 85 | 1,106,70,28,135,34.2,0.142,22 86 | 0,100,70,26,50,30.8,0.597,21 87 | 0,119,64,18,92,34.9,0.725,23 88 | 11,138,74,26,144,36.1,0.557,50 89 | 6,114,0,0,0,0.0,0.189,26 90 | 2,118,80,0,0,42.9,0.693,21 91 | 6,92,92,0,0,19.9,0.188,28 92 | 9,120,72,22,56,20.8,0.733,48 93 | 4,95,64,0,0,32.0,0.161,31 94 | 4,97,60,23,0,28.2,0.443,22 95 | 6,134,80,37,370,46.2,0.238,46 96 | 0,125,96,0,0,22.5,0.262,21 97 | 
1,95,74,21,73,25.9,0.673,36 98 | 4,96,56,17,49,20.8,0.34,26 99 | 9,145,88,34,165,30.3,0.771,53 100 | 2,88,74,19,53,29.0,0.229,22 101 | 0,119,0,0,0,32.4,0.141,24 102 | 5,108,72,43,75,36.1,0.263,33 103 | 9,130,70,0,0,34.2,0.652,45 104 | 3,126,88,41,235,39.3,0.704,27 105 | 3,128,72,25,190,32.4,0.549,27 106 | 5,168,64,0,0,32.9,0.135,41 107 | 1,138,82,0,0,40.1,0.236,28 108 | 4,99,68,38,0,32.8,0.145,33 109 | 8,120,86,0,0,28.4,0.259,22 110 | 10,168,74,0,0,38.0,0.537,34 111 | 1,130,60,23,170,28.6,0.692,21 112 | 3,182,74,0,0,30.5,0.345,29 113 | 7,81,78,40,48,46.7,0.261,42 114 | 2,90,80,14,55,24.4,0.249,24 115 | 0,137,68,14,148,24.8,0.143,21 116 | 8,120,0,0,0,30.0,0.183,38 117 | 9,140,94,0,0,32.7,0.734,45 118 | 3,191,68,15,130,30.9,0.299,34 119 | 4,158,78,0,0,32.9,0.803,31 120 | 1,90,62,12,43,27.2,0.58,24 121 | 3,99,54,19,86,25.6,0.154,24 122 | 7,142,60,33,190,28.8,0.687,61 123 | 1,91,54,25,100,25.2,0.234,23 124 | 4,110,66,0,0,31.9,0.471,29 125 | 10,75,82,0,0,33.3,0.263,38 126 | 10,115,0,0,0,35.3,0.134,29 127 | 1,143,86,30,330,30.1,0.892,23 128 | 3,87,60,18,0,21.8,0.444,21 129 | 8,125,96,0,0,0.0,0.232,54 130 | 2,112,86,42,160,38.4,0.246,28 131 | 2,92,52,0,0,30.1,0.141,22 132 | 2,121,70,32,95,39.1,0.886,23 133 | 2,146,0,0,0,27.5,0.24,28 134 | 1,79,75,30,0,32.0,0.396,22 135 | 1,73,50,10,0,23.0,0.248,21 136 | 0,94,0,0,0,0.0,0.256,25 137 | 3,150,76,0,0,21.0,0.207,37 138 | 0,104,64,37,64,33.6,0.51,22 139 | 2,128,64,42,0,40.0,1.101,24 140 | 1,95,82,25,180,35.0,0.233,43 141 | 5,86,68,28,71,30.2,0.364,24 142 | 1,133,102,28,140,32.8,0.234,45 143 | 1,196,76,36,249,36.5,0.875,29 144 | 1,189,60,23,846,30.1,0.398,59 145 | 5,106,82,30,0,39.5,0.286,38 146 | 2,120,54,0,0,26.8,0.455,27 147 | 4,122,68,0,0,35.0,0.394,29 148 | 3,121,52,0,0,36.0,0.127,25 149 | 13,153,88,37,140,40.6,1.174,39 150 | 9,91,68,0,0,24.2,0.2,58 151 | 5,147,78,0,0,33.7,0.218,65 152 | 0,105,84,0,0,27.9,0.741,62 153 | 2,101,58,35,90,21.8,0.155,22 154 | 1,144,82,46,180,46.1,0.335,46 155 | 3,141,0,0,0,30.0,0.761,27 156 | 
-------------------------------------------------------------------------------- /part2/ch7/drug_test.csv: -------------------------------------------------------------------------------- 1 | Age,Sex,BP,Cholesterol,Na_to_K 2 | 74,F,LOW,HIGH,20.942 3 | 65,M,HIGH,NORMAL,34.997 4 | 58,F,LOW,HIGH,38.247 5 | 34,M,NORMAL,HIGH,22.456 6 | 59,M,HIGH,HIGH,13.935 7 | 47,M,LOW,HIGH,10.114 8 | 24,M,HIGH,NORMAL,9.475 9 | 41,F,LOW,NORMAL,18.739 10 | 39,F,NORMAL,NORMAL,9.709 11 | 60,M,NORMAL,HIGH,15.171 12 | 58,F,HIGH,HIGH,19.416 13 | 61,F,LOW,HIGH,18.043 14 | 69,M,LOW,NORMAL,11.455 15 | 58,M,HIGH,HIGH,18.991 16 | 59,F,LOW,HIGH,10.444 17 | 63,M,NORMAL,HIGH,25.917 18 | 20,F,HIGH,HIGH,11.262 19 | 31,M,HIGH,NORMAL,11.227 20 | 32,F,HIGH,NORMAL,10.292 21 | 38,M,LOW,HIGH,18.295 22 | 20,M,HIGH,NORMAL,35.639 23 | 26,F,HIGH,NORMAL,12.307 24 | 49,M,LOW,NORMAL,11.014 25 | 48,F,LOW,HIGH,15.036 26 | 36,F,NORMAL,HIGH,16.753 27 | 32,F,LOW,HIGH,9.712 28 | 29,F,HIGH,HIGH,29.45 29 | 49,M,LOW,HIGH,10.537 30 | 35,F,HIGH,HIGH,12.894 31 | 41,M,HIGH,NORMAL,15.156 32 | 64,F,LOW,NORMAL,25.741 33 | 43,M,LOW,NORMAL,19.368 34 | 57,F,NORMAL,NORMAL,25.893 35 | 43,M,HIGH,HIGH,13.972 36 | 16,M,LOW,HIGH,12.006 37 | 34,M,HIGH,HIGH,18.703 38 | 28,F,NORMAL,HIGH,12.879 39 | 65,F,LOW,NORMAL,13.769 40 | 21,F,HIGH,NORMAL,28.632 41 | 51,F,LOW,NORMAL,23.003 42 | 54,M,NORMAL,HIGH,24.658 43 | 60,M,HIGH,HIGH,13.934 44 | 55,F,HIGH,HIGH,10.977 45 | 49,M,HIGH,NORMAL,6.269 46 | 36,F,HIGH,HIGH,11.198 47 | 60,F,HIGH,HIGH,13.303 48 | 65,F,HIGH,NORMAL,31.876 49 | 28,M,NORMAL,HIGH,27.064 50 | 39,F,NORMAL,NORMAL,17.225 51 | 74,M,HIGH,NORMAL,15.436 52 | 53,M,LOW,HIGH,22.963 53 | 64,M,NORMAL,HIGH,7.761 54 | 20,F,LOW,NORMAL,11.686 55 | 18,F,NORMAL,NORMAL,8.75 56 | 23,M,NORMAL,HIGH,12.26 57 | 49,M,HIGH,NORMAL,8.7 58 | 68,M,HIGH,HIGH,11.009 59 | 58,F,HIGH,NORMAL,14.239 60 | 15,F,HIGH,NORMAL,16.725 61 | 61,F,LOW,NORMAL,7.34 62 | 23,M,NORMAL,HIGH,16.85 63 | 66,M,HIGH,HIGH,16.347 64 | 70,F,NORMAL,HIGH,20.489 65 | 34,F,HIGH,NORMAL,19.199 66 | 
47,F,LOW,HIGH,10.067 67 | 46,M,NORMAL,NORMAL,7.285 68 | 66,F,NORMAL,NORMAL,8.107 69 | 41,F,NORMAL,NORMAL,22.905 70 | 23,M,NORMAL,HIGH,31.686 71 | 39,M,NORMAL,HIGH,15.969 72 | 72,F,LOW,NORMAL,14.642 73 | 50,F,NORMAL,HIGH,12.703 74 | 47,F,LOW,HIGH,11.767 75 | 56,F,LOW,HIGH,11.567 76 | 23,M,LOW,HIGH,7.298 77 | 22,M,LOW,HIGH,8.151 78 | 42,M,LOW,HIGH,20.013 79 | 70,M,HIGH,HIGH,13.967 80 | 40,M,HIGH,HIGH,27.826 81 | 72,M,LOW,HIGH,16.31 82 | 43,M,LOW,HIGH,15.376 83 | 60,M,HIGH,NORMAL,8.621 84 | 46,F,HIGH,HIGH,34.686 85 | 57,M,LOW,NORMAL,19.128 86 | 26,M,LOW,NORMAL,20.909 87 | 37,M,LOW,NORMAL,8.968 88 | 42,F,HIGH,HIGH,21.036 89 | 22,F,NORMAL,HIGH,8.607 90 | 67,M,LOW,NORMAL,20.693 91 | 37,M,LOW,NORMAL,16.724 92 | 28,F,LOW,HIGH,13.127 93 | 22,M,NORMAL,HIGH,11.953 94 | 50,M,NORMAL,NORMAL,15.79 95 | 31,M,HIGH,NORMAL,17.069 96 | 49,F,NORMAL,NORMAL,9.381 97 | 50,F,NORMAL,NORMAL,12.295 98 | 47,M,LOW,HIGH,13.093 99 | 47,M,LOW,NORMAL,33.542 100 | 56,F,HIGH,HIGH,25.395 101 | 56,M,NORMAL,HIGH,8.966 102 | -------------------------------------------------------------------------------- /part2/ch7/drug_train.csv: -------------------------------------------------------------------------------- 1 | Age,Sex,BP,Cholesterol,Na_to_K,Drug 2 | 70,M,HIGH,HIGH,9.849,drugB 3 | 36,M,LOW,NORMAL,11.424,drugX 4 | 23,F,HIGH,HIGH,25.355,DrugY 5 | 40,F,NORMAL,HIGH,10.103,drugX 6 | 45,M,LOW,NORMAL,10.017,drugX 7 | 67,F,NORMAL,HIGH,15.891,DrugY 8 | 65,M,HIGH,NORMAL,11.34,drugB 9 | 68,M,LOW,HIGH,10.291,drugC 10 | 56,M,LOW,HIGH,15.015,DrugY 11 | 62,M,NORMAL,HIGH,16.594,DrugY 12 | 49,M,LOW,NORMAL,13.598,drugX 13 | 22,F,HIGH,NORMAL,22.818,DrugY 14 | 73,F,NORMAL,HIGH,19.221,DrugY 15 | 16,M,HIGH,NORMAL,19.007,DrugY 16 | 28,F,NORMAL,HIGH,19.675,DrugY 17 | 37,F,LOW,NORMAL,12.006,drugX 18 | 17,M,NORMAL,NORMAL,10.832,drugX 19 | 32,F,LOW,NORMAL,10.84,drugX 20 | 50,F,NORMAL,NORMAL,17.211,DrugY 21 | 72,M,LOW,HIGH,6.769,drugC 22 | 33,F,LOW,HIGH,33.486,DrugY 23 | 31,M,HIGH,NORMAL,11.871,drugA 24 | 
68,F,HIGH,NORMAL,10.189,drugB 25 | 39,M,HIGH,HIGH,9.664,drugA 26 | 51,F,NORMAL,HIGH,13.597,drugX 27 | 31,M,HIGH,HIGH,30.366,DrugY 28 | 38,F,LOW,NORMAL,29.875,DrugY 29 | 53,F,HIGH,NORMAL,12.495,drugB 30 | 57,F,NORMAL,HIGH,14.216,drugX 31 | 18,F,HIGH,HIGH,37.188,DrugY 32 | 29,M,HIGH,HIGH,12.856,drugA 33 | 42,F,LOW,NORMAL,29.271,DrugY 34 | 32,M,HIGH,NORMAL,9.445,drugA 35 | 64,M,HIGH,NORMAL,20.932,DrugY 36 | 39,M,LOW,NORMAL,13.938,drugX 37 | 19,F,HIGH,HIGH,13.313,drugA 38 | 37,F,HIGH,HIGH,13.091,drugA 39 | 52,M,NORMAL,HIGH,9.894,drugX 40 | 61,F,HIGH,HIGH,25.475,DrugY 41 | 49,F,NORMAL,HIGH,16.275,DrugY 42 | 19,F,HIGH,NORMAL,25.969,DrugY 43 | 26,F,LOW,HIGH,14.16,drugC 44 | 38,F,HIGH,NORMAL,11.326,drugA 45 | 41,M,LOW,HIGH,11.037,drugC 46 | 51,M,HIGH,NORMAL,11.343,drugB 47 | 24,M,NORMAL,HIGH,25.786,DrugY 48 | 15,M,HIGH,NORMAL,17.206,DrugY 49 | 74,M,HIGH,HIGH,9.567,drugB 50 | 55,M,NORMAL,NORMAL,7.261,drugX 51 | 32,F,HIGH,NORMAL,25.974,DrugY 52 | 67,M,NORMAL,NORMAL,10.898,drugX 53 | 72,M,HIGH,NORMAL,9.677,drugB 54 | 18,F,HIGH,NORMAL,24.276,DrugY 55 | 32,F,NORMAL,HIGH,7.477,drugX 56 | 69,F,NORMAL,HIGH,10.065,drugX 57 | 22,M,HIGH,NORMAL,28.294,DrugY 58 | 43,M,NORMAL,NORMAL,12.859,drugX 59 | 40,F,LOW,NORMAL,11.349,drugX 60 | 34,F,LOW,NORMAL,12.923,drugX 61 | 51,M,HIGH,HIGH,18.295,DrugY 62 | 47,M,HIGH,HIGH,10.403,drugA 63 | 42,M,HIGH,NORMAL,12.766,drugA 64 | 53,M,NORMAL,HIGH,14.133,drugX 65 | 23,M,NORMAL,NORMAL,14.02,drugX 66 | 74,M,LOW,NORMAL,11.939,drugX 67 | 23,M,HIGH,HIGH,8.011,drugA 68 | 68,F,NORMAL,NORMAL,27.05,DrugY 69 | 24,F,NORMAL,HIGH,10.605,drugX 70 | 37,F,HIGH,NORMAL,23.091,DrugY 71 | 35,M,NORMAL,NORMAL,7.845,drugX 72 | 47,F,NORMAL,NORMAL,6.683,drugX 73 | 16,F,HIGH,NORMAL,15.516,DrugY 74 | 58,F,LOW,HIGH,26.645,DrugY 75 | 24,F,HIGH,NORMAL,18.457,DrugY 76 | 25,M,NORMAL,HIGH,19.011,DrugY 77 | 35,M,LOW,NORMAL,9.17,drugX 78 | 20,F,NORMAL,NORMAL,9.281,drugX 79 | 28,F,LOW,HIGH,19.796,DrugY 80 | 45,M,LOW,HIGH,17.951,DrugY 81 | 30,F,NORMAL,HIGH,10.443,drugX 82 | 
48,M,HIGH,NORMAL,10.446,drugA 83 | 45,M,LOW,NORMAL,8.37,drugX 84 | 50,M,HIGH,HIGH,7.49,drugA 85 | 39,F,LOW,NORMAL,22.697,DrugY 86 | 60,M,NORMAL,NORMAL,10.091,drugX 87 | 52,M,LOW,NORMAL,32.922,DrugY 88 | 15,M,NORMAL,HIGH,9.084,drugX 89 | 26,F,HIGH,NORMAL,19.161,DrugY 90 | 73,F,HIGH,HIGH,18.348,DrugY 91 | 59,F,NORMAL,HIGH,13.884,drugX 92 | 61,M,NORMAL,HIGH,9.443,drugX 93 | 47,M,LOW,NORMAL,30.568,DrugY 94 | 45,F,HIGH,HIGH,12.854,drugA 95 | 62,M,LOW,NORMAL,27.183,DrugY 96 | 67,M,NORMAL,NORMAL,9.514,drugX 97 | 28,F,NORMAL,HIGH,7.798,drugX 98 | 57,F,HIGH,NORMAL,9.945,drugB 99 | 36,F,HIGH,NORMAL,15.49,DrugY 100 | 28,F,HIGH,NORMAL,18.809,DrugY 101 | 69,M,LOW,HIGH,15.478,DrugY 102 | -------------------------------------------------------------------------------- /part2/ch7/glass_test.csv: -------------------------------------------------------------------------------- 1 | RI,Na,Mg,Al,Si,K,Ca,Ba,Fe 2 | 1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.0,0.17 3 | 1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51 4 | 1.52475,11.45,0.0,1.88,72.19,0.81,13.24,0.0,0.34 5 | 1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0.0,0.0 6 | 1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.0,0.0 7 | 1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0 8 | 1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0.0 9 | 1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0.0,0.0 10 | 1.51623,14.2,0.0,2.79,73.46,0.04,9.04,0.4,0.09 11 | 1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22 12 | 1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.0,0.0 13 | 1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0.0,0.0 14 | 1.51852,14.09,2.19,1.66,72.67,0.0,9.32,0.0,0.0 15 | 1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0 16 | 1.51966,14.77,3.75,0.29,72.02,0.03,9.0,0.0,0.0 17 | 1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0.0,0.2 18 | 1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.0,0.11 19 | 1.51732,14.95,0.0,1.8,72.99,0.0,8.61,1.55,0.0 20 | 1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0.0,0.0 21 | 1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0 22 | 
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.0,0.0 23 | 1.52369,13.44,0.0,1.58,72.22,0.32,12.24,0.0,0.0 24 | 1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.0,0.0 25 | 1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0.0,0.0 26 | 1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0.0,0.0 27 | 1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.0,0.0 28 | 1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.0,0.0 29 | 1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0.0,0.0 30 | 1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.0,0.0 31 | 1.51683,14.56,0.0,1.98,73.29,0.0,8.52,1.57,0.07 32 | 1.51969,12.64,0.0,1.65,73.75,0.38,11.53,0.0,0.0 33 | 1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0.0,0.24 34 | 1.53393,12.3,0.0,1.0,70.16,0.12,16.19,0.0,0.24 35 | 1.51916,14.15,0.0,2.09,72.74,0.0,10.88,0.0,0.0 36 | 1.51755,13.0,3.6,1.36,72.99,0.57,8.4,0.0,0.11 37 | 1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.0,0.0 38 | 1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.0,0.0 39 | 1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.0,0.0 40 | 1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0.0,0.1 41 | 1.51806,13.0,3.8,1.08,73.07,0.56,8.38,0.0,0.12 42 | 1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.0,0.0 43 | 1.51709,13.0,3.47,1.79,72.72,0.66,8.18,0.0,0.0 44 | 1.51321,13.0,0.0,3.02,70.7,6.21,6.93,0.0,0.0 45 | 1.51818,13.72,0.0,0.56,74.45,0.0,10.99,0.0,0.0 46 | 1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0.0,0.16 47 | 1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.0,0.0 48 | 1.51623,14.14,0.0,2.88,72.61,0.08,9.18,1.06,0.0 49 | 1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.0 50 | 1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.0,0.0 51 | 1.51685,14.92,0.0,1.99,73.06,0.0,8.4,1.59,0.0 52 | 1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0.0,0.0 53 | 1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0.0,0.06 54 | 1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.0,0.0 55 | 1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0.0,0.0 56 | 1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0.0 57 | 1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0.0,0.17 58 | 1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.0,0.15 59 | 1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0.0,0.0 60 | 
1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0.0,0.0 61 | 1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0.0,0.14 62 | 1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0.0,0.0 63 | 1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.0,0.0 64 | 1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0.0,0.0 65 | 1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0.0,0.16 66 | 1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0.0,0.0 67 | -------------------------------------------------------------------------------- /part2/ch7/glass_train.csv: -------------------------------------------------------------------------------- 1 | RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type 2 | 1.51829,14.46,2.24,1.62,72.38,0.0,9.26,0.0,0.0,6 3 | 1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0.0,0.0,3 4 | 1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0.0,0.07,1 5 | 1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0.0,0.0,1 6 | 1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.0,0.0,2 7 | 1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0.0,0.0,1 8 | 1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0.0,0.0,2 9 | 1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.0,0.0,1 10 | 1.519,13.49,3.48,1.35,71.95,0.55,9.0,0.0,0.0,1 11 | 1.52127,14.32,3.9,0.83,71.5,0.0,9.49,0.0,0.0,3 12 | 1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0.0,0.0,2 13 | 1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.0,0.0,1 14 | 1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0.0,0.0,2 15 | 1.51613,13.88,1.78,1.79,73.1,0.0,8.67,0.76,0.0,7 16 | 1.52043,13.38,0.0,1.4,72.25,0.33,12.5,0.0,0.0,5 17 | 1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0.0,0.0,1 18 | 1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0.0,0.24,1 19 | 1.5164,14.37,0.0,2.74,72.85,0.0,9.45,0.54,0.0,7 20 | 1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0.0,0.0,1 21 | 1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.0,0.0,3 22 | 1.51658,14.8,0.0,1.99,73.11,0.0,8.28,1.71,0.0,7 23 | 1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0.0,0.12,2 24 | 1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0.0,0.0,1 25 | 1.51299,14.4,1.74,1.54,74.55,0.0,7.59,0.0,0.0,6 26 | 1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0.0,0.24,1 27 | 1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.0,0.19,2 28 | 
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,2 29 | 1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0.0,0.19,1 30 | 1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0.0,0.0,2 31 | 1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0.0,0.29,2 32 | 1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,2 33 | 1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.0,0.26,1 34 | 1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0.0,0.11,1 35 | 1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0.0,0.0,3 36 | 1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.0,0.14,2 37 | 1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.0,0.0,2 38 | 1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.0,0.0,1 39 | 1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0.0,0.0,2 40 | 1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.0,0.0,1 41 | 1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.0,0.3,1 42 | 1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1 43 | 1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.0,0.0,2 44 | 1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0.0,0.24,2 45 | 1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0.0,0.0,7 46 | 1.51609,15.01,0.0,2.51,73.05,0.05,8.83,0.53,0.0,7 47 | 1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0.0,0.0,2 48 | 1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0.0,0.0,5 49 | 1.53125,10.73,0.0,2.1,69.81,0.58,13.3,3.15,0.28,2 50 | 1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1 51 | 1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0.0,0.0,2 52 | 1.51514,14.85,0.0,2.42,73.72,0.0,8.39,0.56,0.0,7 53 | 1.51994,13.27,0.0,1.76,73.03,0.47,11.32,0.0,0.0,5 54 | 1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.0,0.31,1 55 | 1.51831,14.39,0.0,1.82,72.86,1.41,6.47,2.88,0.0,7 56 | 1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0.0,0.1,1 57 | 1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.0,0.0,2 58 | 1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.0,0.22,2 59 | 1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0.0,5 60 | 1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0.0,0.0,2 61 | 1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0.0,0.0,1 62 | 1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,2 63 | 1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0.0,0.32,2 64 | 
1.52065,14.36,0.0,2.02,73.42,0.0,8.44,1.64,0.0,7 65 | 1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.0,0.17,1 66 | 1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0.0,0.09,2 67 | 1.51645,14.94,0.0,1.87,73.11,0.0,8.67,1.38,0.0,7 68 | 1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,2 69 | 1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.0,0.09,1 70 | 1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.0,0.0,2 71 | 1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.0,7 72 | 1.51556,13.87,0.0,2.54,73.23,0.14,9.41,0.81,0.01,7 73 | 1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.0,0.17,1 74 | 1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0.0,0.0,2 75 | 1.51779,13.64,3.65,0.65,73.0,0.06,8.93,0.0,0.0,3 76 | 1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.0,1 77 | 1.51719,14.75,0.0,2.0,73.02,0.0,8.53,1.59,0.08,7 78 | 1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.0,0.0,1 79 | 1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.0,0.14,2 80 | 1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0.0,0.16,1 81 | 1.51545,14.14,0.0,2.68,73.39,0.08,9.07,0.61,0.05,7 82 | 1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.0,0.0,2 83 | 1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0.0,0.0,1 84 | 1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.0,0.12,2 85 | 1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0.0,0.24,1 86 | 1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.0,0.09,2 87 | 1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.0,0.0,1 88 | 1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0.0,0.0,2 89 | 1.518,13.71,3.93,1.54,71.81,0.54,8.21,0.0,0.15,2 90 | 1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.0,0.21,2 91 | 1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0.0,0.0,1 92 | 1.51602,14.85,0.0,2.38,73.28,0.0,8.76,0.64,0.09,7 93 | 1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.0,0.07,1 94 | 1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0.0,0.11,1 95 | 1.52222,14.43,0.0,1.0,72.67,0.1,11.52,0.0,0.08,2 96 | 1.523,13.31,3.58,0.82,71.99,0.12,10.17,0.0,0.03,1 97 | 1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0.0,0.09,3 98 | 1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0.0,0.18,2 99 | 1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.0,0.0,2 100 | 
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.0,0.37,3 101 | 1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0.0,0.0,2 102 | 1.51316,13.02,0.0,3.04,70.48,6.21,6.96,0.0,0.0,5 103 | 1.51711,14.23,0.0,2.08,73.36,0.0,8.62,1.67,0.0,7 104 | 1.51666,12.86,0.0,1.83,73.88,0.97,10.17,0.0,0.0,5 105 | 1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0.0,0.0,2 106 | 1.51508,15.15,0.0,2.25,73.5,0.0,8.34,0.63,0.0,7 107 | 1.51115,17.38,0.0,0.34,75.41,0.0,6.65,0.0,0.0,6 108 | 1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.0,0.0,1 109 | 1.51651,14.38,0.0,1.94,73.61,0.0,8.48,1.57,0.0,7 110 | 1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0.0,0.28,2 111 | 1.51617,14.95,0.0,2.27,73.3,0.0,8.71,0.67,0.0,7 112 | 1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0.0,0.0,1 113 | 1.51653,11.95,0.0,1.19,75.18,2.7,8.93,0.0,0.0,7 114 | 1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0.0,0.0,2 115 | 1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0.0,0.0,2 116 | 1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.0,0.17,2 117 | 1.52739,11.02,0.0,0.75,73.08,0.0,14.96,0.0,0.0,2 118 | 1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0.0,0.0,1 119 | 1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.0,0.0,5 120 | 1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.0,0.0,1 121 | 1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.0,7 122 | 1.52777,12.64,0.0,0.67,72.02,0.06,14.4,0.0,0.0,2 123 | 1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0.0,0.0,7 124 | 1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0.0,0.0,2 125 | 1.51937,13.79,2.41,1.19,72.76,0.0,9.77,0.0,0.0,6 126 | 1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.0,0.0,1 127 | 1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0.0,0.0,1 128 | 1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0.0,0.0,3 129 | 1.51905,14.0,2.39,1.56,72.37,0.0,9.57,0.0,0.0,6 130 | 1.52227,14.17,3.81,0.78,71.35,0.0,9.69,0.0,0.0,1 131 | 1.52664,11.23,0.0,0.77,73.21,0.0,14.68,0.0,0.0,2 132 | 1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.0,0.0,1 133 | 1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.0,0.0,1 134 | 1.52614,13.7,0.0,1.36,71.24,0.19,13.44,0.0,0.1,2 135 | 1.51727,14.7,0.0,2.34,73.28,0.0,8.95,0.66,0.0,7 136 | 
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0,1 137 | 1.51627,13.0,3.58,1.54,72.83,0.61,8.04,0.0,0.0,2 138 | 1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.0,0.28,5 139 | 1.51888,14.99,0.78,1.74,72.5,0.0,9.95,0.0,0.0,6 140 | 1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.0,0.35,2 141 | 1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0.0,0.0,1 142 | 1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0.0,0.0,2 143 | 1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0.0,0.0,1 144 | 1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0.0,0.1,2 145 | 1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1 146 | 1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,3 147 | 1.51969,14.56,0.0,0.56,73.48,0.0,11.22,0.0,0.0,6 148 | 1.51531,14.38,0.0,2.66,73.1,0.04,9.08,0.64,0.0,7 149 | 1.51764,12.98,3.54,1.21,73.0,0.65,8.53,0.0,0.0,1 150 | 1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0.0,0.1,2 151 | -------------------------------------------------------------------------------- /part2/ch8/laptop_test.csv: -------------------------------------------------------------------------------- 1 | Brand,Model,Series,Processor,Processor_Gen,RAM,Hard_Disk_Capacity,OS,Rating 2 | DELL,Vostro,,i3,10th,8.0,256 GB SSD,Windows 10 Home,4.3 3 | Lenovo,IdeaPad,3,i3,10th,8.0,256 GB SSD,Windows 11 Home,4.3 4 | HP,,,i5,11th,8.0,512 GB SSD,Windows 11 Home,4.4 5 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3 6 | HP,250-G6,,i5,7th,4.0,1 TB HDD,DOS,4.2 7 | DELL,Inspiron,Ryzen,3450U,-,8.0,512 GB SSD,Windows 11 Home,4.3 8 | acer,Aspire,7,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.4 9 | ASUS,ROG,Zephyrus,4800HS,-,8.0,1 TB SSD,Windows 10 Home,4.6 10 | ASUS,ROG,Strix,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.6 11 | ASUS,VivoBook,15,-,(4,4.0,256 GB SSD,Windows 10 Home,4.3 12 | Lenovo,IdeaPad,Flex,i3,11th,8.0,512 GB SSD,Windows 10 Home,4.5 13 | APPLE,MacBook,Air,,,,,,4.5 14 | realme,Book,(Slim),i3,11th,8.0,256 GB SSD,Windows 10 Home,4.4 15 | acer,Travelmate,,i5,11th,16.0,1 TB HDD,256 GB SSD,3.4 16 | DELL,,,i7,11th,16.0,512 GB SSD,Windows 11 Home,4.4 17 | 
DELL,Vostro,,i5,11th,8.0,1 TB HDD,Windows 10 Home,3.6 18 | HP,Ryzen,3,5300U,-,8.0,512 GB SSD,Windows 11 Home,4.2 19 | DELL,Inspiron,,i5,11th,8.0,1 TB HDD,256 GB SSD,4.2 20 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7 21 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7 22 | HP,Notebook,PC,i3,11th,8.0,1 TB HDD,Windows 10,3.9 23 | ASUS,Vivobook,14,i3,10th,8.0,1 TB HDD,Windows 10 Home,4.2 24 | MICROSOFT,Surface,Laptop,i5,8th,8.0,128 GB SSD,Windows 10 Home,4.5 25 | DELL,Inspiron,Ryzen,3250U,-,8.0,256 GB SSD,Windows 11 Home,4.3 26 | ASUS,,,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.4 27 | Lenovo,IdeaPad,3,i5,11th,8.0,512 GB SSD,Windows 10 Home,4.2 28 | MSI,Katana,GF66,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.4 29 | ASUS,VivoBook,15,i3,10th,4.0,512 GB SSD,Windows 11 Home,4.2 30 | ASUS,,,i3,10th,4.0,1 TB HDD,Windows 10 Home,3.7 31 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7 32 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.2 33 | ASUS,TUF,Gaming,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.5 34 | HP,HP,Pavilion,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.5 35 | HP,,,i3,11th,8.0,512 GB SSD,Windows 10,4.0 36 | DELL,Inspiron,Ryzen,3450U,-,8.0,1 TB HDD,256 GB SSD,3.9 37 | Lenovo,IdeaPad,3,5500U,-,8.0,512 GB SSD,Windows 11 Home,4.4 38 | APPLE,MacBook,Air,,,,,,4.5 39 | DELL,Inspiron,,i5,11th,16.0,512 GB SSD,Windows 10,4.2 40 | Lenovo,IdeaPad,3,i3,10th,8.0,1 TB HDD,Windows 10 Home,4.1 41 | -------------------------------------------------------------------------------- /part2/ch8/laptop_train.csv: -------------------------------------------------------------------------------- 1 | Brand,Model,Series,Processor,Processor_Gen,RAM,Hard_Disk_Capacity,OS,Rating,Price 2 | ASUS,VivoBook,15,i3,10th,8.0,512 GB SSD,Windows 11 Home,4.3,37940 3 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040 4 | ASUS,VivoBook,15,i7,10th,16.0,512 GB SSD,Windows 11 Home,4.1,57940 5 | DELL,,,i3,10th,8.0,1 TB HDD,Windows 10,3.2,41340 6 | 
Lenovo,IdeaPad,Slim,i3,11th,8.0,512 GB SSD,Windows 10 Home,4.4,45440 7 | ASUS,TUF,Gaming,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.6,89940 8 | ASUS,VivoBook,Ultra,i3,11th,8.0,512 GB SSD,Windows 11 Home,4.8,42940 9 | HP,,,i3,10th,8.0,512 GB SSD,Windows 10 Home,4.3,42340 10 | APPLE,2020,Macbook,,,,,,4.6,129990 11 | DELL,Inspiron,,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,41540 12 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040 13 | Lenovo,Ideapad,Slim,i5,11th,8.0,512 GB SSD,Windows 10 Home,4.2,56449 14 | acer,Nitro,Ryzen,5600H,-,8.0,1 TB HDD,256 GB SSD,4.5,72940 15 | HP,Ryzen,3,3250U,-,8.0,256 GB SSD,Windows 10 Home,4.3,38940 16 | DELL,Vostro,3405,3450U,-,8.0,256 GB SSD,Windows 10 Home,4.2,44440 17 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,39940 18 | acer,Swift,3,i5,8th,8.0,1 TB HDD,128 GB SSD,4.6,59940 19 | DELL,Inspiron,,i3,11th,8.0,512 GB SSD,Windows 11 Home,4.4,45540 20 | Lenovo,IdeaPad,3,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.3,50940 21 | HP,HP,Pavilion,4600H,-,8.0,512 GB SSD,Windows 10 Home,4.4,59940 22 | DELL,Inspiron,Athlon,3050U,-,4.0,256 GB SSD,Windows 11 Home,4.2,33940 23 | ASUS,Chromebook,Celeron,-,(4,4.0,64 GB EMMC Storage,Chrome OS,4.2,23490 24 | ASUS,ASUS,TUF,i5,11th,8.0,512 GB SSD,Windows 10 Home,4.5,71940 25 | HP,15q,,i3,7th,4.0,1 TB HDD,DOS,4.2,32905 26 | Lenovo,IdeaPad,5,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.6,70940 27 | ASUS,VivoBook,15,i3,10th,4.0,1 TB HDD,Windows 10 Home,3.7,31940 28 | HP,,,i3,11th,8.0,512 GB SSD,Windows 11 Home,4.3,42440 29 | ASUS,Ryzen,3,3250U,3rd,8.0,256 GB SSD,Windows 10 Home,4.3,35940 30 | HP,Pavilion,Ryzen,5600H,-,8.0,512 GB SSD,Windows 11 Home,4.5,59940 31 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,39940 32 | Lenovo,,,i3,11th,4.0,256 GB SSD,Windows 10 Home,4.3,36940 33 | Lenovo,Thinkpad,Ryzen,5600U,-,8.0,512 GB SSD,DOS,4.1,57440 34 | HP,,,i5,11th,8.0,512 GB SSD,Windows 11 Home,4.1,55940 35 | ASUS,ZenBook,Duo,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.4,114940 36 
| Lenovo,,,i3,10th,8.0,512 GB SSD,Windows 11 Home,4.3,62140 37 | DELL,Vostro,,i5,11th,8.0,1 TB HDD,Windows 10 Home,3.6,50840 38 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.2,40940 39 | HP,Ryzen,5,-,(8,8.0,512 GB SSD,Windows 11 Home,4.6,48940 40 | HP,Envy,,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.3,74940 41 | DELL,Inspiron,,i5,11th,16.0,512 GB SSD,Windows 10,4.4,67740 42 | ASUS,Vivobook,14,i3,11th,4.0,256 GB SSD,Windows 10 Home,4.2,36940 43 | ASUS,Chromebook,Celeron,-,(4,4.0,32 GB EMMC Storage,Chrome OS,3.6,17640 44 | Lenovo,Ideapad,Slim,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.4,59940 45 | ASUS,Vivobook,15,i3,11th,8.0,1 TB HDD,Windows 10 Home,2.9,42750 46 | APPLE,2020,Macbook,,,,,,4.7,84940 47 | DELL,Inspiron,,i5,11th,16.0,512 GB SSD,Windows 10,4.2,69040 48 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,256 GB SSD,4.1,48940 49 | DELL,3000,,i3,11th,8.0,1 TB HDD,Windows 11 Home,4.2,43850 50 | ASUS,Vivobook,14,-,(8,8.0,256 GB SSD,Windows 11 Home,4.3,32940 51 | ASUS,VivoBook,K15,i5,11th,16.0,1 TB HDD,256 GB SSD,4.7,66940 52 | Lenovo,Ideapad,Gaming,i5,10th,8.0,1 TB HDD,256 GB SSD,4.4,64940 53 | Lenovo,Ideapad,Gaming,5600H,-,8.0,512 GB SSD,Windows 11 Home,4.5,60440 54 | Lenovo,IDEAPAD,3,AMD,Ryzen™,8.0,512 GB SSD,Windows 10,3.7,59850 55 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,40940 56 | Lenovo,v15,,i3,10th,4.0,1 TB HDD,DOS,3.1,39949 57 | ASUS,VivoBook,15,i5,11th,8.0,1 TB HDD,256 GB SSD,4.3,52940 58 | Lenovo,Ideapad,530s,i5,8th,8.0,512 GB SSD,Windows 10 Home,4.4,59949 59 | HP,Pavilion,Gaming,i7,11th,16.0,512 GB SSD,Windows 10 Home,4.4,98140 60 | MSI,,,i3,11th,8.0,512 GB SSD,Windows 10 Home,4.4,48740 61 | DELL,Vostro,,i5,11th,8.0,1 TB HDD,Windows 10 Home,3.6,50840 62 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,256 GB SSD,4.4,45340 63 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,39940 64 | DELL,Vostro,,i3,11th,8.0,256 GB SSD,Windows 10,4.3,41740 65 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040 66 | 
HP,Pavilion,Gaming,4800H,-,16.0,1 TB HDD,256 GB SSD,4.5,75940 67 | ASUS,VivoBook,K15,i5,11th,16.0,1 TB HDD,256 GB SSD,4.3,65940 68 | APPLE,MacBook,Air,,,,,,4.5,105940 69 | ASUS,VivoBook,15,i3,11th,4.0,256 GB SSD,Windows 10 Home,4.2,36940 70 | ASUS,VivoBook,K15,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.5,46940 71 | DELL,,,i3,10th,8.0,1 TB HDD,256 GB SSD,2.9,46840 72 | APPLE,2020,Macbook,,,,,,4.6,129990 73 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040 74 | DELL,Inspiron,,i5,11th,8.0,512 GB SSD,Windows 11 Home,4.4,59150 75 | Lenovo,Ideapad,Slim,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.4,59940 76 | MSI,GF63,Thin,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.5,65940 77 | ASUS,Vivobook,Gaming,i7,10th,8.0,512 GB SSD,Windows 10 Home,3.7,61940 78 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040 79 | Lenovo,,,i3,10th,8.0,512 GB SSD,Windows 11 Home,4.3,62140 80 | ASUS,Vivobook,14,i3,11th,8.0,256 GB SSD,Windows 10 Home,4.3,37840 81 | ASUS,Ryzen,5,5500U,-,8.0,512 GB SSD,Windows 10 Home,4.5,49940 82 | DELL,Vostro,,i3,10th,8.0,512 GB SSD,Windows 10,3.9,43040 83 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040 84 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040 85 | Lenovo,APU,Dual,A9,A99425,4.0,1 TB HDD,DOS,3.9,21846 86 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,256 GB SSD,4.4,44440 87 | DELL,Vostro,,i3,11th,4.0,1 TB HDD,256 GB SSD,4.1,43126 88 | DELL,Vostro,,i5,11th,8.0,1 TB HDD,Windows 10 Home,3.6,50840 89 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 10,4.0,42540 90 | DELL,Vostro,Ryzen,R3-3250U,-,,,,3.9,37340 91 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040 92 | APPLE,2020,Macbook,,,,,,4.7,110940 93 | -------------------------------------------------------------------------------- /part3/ch2/ch2_anova.ipynb: -------------------------------------------------------------------------------- 1 | 
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOUTcimp/KUlGXgMsT+YVQR"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part3/ch2/ch2_anova.ipynb)"],"metadata":{"id":"8nZruVfngrdb"}},{"cell_type":"markdown","source":["# 1. 일원 분산 분석"],"metadata":{"id":"noR3fjbYSw1_"}},{"cell_type":"markdown","source":["### 1. 기본학습"],"metadata":{"id":"80IBaqHZS1JH"}},{"cell_type":"code","source":["import pandas as pd\n","df = pd.DataFrame({\n"," 'A': [10.5, 11.3, 10.8, 9.6, 11.1, 10.2, 10.9, 11.4, 10.5, 10.3],\n"," 'B': [11.9, 12.4, 12.1, 13.2, 12.5, 11.8, 12.2, 12.9, 12.4, 12.3],\n"," 'C': [11.2, 11.7, 11.6, 10.9, 11.3, 11.1, 10.8, 11.5, 11.4, 11.0],\n"," 'D': [9.8, 9.4, 9.1, 9.5, 9.6, 9.9, 9.2, 9.7, 9.3, 9.4]\n","})\n","print(df.head(2))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8Tix8IOgjleA","executionInfo":{"status":"ok","timestamp":1719884010828,"user_tz":-540,"elapsed":413,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"1bc7425f-2413-4cec-e03f-152883a2959a"},"execution_count":30,"outputs":[{"output_type":"stream","name":"stdout","text":[" A B C D\n","0 10.5 11.9 11.2 9.8\n","1 11.3 12.4 11.7 9.4\n"]}]},{"cell_type":"code","execution_count":31,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ie4pxfxlWEvK","executionInfo":{"status":"ok","timestamp":1719884011190,"user_tz":-540,"elapsed":2,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"4aaa08bb-980a-43a0-f340-ded79e0104b6"},"outputs":[{"output_type":"stream","name":"stdout","text":["=== 정규성 검정 ===\n","ShapiroResult(statistic=0.9649055004119873, pvalue=0.840017557144165)\n","ShapiroResult(statistic=0.9468040466308594, 
pvalue=0.63086998462677)\n","ShapiroResult(statistic=0.9701647162437439, pvalue=0.8923683762550354)\n","ShapiroResult(statistic=0.9752339720726013, pvalue=0.9346861243247986)\n","\n"," === 등분산 검정 ===\n","LeveneResult(statistic=1.9355354288758708, pvalue=0.14127835331346628)\n","\n"," === 일원 분산 분석 ===\n","F_onewayResult(statistic=89.12613851177174, pvalue=1.001838152252373e-16)\n"]}],"source":["from scipy import stats\n","\n","print(\"=== 정규성 검정 ===\")\n","print(stats.shapiro(df['A']))\n","print(stats.shapiro(df['B']))\n","print(stats.shapiro(df['C']))\n","print(stats.shapiro(df['D']))\n","\n","print(\"\\n === 등분산 검정 ===\")\n","print(stats.levene(df['A'], df['B'], df['C'], df['D']))\n","\n","print(\"\\n === 일원 분산 분석 ===\")\n","print(stats.f_oneway(df['A'], df['B'], df['C'], df['D']))"]},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"fertilizer.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part3/ch2/fertilizer.csv\")"],"metadata":{"id":"9mnmdtnzFxd0","executionInfo":{"status":"ok","timestamp":1719884311261,"user_tz":-540,"elapsed":344,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}}},"execution_count":35,"outputs":[]},{"cell_type":"code","source":["from statsmodels.formula.api import ols\n","from statsmodels.stats.anova import anova_lm\n","model = ols('성장 ~ C(비료)', df).fit()\n","print(anova_lm(model))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NT_kg6YdJ96X","executionInfo":{"status":"ok","timestamp":1719884328036,"user_tz":-540,"elapsed":343,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"d5f49195-129e-4c1b-ad94-c02df1c36f9e"},"execution_count":36,"outputs":[{"output_type":"stream","name":"stdout","text":[" df sum_sq mean_sq F PR(>F)\n","C(비료) 3.0 43.21875 14.406250 89.126139 1.001838e-16\n","Residual 36.0 5.81900 0.161639 NaN NaN\n"]}]},{"cell_type":"markdown","source":["# 이원 분산 
분석"],"metadata":{"id":"zE_cfuF4TJD7"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"tree.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part3/ch2/tree.csv\")\n","print(df.sample(10))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"d-87C0HTjR9d","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":566,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"a93b5c03-9cea-49c6-da5e-42fca9d13156"},"execution_count":19,"outputs":[{"output_type":"stream","name":"stdout","text":[" 나무 비료 성장률\n","59 B 3 70.755451\n","106 D 2 86.861859\n","78 C 2 63.917608\n","65 C 1 73.562400\n","3 A 1 65.230299\n","10 A 2 48.365823\n","107 D 2 69.745778\n","118 D 3 82.428228\n","112 D 3 71.602302\n","18 A 2 43.919759\n"]}]},{"cell_type":"code","source":["import statsmodels.api as sm\n","from statsmodels.formula.api import ols\n","\n","model = ols('성장률 ~ 나무 + 비료 + 나무:비료', data=df).fit()\n","anova_table = sm.stats.anova_lm(model)\n","print(anova_table)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"40ToLh4Xe0vA","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":10,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"17cb1d31-be34-4021-acf0-660e8b1c1056"},"execution_count":20,"outputs":[{"output_type":"stream","name":"stdout","text":[" df sum_sq mean_sq F PR(>F)\n","나무 3.0 4783.353938 1594.451313 18.391274 9.016693e-10\n","비료 1.0 873.322002 873.322002 10.073374 1.942421e-03\n","나무:비료 3.0 394.801585 131.600528 1.517952 2.137666e-01\n","Residual 112.0 9709.960792 86.696078 NaN NaN\n"]}]},{"cell_type":"code","source":["import statsmodels.api as sm\n","from statsmodels.formula.api import ols\n","\n","model = ols('성장률 ~ C(나무) + C(비료) + C(나무):C(비료)', data=df).fit()\n","anova_table = 
sm.stats.anova_lm(model)\n","print(anova_table)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kE4qc4wtkbj9","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"290d2038-c380-49d0-c2d1-394e56ded6af"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":[" df sum_sq mean_sq F PR(>F)\n","C(나무) 3.0 4783.353938 1594.451313 18.855528 6.600012e-10\n","C(비료) 2.0 1127.924259 563.962129 6.669256 1.857612e-03\n","C(나무):C(비료) 6.0 717.520672 119.586779 1.414199 2.157357e-01\n","Residual 108.0 9132.639448 84.561476 NaN NaN\n"]}]},{"cell_type":"code","source":["print(format(6.600012e-10, '.11f'))\n","print(format(1.857612e-03, '.11f'))\n","print(format(2.157357e-01, '.11f'))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"B6lwreZut_JA","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"091d18c5-5f87-4cd1-fdaf-5d3b76407cf3"},"execution_count":22,"outputs":[{"output_type":"stream","name":"stdout","text":["0.00000000066\n","0.00185761200\n","0.21573570000\n"]}]},{"cell_type":"code","source":["model = ols('성장률 ~ C(나무) * C(비료)', data=df).fit()\n","anova_table = sm.stats.anova_lm(model)\n","print(anova_table)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zzz8pEIyjky7","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"b6c2a751-7bc0-4a49-9b69-876da2c465e0"},"execution_count":23,"outputs":[{"output_type":"stream","name":"stdout","text":[" df sum_sq mean_sq F PR(>F)\n","C(나무) 3.0 4783.353938 1594.451313 18.855528 6.600012e-10\n","C(비료) 2.0 1127.924259 563.962129 6.669256 1.857612e-03\n","C(나무):C(비료) 6.0 717.520672 119.586779 1.414199 2.157357e-01\n","Residual 
108.0 9132.639448 84.561476 NaN NaN\n"]}]}]} -------------------------------------------------------------------------------- /part3/ch2/fertilizer.csv: -------------------------------------------------------------------------------- 1 | 비료,성장 2 | A,10.5 3 | A,11.3 4 | A,10.8 5 | A,9.6 6 | A,11.1 7 | A,10.2 8 | A,10.9 9 | A,11.4 10 | A,10.5 11 | A,10.3 12 | B,11.9 13 | B,12.4 14 | B,12.1 15 | B,13.2 16 | B,12.5 17 | B,11.8 18 | B,12.2 19 | B,12.9 20 | B,12.4 21 | B,12.3 22 | C,11.2 23 | C,11.7 24 | C,11.6 25 | C,10.9 26 | C,11.3 27 | C,11.1 28 | C,10.8 29 | C,11.5 30 | C,11.4 31 | C,11.0 32 | D,9.8 33 | D,9.4 34 | D,9.1 35 | D,9.5 36 | D,9.6 37 | D,9.9 38 | D,9.2 39 | D,9.7 40 | D,9.3 41 | D,9.4 42 | -------------------------------------------------------------------------------- /part3/ch2/tree.csv: -------------------------------------------------------------------------------- 1 | 나무,비료,성장률 2 | A,1,54.96714153011233 3 | A,1,48.61735698828815 4 | A,1,56.47688538100692 5 | A,1,65.23029856408026 6 | A,1,47.658466252766644 7 | A,1,47.6586304305082 8 | A,1,65.79212815507391 9 | A,1,57.67434729152909 10 | A,1,45.30525614065048 11 | A,1,55.42560043585965 12 | A,2,48.365823071875376 13 | A,2,48.34270246429743 14 | A,2,55.41962271566034 15 | A,2,33.86719755342202 16 | A,2,35.75082167486967 17 | A,2,47.37712470759027 18 | A,2,42.87168879665576 19 | A,2,56.142473325952736 20 | A,2,43.91975924478789 21 | A,2,38.87696298664709 22 | A,3,70.65648768921554 23 | A,3,53.74223699513465 24 | A,3,56.67528204687924 25 | A,3,41.75251813786544 26 | A,3,50.556172754748175 27 | A,3,57.10922589709866 28 | A,3,44.490064225776976 29 | A,3,59.75698018345672 30 | A,3,49.99361310081195 31 | A,3,53.083062502067236 32 | B,1,48.98293387770603 33 | B,1,73.52278184508938 34 | B,1,54.86502775262066 35 | B,1,44.422890710440996 36 | B,1,63.225449121031886 37 | B,1,42.791563500289776 38 | B,1,57.08863595004755 39 | B,1,35.403298761202244 40 | B,1,41.71813951101569 41 | B,1,56.96861235869123 42 | 
B,2,65.3846657999541 43 | B,2,59.7136828118997 44 | B,2,56.843517176117594 45 | B,2,54.98896304410711 46 | B,2,43.214780096325725 47 | B,2,50.801557916052914 48 | B,2,53.393612290402125 49 | B,2,68.57122226218915 50 | B,2,61.43618289568462 51 | B,2,40.36959844637266 52 | B,3,64.24083969394795 53 | B,3,57.149177195836835 54 | B,3,54.23077999694041 55 | B,3,67.11676288840869 56 | B,3,71.30999522495951 57 | B,3,70.31280119116198 58 | B,3,52.607824767773614 59 | B,3,57.907876241487855 60 | B,3,64.31263431403565 61 | B,3,70.75545127122359 62 | C,1,55.2082576215471 63 | C,1,58.14341023336183 64 | C,1,48.93665025993972 65 | C,1,48.037933759193294 66 | C,1,68.12525822394198 67 | C,1,73.56240028570824 68 | C,1,59.27989878419666 69 | C,1,70.03532897892023 70 | C,1,63.61636025047634 71 | C,1,53.54880245394876 72 | C,2,66.61395605508415 73 | C,2,78.38036566465969 74 | C,2,62.64173960890049 75 | C,2,78.64643655814007 76 | C,2,36.80254895910255 77 | C,2,71.21902504375224 78 | C,2,63.870470682381715 79 | C,2,60.009926495341325 80 | C,2,63.91760776535502 81 | C,2,43.124310853991076 82 | C,3,63.80328112162488 83 | C,3,69.57112571511746 84 | C,3,80.77894044741517 85 | C,3,60.81729781726352 86 | C,3,57.91506397106812 87 | C,3,60.982429564154636 88 | C,3,75.15402117702074 89 | C,3,69.28751109659684 90 | C,3,60.70239796232961 91 | C,3,71.13267433113356 92 | D,1,65.9707754934804 93 | D,1,74.68644990532889 94 | D,1,57.979469061226474 95 | D,1,61.723378534022316 96 | D,1,61.078918468678424 97 | D,1,50.364850518678814 98 | D,1,67.96120277064577 99 | D,1,67.6105527217989 100 | D,1,65.05113456642461 101 | D,1,62.65412866624853 102 | D,2,53.84629257949586 103 | D,2,63.79354677234641 104 | D,2,64.57285483473231 105 | D,2,59.97722730778381 106 | D,2,66.3871428833399 107 | D,2,72.04050856814538 108 | D,2,86.8618590121053 109 | D,2,69.74577812831839 110 | D,2,70.57550390722764 111 | D,2,67.25554084233832 112 | D,3,51.812287847009586 113 | D,3,70.73486124550783 114 | D,3,71.60230209941027 115 | 
D,3,95.63242112485287 116 | D,3,69.07639035218878 117 | D,3,74.01547342333612 118 | D,3,70.65288230294757 119 | D,3,59.31321962380468 120 | D,3,82.4282281451502 121 | D,3,78.51933032686775 122 | -------------------------------------------------------------------------------- /part3/ch4/study.csv: -------------------------------------------------------------------------------- 1 | study_hours,material_type,score 2 | 71,강의,95 3 | 34,독학,63 4 | 91,도서,95 5 | 80,독학,80 6 | 40,강의,79 7 | 94,강의,100 8 | 94,도서,99 9 | 43,독학,63 10 | 22,강의,72 11 | 41,도서,68 12 | 72,강의,99 13 | 21,강의,66 14 | 49,독학,65 15 | 57,강의,93 16 | 21,독학,48 17 | 83,강의,100 18 | 79,도서,89 19 | 40,강의,78 20 | 52,독학,62 21 | 95,독학,87 22 | 77,도서,85 23 | 41,강의,81 24 | 68,강의,91 25 | 78,도서,92 26 | 61,강의,88 27 | 79,강의,98 28 | 99,독학,91 29 | 34,강의,73 30 | 81,강의,100 31 | 81,도서,93 32 | 66,도서,78 33 | 81,독학,80 34 | 70,강의,95 35 | 74,강의,98 36 | 83,강의,98 37 | 22,독학,47 38 | 70,독학,76 39 | 26,도서,63 40 | 40,독학,60 41 | 92,강의,100 42 | 58,강의,87 43 | 37,독학,59 44 | 23,강의,72 45 | 79,강의,97 46 | 33,독학,61 47 | 28,독학,55 48 | 72,강의,93 49 | 21,강의,72 50 | 79,강의,97 51 | 90,도서,96 52 | 63,도서,84 53 | 27,도서,61 54 | 66,독학,75 55 | 54,도서,78 56 | 97,독학,90 57 | 100,독학,94 58 | 55,도서,76 59 | 69,도서,82 60 | 23,도서,59 61 | 21,강의,68 62 | 25,도서,62 63 | 73,강의,97 64 | 23,독학,52 65 | 73,강의,98 66 | 82,도서,91 67 | 37,독학,62 68 | 63,독학,70 69 | 53,독학,73 70 | 93,강의,100 71 | 81,도서,88 72 | 33,독학,53 73 | 67,독학,74 74 | 34,독학,56 75 | 91,강의,100 76 | 97,강의,100 77 | 81,도서,90 78 | 59,강의,87 79 | 99,독학,85 80 | 72,도서,84 81 | 43,독학,63 82 | 45,독학,63 83 | 79,강의,96 84 | 60,도서,80 85 | 48,강의,84 86 | 34,강의,74 87 | 64,도서,82 88 | 84,독학,82 89 | 90,독학,82 90 | 28,도서,64 91 | 20,독학,51 92 | 27,도서,66 93 | 82,도서,93 94 | 30,강의,71 95 | 100,도서,97 96 | 27,강의,74 97 | 54,독학,68 98 | 54,독학,68 99 | 52,독학,75 100 | 24,독학,53 101 | 60,강의,92 102 | -------------------------------------------------------------------------------- /part3/ch5/ch5_logistic_regression.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part3/ch5/ch5_logistic_regression.ipynb)" 21 | ], 22 | "metadata": { 23 | "id": "nFYUmBR9hu_R" 24 | } 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "source": [ 29 | "# 로지스틱 회귀분석" 30 | ], 31 | "metadata": { 32 | "id": "FwzB2swKZo2j" 33 | } 34 | }, 35 | { 36 | "cell_type": "code", 37 | "source": [ 38 | "import pandas as pd\n", 39 | "# df = pd.read_csv(\"health_survey.csv\")\n", 40 | "df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part3/ch5/health_survey.csv\")\n", 41 | "\n", 42 | "print(df.head())" 43 | ], 44 | "metadata": { 45 | "colab": { 46 | "base_uri": "https://localhost:8080/" 47 | }, 48 | "id": "JuSKhrlWfVaI", 49 | "outputId": "d23e18d5-6aa4-4f44-ff22-5c2a6fce1a8e" 50 | }, 51 | "execution_count": 1, 52 | "outputs": [ 53 | { 54 | "output_type": "stream", 55 | "name": "stdout", 56 | "text": [ 57 | " age bmi smoker activity_level disease\n", 58 | "0 62 35.179089 0 0 1\n", 59 | "1 65 18.576042 0 2 1\n", 60 | "2 71 33.178426 0 1 1\n", 61 | "3 18 37.063007 1 2 0\n", 62 | "4 21 17.613266 0 0 0\n" 63 | ] 64 | } 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "source": [ 70 | "from statsmodels.formula.api import logit\n", 71 | "\n", 72 | "model = logit('disease ~ age + bmi', data=df).fit()\n", 73 | "print(model.summary())" 74 | ], 75 | "metadata": { 76 | "colab": { 77 | "base_uri": "https://localhost:8080/" 78 | }, 79 | "id": "y00i8apYgU5b", 80 | "outputId": "40b4482e-f809-444e-9e38-54161c1190d0" 81 | }, 82 | 
"execution_count": 2, 83 | "outputs": [ 84 | { 85 | "output_type": "stream", 86 | "name": "stdout", 87 | "text": [ 88 | "Optimization terminated successfully.\n", 89 | " Current function value: 0.643725\n", 90 | " Iterations 5\n", 91 | " Logit Regression Results \n", 92 | "==============================================================================\n", 93 | "Dep. Variable: disease No. Observations: 1000\n", 94 | "Model: Logit Df Residuals: 997\n", 95 | "Method: MLE Df Model: 2\n", 96 | "Date: Mon, 05 Aug 2024 Pseudo R-squ.: 0.04996\n", 97 | "Time: 15:30:22 Log-Likelihood: -643.72\n", 98 | "converged: True LL-Null: -677.58\n", 99 | "Covariance Type: nonrobust LLR p-value: 1.984e-15\n", 100 | "==============================================================================\n", 101 | " coef std err z P>|z| [0.025 0.975]\n", 102 | "------------------------------------------------------------------------------\n", 103 | "Intercept -1.8700 0.289 -6.482 0.000 -2.435 -1.305\n", 104 | "age 0.0177 0.004 4.747 0.000 0.010 0.025\n", 105 | "bmi 0.0563 0.009 6.418 0.000 0.039 0.074\n", 106 | "==============================================================================\n" 107 | ] 108 | } 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "source": [ 114 | "import numpy as np\n", 115 | "print(model.params['bmi'])\n", 116 | "print(np.exp(model.params['bmi']))" 117 | ], 118 | "metadata": { 119 | "colab": { 120 | "base_uri": "https://localhost:8080/" 121 | }, 122 | "id": "WBltnh1-gU8M", 123 | "outputId": "6bad8637-f1fc-4e26-813e-3d5b55271245" 124 | }, 125 | "execution_count": 3, 126 | "outputs": [ 127 | { 128 | "output_type": "stream", 129 | "name": "stdout", 130 | "text": [ 131 | "0.056333879687088535\n", 132 | "1.057950853075076\n" 133 | ] 134 | } 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "source": [ 140 | "print(model.llf)" 141 | ], 142 | "metadata": { 143 | "colab": { 144 | "base_uri": "https://localhost:8080/" 145 | }, 146 | "id": "_icMMT0zfOan", 147 | 
"outputId": "8f53d796-0f9b-49a6-aa81-faeb1e290280" 148 | }, 149 | "execution_count": 4, 150 | "outputs": [ 151 | { 152 | "output_type": "stream", 153 | "name": "stdout", 154 | "text": [ 155 | "-643.7246164682088\n" 156 | ] 157 | } 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "source": [ 163 | "print(-2 * model.llf)" 164 | ], 165 | "metadata": { 166 | "colab": { 167 | "base_uri": "https://localhost:8080/" 168 | }, 169 | "id": "Xl9ltoyZfYW6", 170 | "outputId": "06aae68f-95ed-4060-f3d7-ef33e6c31520" 171 | }, 172 | "execution_count": 5, 173 | "outputs": [ 174 | { 175 | "output_type": "stream", 176 | "name": "stdout", 177 | "text": [ 178 | "1287.4492329364175\n" 179 | ] 180 | } 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "source": [], 186 | "metadata": { 187 | "id": "aCxBIez0hLB8" 188 | }, 189 | "execution_count": 5, 190 | "outputs": [] 191 | } 192 | ] 193 | } -------------------------------------------------------------------------------- /part3/ch6/customer_travel.csv: -------------------------------------------------------------------------------- 1 | age,service,social,booked,target 2 | 34,6,0,1,0 3 | 34,5,1,0,1 4 | 37,3,1,0,0 5 | 30,2,0,0,0 6 | 30,1,0,0,0 7 | 27,1,0,1,1 8 | 34,4,1,1,0 9 | 34,2,1,0,1 10 | 30,3,0,1,0 11 | 36,1,0,0,1 12 | 34,1,1,1,0 13 | 28,2,0,0,1 14 | 35,1,1,1,0 15 | 34,4,0,0,0 16 | 34,5,0,0,0 17 | 37,6,0,1,0 18 | 30,1,1,1,0 19 | 30,1,1,0,0 20 | 31,1,0,1,0 21 | 37,2,1,0,1 22 | 30,4,0,1,0 23 | 31,1,0,0,1 24 | 34,1,1,0,0 25 | 30,2,0,0,0 26 | 34,1,0,1,0 27 | 38,1,0,1,0 28 | 37,3,1,0,0 29 | 30,5,1,0,0 30 | 28,1,1,0,0 31 | 34,1,0,0,0 32 | 33,6,0,1,0 33 | 34,2,0,0,0 34 | 27,3,1,0,0 35 | 35,1,0,0,1 36 | 30,4,0,0,0 37 | 36,2,0,1,0 38 | 34,1,1,1,0 39 | 37,1,1,0,1 40 | 37,3,0,0,0 41 | 36,2,0,0,0 42 | 27,5,0,1,0 43 | 36,4,0,0,0 44 | 28,1,1,1,0 45 | 30,2,0,0,0 46 | 27,3,0,0,0 47 | 37,6,0,1,1 48 | 27,1,1,0,0 49 | 38,2,1,0,0 50 | 30,4,0,1,0 51 | 34,1,0,0,0 52 | 34,3,0,1,1 53 | 31,2,0,0,0 54 | 34,1,1,0,0 55 | 30,5,0,0,0 56 | 31,1,0,1,0 
57 | 28,4,1,1,1 58 | 30,3,1,1,0 59 | 37,1,1,0,1 60 | 36,1,0,0,0 61 | 36,2,0,0,0 62 | 34,6,0,1,0 63 | 35,1,0,0,0 64 | 30,4,1,0,0 65 | 29,2,0,0,0 66 | 33,1,1,1,0 67 | 28,1,0,1,1 68 | 33,5,1,1,0 69 | 37,2,1,0,1 70 | 31,3,0,0,0 71 | 34,4,0,0,1 72 | 37,1,0,1,0 73 | 30,2,0,0,0 74 | 30,1,1,1,0 75 | 30,1,1,0,1 76 | 30,3,0,0,0 77 | 37,6,0,1,0 78 | 31,4,1,0,0 79 | 34,1,1,0,0 80 | 34,1,0,1,0 81 | 34,5,0,0,0 82 | 28,3,0,1,0 83 | 27,1,0,0,1 84 | 30,1,1,0,0 85 | 37,4,0,0,0 86 | 35,1,0,1,0 87 | 27,1,0,1,1 88 | 35,3,1,0,0 89 | 30,2,1,0,0 90 | 37,1,0,1,0 91 | 30,1,0,0,0 92 | 29,6,0,1,0 93 | 30,2,1,0,1 94 | 30,5,1,0,0 95 | 36,1,0,0,1 96 | 37,1,0,0,0 97 | 28,2,0,1,1 98 | 30,1,1,1,0 99 | 31,4,1,0,1 100 | 31,3,0,0,0 101 | 31,2,0,0,0 102 | 30,1,1,1,0 103 | 36,1,0,0,0 104 | 31,1,1,1,0 105 | 28,2,0,0,1 106 | 30,4,0,1,0 107 | 30,6,0,1,1 108 | 37,1,1,0,0 109 | 37,2,1,0,0 110 | 28,1,0,1,0 111 | 27,1,1,0,1 112 | 34,3,0,1,0 113 | 30,4,0,0,0 114 | 33,1,1,1,0 115 | 31,1,0,0,0 116 | 31,1,0,1,0 117 | 31,2,0,1,0 118 | 30,3,1,0,0 119 | 34,1,1,0,1 120 | 34,5,1,0,0 121 | 34,2,0,0,0 122 | 30,6,0,1,0 123 | 28,1,0,0,1 124 | 35,3,1,0,0 125 | 29,2,0,0,0 126 | 35,1,0,0,0 127 | 31,4,0,1,0 128 | 35,1,1,1,0 129 | 29,2,1,0,1 130 | 34,3,0,1,0 131 | 31,1,0,0,1 132 | 30,1,0,1,0 133 | 30,5,0,0,0 134 | 30,4,1,1,0 135 | 27,1,0,0,1 136 | 30,3,0,0,0 137 | 30,6,0,1,0 138 | 31,1,1,1,0 139 | 31,1,1,0,0 140 | 30,1,0,1,0 141 | 38,4,0,0,0 142 | 30,3,0,1,0 143 | 27,1,0,0,1 144 | 35,1,1,0,0 145 | 28,2,0,0,1 146 | 34,5,0,1,0 147 | 30,1,1,1,1 148 | 33,4,1,0,0 149 | 34,2,1,0,0 150 | 27,1,0,0,0 151 | 30,1,0,0,0 152 | 30,6,0,1,0 153 | 31,2,0,0,0 154 | 30,3,1,1,0 155 | 27,4,0,0,1 156 | 30,1,1,0,0 157 | 37,2,0,1,0 158 | 30,1,1,1,0 159 | 30,5,1,0,1 160 | 34,3,0,0,0 161 | 29,2,0,0,0 162 | 34,4,0,1,0 163 | 37,1,0,0,0 164 | 37,1,1,1,0 165 | 35,2,1,0,1 166 | 30,3,0,0,0 167 | 36,6,0,1,1 168 | 30,1,1,0,0 169 | 34,4,1,0,0 170 | 29,1,0,1,0 171 | 27,1,0,0,1 172 | 34,5,0,1,0 173 | 30,2,0,0,0 174 | 29,1,1,0,0 175 | 28,1,0,0,1 176 | 37,4,0,1,0 
177 | 33,2,0,1,0 178 | 37,3,1,1,1 179 | 30,1,1,0,1 180 | 35,1,0,0,0 181 | 28,2,0,0,1 182 | 30,6,0,1,0 183 | 28,4,1,0,1 184 | 38,3,1,0,0 185 | 27,5,0,0,1 186 | 27,1,0,0,0 187 | 30,1,0,1,0 188 | 30,1,1,1,0 189 | 30,2,1,0,1 190 | 28,4,0,0,0 191 | 28,1,0,0,1 192 | 29,1,1,1,0 193 | 33,2,0,0,0 194 | 30,1,1,1,0 195 | 37,1,0,0,0 196 | 36,3,0,0,0 197 | 27,6,0,1,1 198 | 37,5,1,0,1 199 | 31,1,1,0,0 200 | 27,1,0,1,0 201 | 33,2,1,0,1 202 | 29,3,0,1,0 203 | 34,1,0,0,1 204 | 30,4,1,0,0 205 | 30,2,0,0,0 206 | 35,1,0,1,0 207 | 31,1,0,1,0 208 | 27,3,1,0,0 209 | 34,2,1,0,0 210 | 31,1,1,1,0 211 | 27,5,0,0,1 212 | 30,6,0,1,0 213 | 29,2,0,0,0 214 | 30,3,1,0,0 215 | 28,1,0,0,1 216 | 29,1,0,0,0 217 | 36,2,0,1,0 218 | 37,4,1,1,0 219 | 30,1,1,0,1 220 | 28,3,0,0,0 221 | 34,2,0,0,0 222 | 38,1,0,1,0 223 | 30,1,0,0,0 224 | 30,5,1,1,0 225 | 33,4,0,0,0 226 | 34,3,0,1,0 227 | 34,6,0,1,1 228 | 37,1,1,0,0 229 | 37,2,1,0,0 230 | 37,1,0,1,0 231 | 37,1,0,0,0 232 | 31,4,0,1,0 233 | 34,2,0,0,0 234 | 30,1,1,1,0 235 | 34,1,0,0,0 236 | 37,1,0,1,0 237 | 33,5,1,1,1 238 | 28,3,1,0,0 239 | 33,4,1,0,1 240 | 31,1,0,0,0 241 | 28,2,0,0,1 242 | 29,6,0,1,0 243 | 38,1,0,0,0 244 | 36,3,1,0,0 245 | 28,2,0,0,1 246 | 29,4,1,0,0 247 | 30,1,0,1,0 248 | 34,1,1,1,0 249 | 29,2,1,0,1 250 | 37,5,0,1,1 251 | 30,1,0,0,1 252 | 37,1,0,1,0 253 | 30,4,0,0,0 254 | 31,1,1,1,0 255 | 30,1,1,0,1 256 | 30,3,0,0,0 257 | 37,6,0,1,0 258 | 37,1,1,1,0 259 | 36,1,1,0,0 260 | 31,4,0,1,0 261 | 36,2,0,0,0 262 | 28,3,0,1,0 263 | 30,5,0,0,1 264 | 28,1,1,0,0 265 | 38,2,0,0,0 266 | 31,1,0,1,0 267 | 29,4,0,1,0 268 | 34,3,1,0,0 269 | 36,2,1,0,0 270 | 37,1,0,0,0 271 | 29,1,0,0,0 272 | 37,6,0,1,0 273 | 38,2,1,0,1 274 | 30,4,1,1,0 275 | 37,1,0,0,1 276 | 35,5,0,0,1 277 | 28,2,0,1,1 278 | 31,1,1,1,0 279 | 34,1,1,0,1 280 | 35,3,0,0,0 281 | 35,4,0,0,0 282 | 35,1,1,1,0 283 | 28,1,0,0,1 284 | 28,1,1,1,0 285 | 28,2,0,0,1 286 | 30,3,0,0,0 287 | 36,6,0,1,1 288 | 30,4,1,0,0 289 | 31,5,1,0,0 290 | 31,1,0,1,0 291 | 28,1,1,0,1 292 | 35,3,0,1,0 293 | 37,2,0,0,0 294 | 
29,1,1,0,0 295 | 34,4,0,0,0 296 | 31,1,0,1,0 297 | 30,2,0,1,0 298 | 35,3,1,1,0 299 | 38,1,1,0,1 300 | 27,1,1,0,0 301 | 29,2,0,0,0 302 | 35,6,0,1,1 303 | 29,1,0,0,0 304 | 37,3,1,0,1 305 | 34,2,0,0,0 306 | 37,1,0,1,0 307 | 28,1,0,1,1 308 | 36,1,1,1,0 309 | 30,4,1,0,1 310 | 37,3,0,0,0 311 | 36,1,0,0,1 312 | 33,1,0,1,0 313 | 37,2,0,0,0 314 | 35,1,1,1,0 315 | 27,5,0,0,1 316 | 28,4,0,0,0 317 | 30,6,0,1,0 318 | 34,1,1,0,0 319 | 35,1,1,0,0 320 | 37,1,0,1,0 321 | 27,2,0,0,1 322 | 38,3,0,1,0 323 | 30,4,0,0,1 324 | 30,1,1,0,0 325 | 36,2,0,0,0 326 | 30,1,0,1,0 327 | 34,1,1,1,1 328 | 30,5,1,0,0 329 | 37,2,1,0,0 330 | 37,4,0,1,0 331 | 30,1,0,0,0 332 | 38,6,0,1,0 333 | 36,2,0,0,0 334 | 34,3,1,0,0 335 | 34,1,0,0,1 336 | 31,1,1,0,0 337 | 30,4,0,1,0 338 | 35,1,1,1,0 339 | 33,1,1,0,1 340 | 30,3,0,0,0 341 | 31,5,0,0,0 342 | 29,1,0,1,0 343 | 30,1,0,0,0 344 | 30,4,1,1,0 345 | 30,2,1,0,1 346 | 35,3,0,1,1 347 | 31,6,0,1,1 348 | 27,1,1,0,0 349 | 37,2,1,0,0 350 | 34,1,0,1,0 351 | 30,4,0,0,0 352 | 34,3,0,1,0 353 | 31,2,0,0,0 354 | 34,5,1,1,0 355 | 34,1,0,0,0 356 | 27,1,0,1,0 357 | 30,2,0,1,0 358 | 37,4,1,0,0 359 | 37,1,1,0,1 360 | 37,1,0,0,0 361 | 27,2,0,0,1 362 | 30,6,0,1,0 363 | 31,1,1,0,1 364 | 30,3,1,0,0 365 | 37,4,0,0,0 366 | 34,1,0,0,0 367 | 27,5,0,1,1 368 | 30,1,1,1,0 369 | 34,2,1,0,1 370 | 38,3,0,1,0 371 | 35,1,0,0,1 372 | 34,4,1,1,0 373 | 29,2,0,0,0 374 | 31,1,1,1,0 375 | 30,1,0,0,0 376 | 30,3,0,0,0 377 | 30,6,0,1,0 378 | 34,1,1,1,0 379 | 29,4,1,0,0 380 | 27,5,0,1,0 381 | 29,2,1,0,1 382 | 30,3,0,1,0 383 | 31,1,0,0,1 384 | 34,1,1,0,0 385 | 31,2,0,0,0 386 | 36,4,0,1,0 387 | 37,1,0,1,0 388 | 30,3,1,0,0 389 | 30,2,1,0,0 390 | 30,1,1,0,0 391 | 34,1,0,0,0 392 | 36,6,0,1,0 393 | 30,5,0,0,0 394 | 34,3,1,1,0 395 | 31,1,0,0,1 396 | 30,1,0,0,0 397 | 35,2,0,1,0 398 | 36,1,1,1,0 399 | 27,1,1,0,1 400 | 30,4,0,0,0 401 | 38,2,0,0,0 402 | 31,1,0,1,0 403 | 37,1,0,0,0 404 | 34,1,1,1,0 405 | 30,2,0,0,0 406 | 36,5,0,0,1 407 | 35,6,0,1,1 408 | 30,1,1,0,0 409 | 27,2,1,0,1 410 | 34,1,0,1,0 411 | 30,1,0,0,0 
412 | 30,3,0,1,0 413 | 30,2,0,0,0 414 | 30,4,1,0,0 415 | 27,1,0,0,1 416 | 36,1,0,1,0 417 | 30,2,1,1,1 418 | 34,3,1,1,0 419 | 34,5,1,0,1 420 | 37,1,0,0,0 421 | 27,4,0,0,1 422 | 37,6,0,1,0 423 | 30,1,0,0,0 424 | 37,3,1,0,0 425 | 27,2,0,0,1 426 | 31,1,1,1,0 427 | 31,1,0,1,0 428 | 27,4,1,1,0 429 | 27,2,1,0,1 430 | 30,3,0,0,0 431 | 35,1,0,0,1 432 | 28,5,0,1,0 433 | 30,2,0,0,0 434 | 37,1,1,1,0 435 | 30,4,1,0,1 436 | 37,3,0,0,0 437 | 37,6,0,1,0 438 | 30,1,1,0,0 439 | 29,1,1,0,0 440 | 36,1,0,1,0 441 | 30,2,0,0,0 442 | 37,4,0,1,0 443 | 37,1,0,0,1 444 | 31,1,1,0,0 445 | 27,5,0,0,1 446 | 30,1,0,1,0 447 | 29,1,0,1,0 448 | 30,3,1,0,0 449 | 31,4,1,0,0 450 | 36,1,0,1,0 451 | 34,1,0,0,0 452 | 34,6,0,1,0 453 | 30,2,1,0,1 454 | 37,3,1,0,0 455 | 34,1,0,0,1 456 | 30,4,0,0,0 457 | 38,2,0,1,0 458 | 28,5,1,1,0 459 | 27,1,1,0,1 460 | 28,3,0,0,0 461 | 37,2,0,0,0 462 | 27,1,1,1,0 463 | 38,4,0,0,0 464 | 28,1,1,1,0 465 | 36,2,0,0,0 466 | 37,3,0,1,0 467 | 28,6,0,1,1 468 | 30,1,1,0,0 469 | 28,2,1,0,1 470 | 30,4,0,1,0 471 | 27,5,1,0,1 472 | 30,3,0,1,0 473 | 28,2,0,0,1 474 | 30,1,1,1,0 475 | 27,1,0,0,1 476 | 34,1,0,1,0 477 | 37,4,0,1,0 478 | 36,3,1,0,0 479 | 37,1,1,0,1 480 | 31,1,1,0,0 481 | 37,2,0,0,0 482 | 37,6,0,1,0 483 | 31,1,0,0,0 484 | 35,5,1,0,1 485 | 30,2,0,0,0 486 | 36,1,0,0,0 487 | 29,1,0,1,0 488 | 36,1,1,1,0 489 | 37,2,1,0,1 490 | 30,3,0,1,0 491 | 33,4,0,0,1 492 | 31,1,0,1,0 493 | 30,2,0,0,0 494 | 37,1,1,1,0 495 | 31,1,0,0,0 496 | 37,3,0,0,0 497 | 34,6,0,1,0 498 | 28,4,1,1,0 499 | 30,1,1,0,0 500 | 31,1,0,1,0 501 | 35,2,0,0,0 502 | 28,3,0,1,0 503 | 34,1,0,0,1 504 | 30,1,1,0,0 505 | 34,4,0,0,0 506 | 31,1,0,1,0 507 | 30,1,1,1,1 508 | 30,3,1,0,0 509 | 35,2,1,0,0 510 | 30,5,0,0,0 511 | 30,1,0,0,0 512 | 34,6,0,1,0 513 | 34,2,0,0,0 514 | 31,3,1,1,0 515 | 29,1,0,0,1 516 | 30,1,1,0,0 517 | 29,2,0,1,0 518 | 34,1,1,1,0 519 | 34,4,1,0,1 520 | 31,3,0,0,0 521 | 27,2,0,0,1 522 | 38,1,0,1,0 523 | 30,5,0,0,0 524 | 38,1,1,1,0 525 | 36,2,1,0,1 526 | 35,4,0,0,0 527 | 28,6,0,1,1 528 | 34,1,1,0,0 529 | 
30,2,1,0,0 530 | 27,1,0,1,0 531 | 30,1,0,0,0 532 | 31,3,0,1,0 533 | 30,4,0,0,0 534 | 34,1,1,0,0 535 | 31,1,0,0,0 536 | 29,5,0,1,0 537 | 34,2,0,1,0 538 | 33,3,1,1,0 539 | 31,1,1,0,1 540 | 30,4,0,0,0 541 | 36,2,0,0,0 542 | 28,6,0,1,0 543 | 31,1,1,0,1 544 | 29,3,1,0,0 545 | 37,2,0,0,0 546 | 34,1,0,1,0 547 | 28,4,0,1,1 548 | 30,1,1,1,0 549 | 28,5,1,0,1 550 | 29,3,0,0,0 551 | 30,1,0,0,1 552 | 37,1,1,1,0 553 | 27,2,0,0,1 554 | 29,4,1,1,0 555 | 36,1,0,0,0 556 | 35,3,0,0,1 557 | 28,6,0,1,1 558 | 36,1,1,0,0 559 | 29,1,1,0,0 560 | 37,1,0,1,0 561 | 31,4,1,0,1 562 | 28,5,0,1,0 563 | 31,1,0,0,1 564 | 36,1,1,0,0 565 | 36,2,0,0,0 566 | 31,1,0,1,0 567 | 30,1,0,1,0 568 | 29,4,1,0,0 569 | 38,2,1,0,0 570 | 38,1,1,1,0 571 | 29,1,0,0,0 572 | 27,6,0,1,0 573 | 35,2,0,0,0 574 | 30,3,1,0,0 575 | 29,5,0,0,1 576 | 31,1,0,0,0 577 | 31,2,0,1,0 578 | 34,1,1,1,0 579 | 29,1,1,0,1 580 | 37,3,0,0,0 581 | 36,2,0,0,0 582 | 34,4,0,1,0 583 | 30,1,0,0,0 584 | 31,1,1,1,0 585 | 31,2,0,0,0 586 | 31,3,0,1,0 587 | 37,6,0,1,1 588 | 30,5,1,0,0 589 | 31,4,1,0,0 590 | 38,1,0,1,0 591 | 28,1,0,0,1 592 | 30,3,0,1,0 593 | 27,2,0,0,1 594 | 27,1,1,0,0 595 | 30,1,0,0,0 596 | 36,4,0,1,0 597 | 27,2,1,1,1 598 | 34,3,1,0,1 599 | 37,1,1,0,1 600 | 38,1,0,0,0 601 | 37,5,0,0,1 602 | 37,6,0,1,0 603 | 37,4,0,0,0 604 | 37,3,1,0,0 605 | 36,2,0,0,0 606 | 35,1,1,0,0 607 | 30,1,0,1,0 608 | 30,1,1,1,0 609 | 30,2,1,0,1 610 | 30,4,0,1,0 611 | 30,1,0,0,1 612 | 34,1,0,1,0 613 | 30,2,0,0,0 614 | 30,5,1,1,0 615 | 34,1,1,0,1 616 | 37,3,0,0,0 617 | 30,6,0,1,0 618 | 29,1,1,1,0 619 | 33,1,1,0,0 620 | 34,1,0,1,0 621 | 38,2,0,0,0 622 | 31,3,0,1,0 623 | 30,1,0,0,1 624 | 35,4,1,0,0 625 | 30,2,0,0,0 626 | 37,1,0,1,0 627 | 34,5,0,1,0 628 | 30,3,1,0,0 629 | 29,2,1,0,0 630 | 30,1,0,0,0 631 | 27,4,0,0,1 632 | 30,6,0,1,0 633 | 36,2,1,0,1 634 | 37,3,1,1,0 635 | 30,1,0,0,1 636 | 37,1,0,0,0 637 | 30,2,0,1,0 638 | 27,4,1,1,0 639 | 37,1,1,0,1 640 | 28,5,0,0,0 641 | 34,2,0,0,0 642 | 29,1,1,1,0 643 | 37,1,0,0,0 644 | 28,1,1,1,0 645 | 30,4,0,0,0 646 | 35,3,0,0,0 
647 | 30,6,0,1,1 648 | 30,1,1,0,0 649 | 37,2,1,0,0 650 | 30,1,0,1,0 651 | 30,1,1,0,1 652 | 28,4,0,1,0 653 | 36,5,0,0,1 654 | 36,1,1,0,0 655 | 29,1,0,0,0 656 | 35,1,0,1,0 657 | 30,2,0,1,0 658 | 34,3,1,1,0 659 | 30,4,1,0,1 660 | 33,1,1,0,0 661 | 37,2,0,0,0 662 | 30,6,0,1,0 663 | 30,1,0,0,0 664 | 28,3,1,0,0 665 | 35,2,0,0,0 666 | 37,5,0,1,1 667 | 30,1,0,1,0 668 | 30,1,1,1,0 669 | 28,2,1,0,1 670 | 36,3,0,0,0 671 | 28,1,0,0,1 672 | 30,1,0,1,0 673 | 31,4,0,0,0 674 | 29,1,1,1,0 675 | 33,1,0,0,0 676 | 29,3,0,0,0 677 | 29,6,0,1,0 678 | 31,1,1,0,0 679 | 29,5,1,0,0 680 | 27,4,0,1,0 681 | 28,2,0,0,1 682 | 31,3,0,1,0 683 | 30,1,0,0,1 684 | 28,1,1,0,0 685 | 30,2,0,0,0 686 | 27,1,0,1,0 687 | 37,4,1,1,1 688 | 36,3,1,0,0 689 | 30,2,1,0,0 690 | 36,1,0,1,0 691 | 29,1,0,0,0 692 | 36,6,0,1,1 693 | 34,2,0,0,0 694 | 34,4,1,0,0 695 | 29,1,0,0,1 696 | 37,1,1,0,0 697 | 30,2,0,1,0 698 | 35,1,1,1,0 699 | 27,1,1,0,1 700 | 36,3,0,0,0 701 | 35,4,0,0,0 702 | 30,1,0,1,0 703 | 31,1,0,0,0 704 | 30,1,1,1,0 705 | 29,5,1,0,1 706 | 36,3,0,1,0 707 | 29,6,0,1,1 708 | 37,4,1,0,0 709 | 30,2,1,0,0 710 | 37,1,0,1,0 711 | 36,1,0,0,0 712 | 28,3,0,1,0 713 | 37,2,0,0,0 714 | 30,1,1,1,0 715 | 30,4,0,0,0 716 | 30,1,0,1,0 717 | 30,2,0,1,0 718 | 31,5,1,0,0 719 | 30,1,1,0,1 720 | 29,1,0,0,0 721 | 37,2,0,0,0 722 | 29,6,0,1,0 723 | 37,1,1,0,1 724 | 36,3,1,0,1 725 | 31,2,0,0,0 726 | 27,1,0,0,0 727 | 33,1,0,1,0 728 | 27,1,1,1,0 729 | 37,4,1,0,1 730 | 30,3,0,1,0 731 | 29,5,0,0,1 732 | 34,1,1,1,0 733 | 30,2,0,0,0 734 | 33,1,1,1,0 735 | 29,1,0,0,0 736 | 37,4,0,0,0 737 | 30,6,0,1,0 738 | 33,1,1,1,0 739 | 30,1,1,0,0 740 | 30,1,0,1,0 741 | 36,2,1,0,1 742 | 37,3,0,1,0 743 | 37,4,0,0,1 744 | 37,5,1,0,1 745 | 31,2,0,0,0 746 | 31,1,0,1,0 747 | 30,1,0,1,0 748 | 29,3,1,0,0 749 | 37,2,1,0,0 750 | 30,4,1,0,0 751 | 37,1,0,0,0 752 | 38,6,0,1,0 753 | 30,2,0,0,0 754 | 30,3,1,1,0 755 | 36,1,0,0,1 756 | 35,1,0,0,0 757 | 31,5,0,1,0 758 | 30,1,1,1,0 759 | 29,1,1,0,1 760 | 36,3,0,0,0 761 | 37,2,0,0,0 762 | 29,1,0,1,0 763 | 28,1,0,0,1 764 | 
30,4,1,1,0 765 | 30,2,0,0,0 766 | 29,3,0,0,0 767 | 31,6,0,1,1 768 | 37,1,1,0,0 769 | 30,2,1,0,0 770 | 37,5,0,1,1 771 | 33,4,0,0,0 772 | 30,3,0,1,0 773 | 31,2,0,0,0 774 | 38,1,1,0,0 775 | 34,1,0,0,0 776 | 30,1,0,1,0 777 | 35,2,1,1,1 778 | 27,4,1,0,0 779 | 29,1,1,0,1 780 | 29,1,0,0,0 781 | 27,2,0,0,1 782 | 34,6,0,1,0 783 | 29,5,0,0,0 784 | 30,3,1,0,0 785 | 31,4,0,0,0 786 | 34,1,1,1,0 787 | 31,1,0,1,0 788 | 31,1,1,1,0 789 | 37,2,1,0,1 790 | 37,3,0,0,0 791 | 30,1,0,0,1 792 | 35,4,0,1,0 793 | 38,2,0,0,0 794 | 30,1,1,1,0 795 | 30,1,1,0,1 796 | 28,5,0,0,0 797 | 35,6,0,1,0 798 | 30,1,1,0,0 799 | 30,4,1,0,0 800 | 37,1,0,1,0 801 | 28,4,0,0,1 -------------------------------------------------------------------------------- /part3/ch6/math.csv: -------------------------------------------------------------------------------- 1 | groups,scores 2 | group_A,85 3 | group_A,88 4 | group_A,90 5 | group_A,82 6 | group_A,87 7 | group_A,89 8 | group_A,92 9 | group_A,86 10 | group_A,88 11 | group_A,90 12 | group_B,78 13 | group_B,80 14 | group_B,79 15 | group_B,81 16 | group_B,80 17 | group_B,78 18 | group_B,77 19 | group_B,79 20 | group_B,78 21 | group_B,82 22 | group_C,80 23 | group_C,82 24 | group_C,85 25 | group_C,84 26 | group_C,81 27 | group_C,86 28 | group_C,82 29 | group_C,81 30 | group_C,83 31 | group_C,80 32 | group_D,85 33 | group_D,84 34 | group_D,86 35 | group_D,87 36 | group_D,85 37 | group_D,86 38 | group_D,84 39 | group_D,85 40 | group_D,87 41 | group_D,86 42 | -------------------------------------------------------------------------------- /part3/ch6/tomato2.csv: -------------------------------------------------------------------------------- 1 | 비료유형,물주기,수확량 2 | A,1,514 3 | A,1,480 4 | A,1,507 5 | A,2,452 6 | A,2,526 7 | A,2,457 8 | A,3,506 9 | A,3,502 10 | A,3,482 11 | A,4,595 12 | A,4,491 13 | A,4,523 14 | B,1,538 15 | B,1,469 16 | B,1,545 17 | B,2,504 18 | B,2,538 19 | B,2,481 20 | B,3,480 21 | B,3,547 22 | B,3,526 23 | B,4,518 24 | B,4,533 25 | B,4,530 26 | C,1,475 
27 | C,1,444 28 | C,1,460 29 | C,2,459 30 | C,2,446 31 | C,2,494 32 | C,3,500 33 | C,3,515 34 | C,3,522 35 | C,4,507 36 | C,4,511 37 | C,4,521 38 | -------------------------------------------------------------------------------- /part4/ch2/members.csv: -------------------------------------------------------------------------------- 1 | id,age,city,f1,f2,f3,f4,f5,subscribed,views 2 | id01,2,서울,,0,gold,ENFJ,91.29779092,2021-07-16,6820 3 | id02,9,서울,70,1,,ENFJ,60.33982554,2021-05-12,2534 4 | id03,27,서울,61,1,gold,ISTJ,17.25298557,2021-03-16,7312 5 | id04,75,서울,,2,,INFP,52.66707799,2021-07-21,493 6 | id05,24,서울,85,2,,ISFJ,29.26986926,2021-03-07,1338 7 | id06,22,서울,57,0,vip,INTP,20.1294441,2021-09-12,21550 8 | id07,36.3,서울,60,1,,ISFJ,9.796377581,2021-01-11,61 9 | id08,38,서울,101,1,silver,INFJ,83.68538032,2021-03-06,3260 10 | id09,3.3,서울,35,2,,ESFJ,17.25298557,2021-03-21,2764 11 | id10,95,서울,74,1,gold,ISFP,98.42989897,2021-04-03,9992 12 | id100,47,경기,53,0,vip,ESFP,33.30899901,2021-02-21,15535 13 | id11,40,서울,68,0,gold,ENFP,98.42989897,2021-10-29,6752 14 | id12,20,서울,,0,,ESTP,91.29779092,2021-11-30,1367 15 | id13,15,서울,68,0,gold,ESFJ,83.68538032,2021-12-30,5643 16 | id14,77,서울,50,1,gold,ENTJ,67.8863732,2021-09-19,5700 17 | id15,22,서울,67,1,gold,ENTP,9.796377581,2021-05-26,7676 18 | id16,68,서울,85,0,gold,ESFP,16.2838541,2021-07-25,9472 19 | id17,74,서울,,1,gold,ISTP,67.8863732,2021-10-26,9441 20 | id18,41,서울,87,2,gold,ISFJ,80.13828012,2021-03-03,7933 21 | id19,53,서울,,0,gold,ISFP,83.68538032,2021-12-24,5287 22 | id20,11,서울,51,1,,INTJ,91.29779092,2021-07-16, 23 | id21,90,부산,,1,gold,ISFP,29.26986926,2021-05-03,9690 24 | id22,-6.3,부산,72,1,gold,ENFP,52.66707799,2021-02-09,6147 25 | id23,34,부산,75,1,gold,ISTP,69.73031281,2021-05-21,6236 26 | id24,80,부산,44,0,gold,INFJ,73.58639712,2021-09-11,5976 27 | id25,34,부산,,0,gold,ESTP,60.33982554,2021-07-12,8954 28 | id26,55,부산,57,1,gold,ENFP,83.68538032,2021-05-01,5857 29 | id27,37,부산,60,0,silver,ESTP,73.58639712,2021-10-13,4255 30 | 
id28,38,부산,34,1,gold,ENTP,80.13828012,2021-10-31,5068 31 | id29,-13.5,부산,47,2,gold,ENTP,67.8863732,2021-08-28,6793 32 | id30,16,부산,,0,,ESTJ,17.25298557,2021-05-28,240 33 | id31,86,부산,77,0,gold,ESFJ,73.58639712,2021-02-11,8014 34 | id32,25,부산,64,0,vip,ISFJ,13.04992129,2021-05-24,17421 35 | id33,47,부산,94,0,silver,ENFJ,17.25298557,2021-04-02,3880 36 | id34,65,부산,,1,silver,INFP,48.43118381,2021-02-01,3163 37 | id35,30,부산,,2,silver,ESTJ,33.30899901,2021-06-10,3084 38 | id36,68,부산,77,1,gold,INTP,13.04992129,2021-07-20,9713 39 | id37,100,부산,,0,silver,ESTP,33.30899901,2021-07-08,4068 40 | id38,87,부산,,1,,ESTP,83.68538032,2021-06-21,1048 41 | id39,56,부산,50,0,,INFJ,33.30899901,2021-12-22, 42 | id40,56,대구,75,0,gold,ENFP,17.25298557,2021-01-22,8481 43 | id41,81,대구,55,0,gold,ENFJ,37.11373918,2021-10-04,8640 44 | id42,65,대구,48,2,gold,ESTP,33.30899901,2021-02-09,5999 45 | id43,23,대구,60,0,silver,ISTP,29.26986926,2021-05-18,3878 46 | id44,44,대구,,0,,INTP,16.2838541,2021-11-10,546 47 | id45,97,대구,88,0,gold,ENFJ,13.04992129,2021-06-21,8317 48 | id46,93,대구,,0,gold,ESTJ,67.8863732,2021-05-23,9711 49 | id47,34.6,대구,75,1,gold,ESTJ,90.49699927,2021-05-28,8628 50 | id48,18,대구,,0,,ENFP,20.1294441,2021-03-25, 51 | id49,75,대구,88,0,gold,INTP,37.11373918,2021-03-31,9737 52 | id50,86,대구,78,1,,ENFP,60.33982554,2021-12-05,1935 53 | id51,36,대구,,0,gold,ISTJ,83.68538032,2021-08-20,7217 54 | id52,97,대구,82,1,gold,ISFJ,90.49699927,2021-05-20,8518 55 | id53,52,대구,50,0,gold,ESTP,20.1294441,2021-09-09,7012 56 | id54,53,대구,,1,gold,ENFJ,69.73031281,2021-06-21,5872 57 | id55,75,대구,63,2,gold,ENTP,13.04992129,2021-02-06,6042 58 | id56,59,대구,,1,vip,ESTJ,73.58639712,2021-04-05,19589 59 | id57,3,대구,111,0,silver,ISFJ,29.26986926,2021-01-12,4421 60 | id58,0,대구,100,2,,ESTP,33.30899901,2021-04-18,1928 61 | id59,64,대구,,1,silver,ESFJ,20.1294441,2021-06-23,4994 62 | id60,56,경기,,0,gold,ESFP,52.66707799,2021-11-24,6794 63 | id61,87,경기,62,2,,INTP,69.73031281,2021-02-03,218 64 | id62,52,경기,,0,,INTP,60.33982554,2021-04-10,2100 
65 | id63,88,경기,86,1,silver,ISFJ,73.58639712,2021-12-01,4053 66 | id64,43,경기,62,2,gold,ESFP,73.58639712,2021-02-22,5995 67 | id65,26.5,경기,,0,silver,ISFP,91.29779092,2021-01-10,3336 68 | id66,87,경기,,1,gold,ISFP,17.25298557,2021-08-05,8471 69 | id67,66,경기,52,1,,ISFJ,73.58639712,2021-06-17,1159 70 | id68,35,경기,45,2,gold,ISFP,67.8863732,2021-07-29,8599 71 | id69,75,경기,85,0,,ESTJ,69.73031281,2021-11-14,2708 72 | id70,-9,경기,96,1,silver,ISTP,48.43118381,2021-11-17,4442 73 | id71,35,경기,84,2,gold,ISFP,52.66707799,2021-07-15,8087 74 | id72,8,경기,97,0,,ESTJ,97.38103419,2021-01-30,602 75 | id73,90,경기,,1,,ISFJ,73.58639712,2021-08-12,512 76 | id74,45,경기,98,0,gold,ESTP,52.66707799,2021-05-27,7739 77 | id75,63,경기,47,0,gold,ESTP,20.1294441,2021-06-12,6779 78 | id76,71,경기,12,0,,ENTJ,83.68538032,2021-07-28,2872 79 | id77,77,경기,31,0,,INFP,98.42989897,2021-01-16,1518 80 | id78,92,경기,96,1,gold,INTJ,69.73031281,2021-10-27,7565 81 | id79,30,경기,,0,gold,INTJ,80.13828012,2021-08-14,8777 82 | id80,67,경기,60,0,silver,ISFP,83.68538032,2021-01-14,4381 83 | id81,86,경기,50,1,,ISFJ,37.11373918,2021-09-14,244 84 | id82,48,경기,,0,,ENTJ,37.11373918,2021-10-17, 85 | id83,73,경기,50,1,vip,ENTP,80.13828012,2021-09-26,19139 86 | id84,66,경기,44,0,gold,INTP,83.68538032,2021-12-19,5650 87 | id85,83.6,경기,55,0,gold,INFJ,80.13828012,2021-09-24,6719 88 | id86,2,경기,,0,,ESTP,29.26986926,2021-02-16,2155 89 | id87,19,경기,,1,gold,ISFP,97.38103419,2021-08-30,6516 90 | id88,89,경기,75,0,,ESTJ,60.33982554,2021-01-06,2713 91 | id89,34,경기,66,1,gold,ENTJ,33.30899901,2021-10-14,6119 92 | id90,54,경기,,0,silver,ENTP,29.26986926,2021-04-03,3818 93 | id91,6,경기,72,0,gold,INTP,9.796377581,2021-08-23,8988 94 | id92,97,경기,78,1,gold,INFP,97.38103419,2021-05-08,9625 95 | id93,21.8,경기,57,0,,ISFP,73.58639712,2021-06-07,42 96 | id94,84,경기,,1,silver,ESTJ,90.49699927,2021-08-16,3774 97 | id95,77,경기,43,1,gold,INTJ,91.29779092,2021-05-21,8697 98 | id96,92,경기,53,1,silver,ENTJ,52.66707799,2021-05-06,4336 99 | 
id97,100,경기,,0,gold,INFP,67.8863732,2021-03-18,6687 100 | id98,39,경기,58,2,,INFP,98.42989897,2021-10-02,865 101 | id99,1,경기,47,0,gold,ESFJ,97.38103419,2021-12-02,6090 102 | -------------------------------------------------------------------------------- /part4/ch2/p2_type1.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyM8nWiAL6LDgoxZWnlkMmm+"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch2/p2_type1.ipynb)\n","\n","\n"],"metadata":{"id":"8B9gqkLvwvAs"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch2/members.csv\")"],"metadata":{"id":"P_qNqyD3W-6P"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713098382739,"user_tz":-540,"elapsed":9,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"3f4ce496-cc1f-4d92-952e-2e97172cd7bf"},"outputs":[{"output_type":"stream","name":"stdout","text":["5674.04\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch2/members.csv\")\n","\n","# 1) views 컬럼 내림차순 정렬\n","df.sort_values('views', ascending=False, inplace=True)\n","# 2) 상위 10개 중에서 10번째 (최소값) 값 구하기\n","min_value = 
df['views'][:10].min()\n","\n","# 3) 상위 10개 중에서 10번째 (최소값) 값 대체\n","df.iloc[:10,-1] = min_value\n","\n","# 4) age가 80 이상의 views 컬럼 평균\n","cond = df['age'] >= 80\n","result = df[cond]['views'].mean()\n","\n","# 5) 반올림하여 소수 둘째자리까지 계산\n","print(round(result, 2))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch2/members.csv\")\n","\n","# 1) 앞에서 부터 80% 데이터 슬라이싱\n","line = int(len(df) * 0.8)\n","df = df.iloc[:line]\n","\n","# 2) 결측치 채우기 전 'f1' 컬럼 표준편차 구하기\n","std1 = df['f1'].std()\n","\n","# 3) 중앙값으로 결측치 채우기\n","med=df['f1'].median()\n","df['f1'] = df['f1'].fillna(med)\n","\n","# 4) 결측치를 채운 후 'f1' 컬럼 표준편차 구하기\n","std2 = df['f1'].std()\n","\n","# 5) 두 표준편차 차이 절대값 계산 (반올림하여 소수 둘째자리까지 계산)\n","result = abs(std1-std2)\n","print(round(result,2))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1713098382739,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"8b5e4d6d-4fbf-4465-d485-f92da340375a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["3.56\n"]}]},{"cell_type":"code","source":["# 표본 표준편차(판다스) vs 모 표준편차(넘파이)\n","import numpy as np\n","# 기본값\n","print(\"판다스 표본 표준편차\", df['f1'].std())\n","print(\"넘파이 모 표준편차\", np.std(df['f1']))\n","print(\"---------------------------------- \")\n","# ddof변경\n","print(\"판다스 모 표준편차\", df['f1'].std(ddof=0))\n","print(\"넘파이 표본 표준편차\", np.std(df['f1'],ddof=1))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4pflsVkWnxfy","executionInfo":{"status":"ok","timestamp":1713098382739,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"6aa140b2-2748-4b30-c6fe-69b02d658705"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["판다스 표본 표준편차 17.010788646613268\n","넘파이 모 표준편차 16.90413688272785\n","---------------------------------- \n","판다스 모 표준편차 16.90413688272785\n","넘파이 표본 표준편차 17.010788646613268\n"]}]},{"cell_type":"markdown","source":["### 문제3."],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch2/members.csv\")\n","\n","# 1) 표준편차, 평균값 구하기\n","std = df['age'].std()\n","mean = df['age'].mean()\n","\n","# 2) 이상치 최저, 최고 기준 구하기\n","lower = mean - (std * 1.5)\n","upper = mean + (std * 1.5)\n","\n","# 3) 이상치를 벗어나는 값(조건) 찾기\n","cond1 = df['age'] < lower\n","cond2 = df['age'] > upper\n","\n","# 4) 조건에 만족하는 이상치 age합\n","print(df[cond1|cond2]['age'].sum())"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713098382739,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"ac09c5da-b6d5-4462-a80b-d9d868cfefda"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["473.5\n"]}]}]} -------------------------------------------------------------------------------- /part4/ch3/members.csv: -------------------------------------------------------------------------------- 1 | id,age,city,f1,f2,f3,f4,f5,subscribed,views 2 | id01,2,서울,,0,gold,ENFJ,91.29779092,2021-07-16,6820 3 | id02,9,서울,70,1,,ENFJ,60.33982554,2021-05-12,2534 4 | id03,27,서울,61,1,gold,ISTJ,17.25298557,2021-03-16,7312 5 | id04,75,서울,,2,,INFP,52.66707799,2021-07-21,493 6 | id05,24,서울,85,2,,ISFJ,29.26986926,2021-03-07,1338 7 | id06,22,서울,57,0,vip,INTP,20.1294441,2021-09-12,21550 8 | id07,36.3,서울,60,1,,ISFJ,9.796377581,2021-01-11,61 9 | 
id08,38,서울,101,1,silver,INFJ,83.68538032,2021-03-06,3260 10 | id09,3.3,서울,35,2,,ESFJ,17.25298557,2021-03-21,2764 11 | id10,95,서울,74,1,gold,ISFP,98.42989897,2021-04-03,9992 12 | id100,47,경기,53,0,vip,ESFP,33.30899901,2021-02-21,15535 13 | id11,40,서울,68,0,gold,ENFP,98.42989897,2021-10-29,6752 14 | id12,20,서울,,0,,ESTP,91.29779092,2021-11-30,1367 15 | id13,15,서울,68,0,gold,ESFJ,83.68538032,2021-12-30,5643 16 | id14,77,서울,50,1,gold,ENTJ,67.8863732,2021-09-19,5700 17 | id15,22,서울,67,1,gold,ENTP,9.796377581,2021-05-26,7676 18 | id16,68,서울,85,0,gold,ESFP,16.2838541,2021-07-25,9472 19 | id17,74,서울,,1,gold,ISTP,67.8863732,2021-10-26,9441 20 | id18,41,서울,87,2,gold,ISFJ,80.13828012,2021-03-03,7933 21 | id19,53,서울,,0,gold,ISFP,83.68538032,2021-12-24,5287 22 | id20,11,서울,51,1,,INTJ,91.29779092,2021-07-16, 23 | id21,90,부산,,1,gold,ISFP,29.26986926,2021-05-03,9690 24 | id22,-6.3,부산,72,1,gold,ENFP,52.66707799,2021-02-09,6147 25 | id23,34,부산,75,1,gold,ISTP,69.73031281,2021-05-21,6236 26 | id24,80,부산,44,0,gold,INFJ,73.58639712,2021-09-11,5976 27 | id25,34,부산,,0,gold,ESTP,60.33982554,2021-07-12,8954 28 | id26,55,부산,57,1,gold,ENFP,83.68538032,2021-05-01,5857 29 | id27,37,부산,60,0,silver,ESTP,73.58639712,2021-10-13,4255 30 | id28,38,부산,34,1,gold,ENTP,80.13828012,2021-10-31,5068 31 | id29,-13.5,부산,47,2,gold,ENTP,67.8863732,2021-08-28,6793 32 | id30,16,부산,,0,,ESTJ,17.25298557,2021-05-28,240 33 | id31,86,부산,77,0,gold,ESFJ,73.58639712,2021-02-11,8014 34 | id32,25,부산,64,0,vip,ISFJ,13.04992129,2021-05-24,17421 35 | id33,47,부산,94,0,silver,ENFJ,17.25298557,2021-04-02,3880 36 | id34,65,부산,,1,silver,INFP,48.43118381,2021-02-01,3163 37 | id35,30,부산,,2,silver,ESTJ,33.30899901,2021-06-10,3084 38 | id36,68,부산,77,1,gold,INTP,13.04992129,2021-07-20,9713 39 | id37,100,부산,,0,silver,ESTP,33.30899901,2021-07-08,4068 40 | id38,87,부산,,1,,ESTP,83.68538032,2021-06-21,1048 41 | id39,56,부산,50,0,,INFJ,33.30899901,2021-12-22, 42 | id40,56,대구,75,0,gold,ENFP,17.25298557,2021-01-22,8481 43 | 
id41,81,대구,55,0,gold,ENFJ,37.11373918,2021-10-04,8640 44 | id42,65,대구,48,2,gold,ESTP,33.30899901,2021-02-09,5999 45 | id43,23,대구,60,0,silver,ISTP,29.26986926,2021-05-18,3878 46 | id44,44,대구,,0,,INTP,16.2838541,2021-11-10,546 47 | id45,97,대구,88,0,gold,ENFJ,13.04992129,2021-06-21,8317 48 | id46,93,대구,,0,gold,ESTJ,67.8863732,2021-05-23,9711 49 | id47,34.6,대구,75,1,gold,ESTJ,90.49699927,2021-05-28,8628 50 | id48,18,대구,,0,,ENFP,20.1294441,2021-03-25, 51 | id49,75,대구,88,0,gold,INTP,37.11373918,2021-03-31,9737 52 | id50,86,대구,78,1,,ENFP,60.33982554,2021-12-05,1935 53 | id51,36,대구,,0,gold,ISTJ,83.68538032,2021-08-20,7217 54 | id52,97,대구,82,1,gold,ISFJ,90.49699927,2021-05-20,8518 55 | id53,52,대구,50,0,gold,ESTP,20.1294441,2021-09-09,7012 56 | id54,53,대구,,1,gold,ENFJ,69.73031281,2021-06-21,5872 57 | id55,75,대구,63,2,gold,ENTP,13.04992129,2021-02-06,6042 58 | id56,59,대구,,1,vip,ESTJ,73.58639712,2021-04-05,19589 59 | id57,3,대구,111,0,silver,ISFJ,29.26986926,2021-01-12,4421 60 | id58,0,대구,100,2,,ESTP,33.30899901,2021-04-18,1928 61 | id59,64,대구,,1,silver,ESFJ,20.1294441,2021-06-23,4994 62 | id60,56,경기,,0,gold,ESFP,52.66707799,2021-11-24,6794 63 | id61,87,경기,62,2,,INTP,69.73031281,2021-02-03,218 64 | id62,52,경기,,0,,INTP,60.33982554,2021-04-10,2100 65 | id63,88,경기,86,1,silver,ISFJ,73.58639712,2021-12-01,4053 66 | id64,43,경기,62,2,gold,ESFP,73.58639712,2021-02-22,5995 67 | id65,26.5,경기,,0,silver,ISFP,91.29779092,2021-01-10,3336 68 | id66,87,경기,,1,gold,ISFP,17.25298557,2021-08-05,8471 69 | id67,66,경기,52,1,,ISFJ,73.58639712,2021-06-17,1159 70 | id68,35,경기,45,2,gold,ISFP,67.8863732,2021-07-29,8599 71 | id69,75,경기,85,0,,ESTJ,69.73031281,2021-11-14,2708 72 | id70,-9,경기,96,1,silver,ISTP,48.43118381,2021-11-17,4442 73 | id71,35,경기,84,2,gold,ISFP,52.66707799,2021-07-15,8087 74 | id72,8,경기,97,0,,ESTJ,97.38103419,2021-01-30,602 75 | id73,90,경기,,1,,ISFJ,73.58639712,2021-08-12,512 76 | id74,45,경기,98,0,gold,ESTP,52.66707799,2021-05-27,7739 77 | id75,63,경기,47,0,gold,ESTP,20.1294441,2021-06-12,6779 78 
| id76,71,경기,12,0,,ENTJ,83.68538032,2021-07-28,2872 79 | id77,77,경기,31,0,,INFP,98.42989897,2021-01-16,1518 80 | id78,92,경기,96,1,gold,INTJ,69.73031281,2021-10-27,7565 81 | id79,30,경기,,0,gold,INTJ,80.13828012,2021-08-14,8777 82 | id80,67,경기,60,0,silver,ISFP,83.68538032,2021-01-14,4381 83 | id81,86,경기,50,1,,ISFJ,37.11373918,2021-09-14,244 84 | id82,48,경기,,0,,ENTJ,37.11373918,2021-10-17, 85 | id83,73,경기,50,1,vip,ENTP,80.13828012,2021-09-26,19139 86 | id84,66,경기,44,0,gold,INTP,83.68538032,2021-12-19,5650 87 | id85,83.6,경기,55,0,gold,INFJ,80.13828012,2021-09-24,6719 88 | id86,2,경기,,0,,ESTP,29.26986926,2021-02-16,2155 89 | id87,19,경기,,1,gold,ISFP,97.38103419,2021-08-30,6516 90 | id88,89,경기,75,0,,ESTJ,60.33982554,2021-01-06,2713 91 | id89,34,경기,66,1,gold,ENTJ,33.30899901,2021-10-14,6119 92 | id90,54,경기,,0,silver,ENTP,29.26986926,2021-04-03,3818 93 | id91,6,경기,72,0,gold,INTP,9.796377581,2021-08-23,8988 94 | id92,97,경기,78,1,gold,INFP,97.38103419,2021-05-08,9625 95 | id93,21.8,경기,57,0,,ISFP,73.58639712,2021-06-07,42 96 | id94,84,경기,,1,silver,ESTJ,90.49699927,2021-08-16,3774 97 | id95,77,경기,43,1,gold,INTJ,91.29779092,2021-05-21,8697 98 | id96,92,경기,53,1,silver,ENTJ,52.66707799,2021-05-06,4336 99 | id97,100,경기,,0,gold,INFP,67.8863732,2021-03-18,6687 100 | id98,39,경기,58,2,,INFP,98.42989897,2021-10-02,865 101 | id99,1,경기,47,0,gold,ESFJ,97.38103419,2021-12-02,6090 102 | -------------------------------------------------------------------------------- /part4/ch3/p3_type1.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOF0eLeFT4xZzvFsbWZ42Bt"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 
실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch3/p3_type1.ipynb)\n"],"metadata":{"id":"LNmnV8r3xZYl"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","execution_count":1,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1717333939429,"user_tz":-540,"elapsed":578,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"9871913f-3b0c-47a0-a3ab-9d2726f36123"},"outputs":[{"output_type":"stream","name":"stdout","text":["57\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/members.csv\")\n","\n","# 1) 결측치가 있는 행 제거 (기본값 axis=0)\n","df = df.dropna()\n","\n","# 2) 앞에서부터 70% 데이터 슬라이싱\n","df = df.iloc[:int(len(df)*0.7)]\n","\n","# 3) 1사분위 값 계산(정수 출력)\n","print(int(df['f1'].quantile(.25)))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","# df = pd.read_csv('year.csv', index_col='Unnamed: 0')\n","df = pd.read_csv('https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/year.csv', index_col='Unnamed: 0')\n","\n","# 1) 행을 기준으로 평균을 계산\n","m = df.loc[2000].mean()\n","\n","# 2) 평균보다 큰 값의 합 계산\n","print(sum(df.loc[2000,:] > m))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1717333939787,"user_tz":-540,"elapsed":360,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"70c9238f-89cf-41b1-f615-8e46b5408266"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["100\n"]}]},{"cell_type":"code","source":["# 방법2\n","# df = 
pd.read_csv('year.csv', index_col='Unnamed: 0')\n","df = pd.read_csv('https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/year.csv', index_col='Unnamed: 0')\n","\n","# 1) 행과 열을 변경함\n","df = df.T\n","\n","# 2) 평균 계산\n","m = df[2000].mean()\n","\n","# 3) 평균보다 큰 값의 합 계산\n","print(sum(df[2000] > m))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iYnbTrVNYbjs","executionInfo":{"status":"ok","timestamp":1717333939787,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"671a595f-2ecc-465c-b513-d7e68927bb1d"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["100\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/members.csv\")\n","\n","# 1) 결측치 수 컬럼별로 확인 및 변수에 대입(시리즈)\n","df_cntNull = df.isnull().sum()\n","\n","# 2) 내림차순 정렬\n","df_cntNull = df_cntNull.sort_values(ascending=False)\n","\n","# 3) 가장 상위에 있는 인덱스명 출력\n","print(df_cntNull.index[0])"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1717333939788,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"dd746db0-5cc0-42b6-9225-3a6bed976754"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["f1\n"]}]},{"cell_type":"code","source":["# 방법2\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/members.csv\")\n","\n","# 1) 결측치 수 컬럼별로 확인 및 변수에 대입(시리즈)\n","df_cntNull = df.isnull().sum()\n","\n","# 2) 인덱스 리셋를 활용해 기존 인덱스는 새로운 컬럼으로 변경)\n","df_cntNull = df_cntNull.reset_index()\n","\n","# 3) 출력하고자하는 값을 찾아 
출력\n","print(df_cntNull.loc[3, 'index'])"],"metadata":{"id":"L9JYUM9N9u_z","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1717333939788,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"dcb941dd-cc0d-4b2d-a9d5-0567eaba4cd0"},"execution_count":5,"outputs":[{"output_type":"stream","name":"stdout","text":["f1\n"]}]}]} -------------------------------------------------------------------------------- /part4/ch3/year.csv: -------------------------------------------------------------------------------- 1 | ,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199 2 | 
1999,137,74,114,140,80,150,16,133,178,181,106,176,163,81,4,195,111,80,166,161,117,141,134,146,10,191,68,11,200,95,74,104,18,107,68,114,43,4,14,59,173,124,108,18,175,30,174,28,144,100,162,116,71,9,138,162,83,112,66,1,111,35,8,180,191,7,108,174,180,189,83,139,109,73,24,197,28,148,47,194,26,106,103,196,115,200,145,33,182,70,158,112,196,41,27,50,34,19,120,33,103,26,43,140,184,159,16,111,34,191,192,168,93,53,95,141,184,85,68,81,164,129,27,119,128,172,103,105,100,162,55,139,193,114,128,57,165,38,39,175,184,119,98,73,23,62,67,180,163,145,68,48,75,187,17,199,74,48,35,54,117,173,76,51,1,40,81,5,165,175,176,72,103,7,175,178,172,152,104,195,15,153,44,111,176,55,39,40,80,85,124,94,118,12,50,191,137,174,56,128 3 | 2000,176,87,64,110,128,16,8,4,123,87,190,146,53,52,21,55,75,131,76,181,72,82,121,182,97,162,86,179,68,36,77,146,155,13,133,134,28,14,108,4,194,197,153,96,16,53,172,125,57,50,184,122,3,3,168,32,99,189,197,27,7,188,120,181,23,172,56,45,68,191,109,14,66,101,58,1,33,72,74,55,74,87,62,112,14,47,68,15,172,173,196,190,79,117,137,141,171,105,186,128,159,194,29,8,147,24,199,120,94,94,171,158,115,141,92,13,86,54,182,117,193,186,171,198,136,38,77,35,93,80,88,74,199,9,85,78,176,25,137,54,94,181,35,156,82,175,74,15,16,52,122,162,106,36,17,190,135,128,169,78,160,137,147,188,130,146,97,7,37,25,133,134,162,155,19,112,170,154,170,103,197,8,55,131,48,117,112,68,57,117,59,22,3,108,17,104,101,161,156,43 4 | 
2001,128,132,123,112,134,160,180,47,115,155,50,152,13,137,89,193,156,122,137,88,173,173,57,59,114,99,77,27,41,4,191,172,11,165,45,39,38,181,178,23,151,76,188,19,110,28,39,89,88,100,174,12,116,142,70,141,91,112,72,147,195,142,89,34,179,3,101,171,144,33,122,196,142,145,88,159,167,44,84,173,52,31,123,187,99,187,5,95,193,101,186,30,176,134,14,164,9,122,29,126,38,137,110,130,153,167,14,22,185,54,162,124,100,155,98,3,18,193,199,168,126,121,152,174,131,61,148,16,171,64,169,95,109,45,139,86,179,17,186,151,88,100,147,74,31,200,91,16,32,142,137,162,74,31,20,131,77,106,96,187,102,166,85,179,178,37,39,158,186,17,36,122,6,125,190,156,90,116,68,146,39,136,123,54,186,103,154,46,22,87,160,111,116,75,48,51,81,101,194,73 5 | -------------------------------------------------------------------------------- /part4/ch4/data4-1.csv: -------------------------------------------------------------------------------- 1 | id,age,city,f1,f2,f3,f4,f5 2 | id01,2,서울,,0,,ENFJ,91.29779092 3 | id02,9,서울,70,1,,ENFJ,60.33982554 4 | id03,27,서울,61,1,,ISTJ,17.25298557 5 | id04,75,서울,,2,,INFP,52.66707799 6 | id05,24,서울,85,2,,ISFJ,29.26986926 7 | id06,22,서울,57,0,vip,INTP,20.1294441 8 | id07,36.3,서울,60,1,,ISFJ,9.796377581 9 | id08,38,서울,101,1,,INFJ,83.68538032 10 | id09,3.3,서울,35,2,,ESFJ,17.25298557 11 | id10,95,서울,74,1,,ISFP,98.42989897 12 | id11,40,서울,68,0,,ENFP,98.42989897 13 | id12,20,서울,,0,,ESTP,91.29779092 14 | id13,15,서울,68,0,,ESFJ,83.68538032 15 | id14,77,서울,50,1,,ENTJ,67.8863732 16 | id15,22,서울,67,1,,ENTP,9.796377581 17 | id16,68,서울,85,0,,ESFP,16.2838541 18 | id17,74,서울,,1,,ISTP,67.8863732 19 | id18,41,서울,87,2,,ISFJ,80.13828012 20 | id19,53,서울,,0,,ISFP,83.68538032 21 | id20,11,서울,51,1,,INTJ,91.29779092 22 | id21,90,부산,,1,,ISFP,29.26986926 23 | id22,-6.3,부산,72,1,,ENFP,52.66707799 24 | id23,34,부산,75,1,,ISTP,69.73031281 25 | id24,80,부산,44,0,,INFJ,73.58639712 26 | id25,34,부산,,0,,ESTP,60.33982554 27 | id26,55,부산,57,1,,ENFP,83.68538032 28 | id27,37,부산,60,0,,ESTP,73.58639712 29 | 
id28,38,부산,34,1,,ENTP,80.13828012 30 | id29,-13.5,부산,47,2,,ENTP,67.8863732 31 | id30,16,부산,,0,,ESTJ,17.25298557 32 | id31,86,부산,77,0,,ESFJ,73.58639712 33 | id32,25,부산,64,0,vip,ISFJ,13.04992129 34 | id33,47,부산,94,0,,ENFJ,17.25298557 35 | id34,65,부산,,1,,INFP,48.43118381 36 | id35,30,부산,,2,,ESTJ,33.30899901 37 | id36,68,부산,77,1,,INTP,13.04992129 38 | id37,100,부산,,0,,ESTP,33.30899901 39 | id38,87,부산,,1,,ESTP,83.68538032 40 | id39,56,부산,50,0,,INFJ,33.30899901 41 | id40,56,대구,75,0,,ENFP,17.25298557 42 | id41,81,대구,55,0,,ENFJ,37.11373918 43 | id42,65,대구,48,2,,ESTP,33.30899901 44 | id43,23,대구,60,0,,ISTP,29.26986926 45 | id44,44,대구,,0,,INTP,16.2838541 46 | id45,97,대구,88,0,,ENFJ,13.04992129 47 | id46,93,대구,,0,,ESTJ,67.8863732 48 | id47,34.6,대구,75,1,,ESTJ,90.49699927 49 | id48,18,대구,,0,,ENFP,20.1294441 50 | id49,75,대구,88,0,,INTP,37.11373918 51 | id50,86,대구,78,1,,ENFP,60.33982554 52 | id51,36,대구,,0,,ISTJ,83.68538032 53 | id52,97,대구,82,1,,ISFJ,90.49699927 54 | id53,52,대구,50,0,,ESTP,20.1294441 55 | id54,53,대구,,1,,ENFJ,69.73031281 56 | id55,75,대구,63,2,,ENTP,13.04992129 57 | id56,59,대구,,1,vip,ESTJ,73.58639712 58 | id57,3,대구,111,0,,ISFJ,29.26986926 59 | id58,0,대구,100,2,,ESTP,33.30899901 60 | id59,64,대구,,1,,ESFJ,20.1294441 61 | id60,56,경기,,0,,ESFP,52.66707799 62 | id61,87,경기,62,2,,INTP,69.73031281 63 | id62,52,경기,,0,,INTP,60.33982554 64 | id63,88,경기,86,1,,ISFJ,73.58639712 65 | id64,43,경기,62,2,,ESFP,73.58639712 66 | id65,26.5,경기,,0,,ISFP,91.29779092 67 | id66,87,경기,,1,,ISFP,17.25298557 68 | id67,66,경기,52,1,,ISFJ,73.58639712 69 | id68,35,경기,45,2,,ISFP,67.8863732 70 | id69,75,경기,85,0,,ESTJ,69.73031281 71 | id70,-9,경기,96,1,,ISTP,48.43118381 72 | id71,35,경기,84,2,,ISFP,52.66707799 73 | id72,8,경기,97,0,,ESTJ,97.38103419 74 | id73,90,경기,,1,,ISFJ,73.58639712 75 | id74,45,경기,98,0,,ESTP,52.66707799 76 | id75,63,경기,47,0,,ESTP,20.1294441 77 | id76,71,경기,12,0,,ENTJ,83.68538032 78 | id77,77,경기,31,0,,INFP,98.42989897 79 | id78,92,경기,96,1,,INTJ,69.73031281 80 | id79,30,경기,,0,,INTJ,80.13828012 81 | 
id80,67,경기,60,0,,ISFP,83.68538032 82 | id81,86,경기,50,1,,ISFJ,37.11373918 83 | id82,48,경기,,0,,ENTJ,37.11373918 84 | id83,73,경기,50,1,vip,ENTP,80.13828012 85 | id84,66,경기,44,0,,INTP,83.68538032 86 | id85,83.6,경기,55,0,,INFJ,80.13828012 87 | id86,2,경기,,0,,ESTP,29.26986926 88 | id87,19,경기,,1,,ISFP,97.38103419 89 | id88,89,경기,75,0,,ESTJ,60.33982554 90 | id89,34,경기,66,1,,ENTJ,33.30899901 91 | id90,54,경기,,0,,ENTP,29.26986926 92 | id91,6,경기,72,0,,INTP,9.796377581 93 | id92,97,경기,78,1,,INFP,97.38103419 94 | id93,21.8,경기,57,0,,ISFP,73.58639712 95 | id94,84,경기,,1,,ESTJ,90.49699927 96 | id95,77,경기,43,1,,INTJ,91.29779092 97 | id96,92,경기,53,1,,ENTJ,52.66707799 98 | id97,100,경기,,0,,INFP,67.8863732 99 | id98,39,경기,58,2,,INFP,98.42989897 100 | id99,1,경기,47,0,,ESFJ,97.38103419 101 | id100,47,경기,53,0,vip,ESFP,33.30899901 102 | -------------------------------------------------------------------------------- /part4/ch4/p4_type1.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyNMmI6oMjxT37FgfVx31SGC"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch4/p4_type1.ipynb)"],"metadata":{"id":"LPh19ptmmhPP"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713450424035,"user_tz":-540,"elapsed":1325,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"939c2853-d95d-4d42-cc80-3b6b8cb1f509"},"outputs":[{"output_type":"stream","name":"stdout","text":["50\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"data4-1.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch4/data4-1.csv\")\n","\n","# 1) 3사분위수와 1사분위수의 차이를 절대값으로 계산\n","q1 = df['age'].quantile(0.25)\n","q3 = df['age'].quantile(0.75)\n","result = abs(q1 - q3)\n","\n","# 2) 소수점 이하를 버리고, 정수로 출력\n","print(int(result))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data4-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch4/data4-2.csv\")\n","\n","# 1) 비율이 40%보다 크고 50%보다 작은 조건\n","cond1 = (df['loves'] + df['wows']) / df['reactions'] > 0.4\n","cond2 = (df['loves'] + df['wows']) / df['reactions'] < 0.5\n","\n","# 2) type이 video인 조건\n","cond3 = df['type'] == 'video'\n","\n","# 3) 조건에 맞는 데이터 수 구하기\n","print(len(df[cond1 & cond2 & cond3]))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1713450424036,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"717997e1-2e5c-458d-f140-d2ab276cb662"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["90\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","# df = pd.read_csv(\"data4-3.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch4/data4-3.csv\")\n","\n","# 1) datetime으로 형변환\n","df['date_added'] = pd.to_datetime(df['date_added'])\n","\n","# 2) dt를 활용해 year과 month 파생변수 생성\n","df['year'] = df['date_added'].dt.year\n","df['month'] = 
df['date_added'].dt.month\n","\n","# 3) 조건\n","cond1 = df['country'] == \"United Kingdom\"\n","cond2 = df['year'] == 2018\n","cond3 = df['month'] == 1\n","\n","# 4) 조건에 맞는 데이터 수 출력\n","print(len(df[cond1 & cond2 & cond3]))"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713450424545,"user_tz":-540,"elapsed":512,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"56aafe5e-9d9b-475c-a422-c69552f3a55e"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["6\n"]}]},{"cell_type":"code","source":["# 방법2\n","# df = pd.read_csv(\"data4-3.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch4/data4-3.csv\")\n","\n","# 1) datetime으로 형변환\n","df['date_added'] = pd.to_datetime(df['date_added'])\n","\n","# 2) 조건\n","cond1 = df['country'] == \"United Kingdom\"\n","cond2 = df['date_added'] >= '2018-1-1'\n","cond3 = df['date_added'] <= '2018-1-31'\n","\n","# 3) 조건에 맞는 데이터 수 출력\n","print(len(df[cond1 & cond2 & cond3]))"],"metadata":{"id":"L9JYUM9N9u_z","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713450425001,"user_tz":-540,"elapsed":458,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"949eac59-5e2a-449e-a86d-49fb1d08cf32"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["6\n"]}]}]} -------------------------------------------------------------------------------- /part4/ch5/p5_type1.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyNVXuORZIebZMIq5tLwPhb8"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 
실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch5/p5_type1.ipynb)"],"metadata":{"id":"p_I02eB4m9z9"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713100901774,"user_tz":-540,"elapsed":459,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"db3ca790-0d92-4902-9759-bf9a7e7d75a3"},"outputs":[{"output_type":"stream","name":"stdout","text":["118\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"data5-1.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch5/data5-1.csv\")\n","\n","# 1) 조건1.종량제 봉투 종류 ‘규격봉투’\n","cond1 = df['종량제봉투종류'] == '규격봉투'\n","\n","# 2) 조건2.종량제 봉투 용도 ‘음식물쓰레기’\n","cond2 = df['종량제봉투용도'] == '음식물쓰레기'\n","\n","# 3) 조건3. 2l가격이 0이면 제외\n","cond3 = df['2ℓ가격'] != 0\n","\n","# 4) 조건 적용한 데이터\n","df = df[cond1 & cond2 & cond3]\n","\n","# 5) 2l 가격 평균 계산, 반올림, 정수 출력\n","print(round(df['2ℓ가격'].mean()))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data5-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch5/data5-2.csv\")\n","\n","# 1) bmi 파생변수 계산\n","df['bmi'] = df['Weight'] / (df['Height']/100)**2\n","\n","# 2) 조건1. 정상체중 bmi\n","cond1 = (df['bmi'] >= 18.5) & (df['bmi'] < 23)\n","\n","# 3) 조건2. 
위험체중 bmi\n","cond2 = (df['bmi'] >= 23) & (df['bmi'] < 25)\n","\n","# 4) 조건1의 인원와 조건2의 인원 차이계산, 절대값 처리\n","print(abs(len(df[cond1]) - len(df[cond2])))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1719909009812,"user_tz":-540,"elapsed":333,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"a9ed122d-e44a-44e3-89b4-77b5887cd3cf"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["144\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data5-3.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch5/data5-3.csv\")\n","\n","# 1) '순전입' 파생변수 계산\n","df['순전입'] = df['전입학생수(계)'] - df['전출학생수(계)']\n","\n","# 2) '순전입' 컬럼 기준으로 내림차순 정렬\n","df = df.sort_values('순전입', ascending=False)\n","\n","# 3) 첫번째 행의 전체 학생 수 값 선택\n","print(int(df.iloc[0,-2]))"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713100950339,"user_tz":-540,"elapsed":426,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"36dbc6da-2150-43b9-dc56-21b9dca328e9"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["230\n"]}]}]} -------------------------------------------------------------------------------- /part4/ch6/data6-1-1.csv: -------------------------------------------------------------------------------- 1 | 소방서,출동시간,도착시간 2 | 소방서1,2023-09-16 06:12:29,2023-09-16 07:51:55 3 | 소방서5,2023-09-19 06:11:41,2023-09-19 08:08:36 4 | 소방서1,2023-09-14 06:12:00,2023-09-14 07:24:21 5 | 소방서6,2023-09-20 06:12:24,2023-09-20 06:30:45 6 | 소방서9,2023-09-15 06:11:50,2023-09-15 06:44:00 7 | 소방서1,2023-09-14 06:11:45,2023-09-14 06:24:41 8 | 소방서10,2023-09-14 06:12:30,2023-09-14 06:13:50 9 | 소방서8,2023-09-19 
06:11:37,2023-09-19 06:17:42 10 | 소방서3,2023-09-18 06:11:44,2023-09-18 08:08:45 11 | 소방서5,2023-09-14 06:11:58,2023-09-14 07:14:14 12 | 소방서8,2023-09-20 06:11:45,2023-09-20 06:52:06 13 | 소방서9,2023-09-13 06:12:21,2023-09-13 07:58:59 14 | 소방서9,2023-09-11 06:11:43,2023-09-11 06:52:43 15 | 소방서7,2023-09-10 06:12:02,2023-09-10 07:08:45 16 | 소방서9,2023-09-10 06:11:44,2023-09-10 07:38:22 17 | 소방서8,2023-09-12 06:11:52,2023-09-12 07:06:15 18 | 소방서9,2023-09-20 06:11:40,2023-09-20 07:49:21 19 | 소방서3,2023-09-19 06:11:36,2023-09-19 07:15:18 20 | 소방서5,2023-09-18 06:12:13,2023-09-18 06:56:48 21 | 소방서7,2023-09-19 06:11:48,2023-09-19 07:11:12 22 | 소방서10,2023-09-15 06:12:33,2023-09-15 06:38:10 23 | 소방서2,2023-09-10 06:12:11,2023-09-10 06:15:08 24 | 소방서6,2023-09-17 06:12:09,2023-09-17 07:21:41 25 | 소방서1,2023-09-10 06:12:16,2023-09-10 07:17:04 26 | 소방서4,2023-09-12 06:12:04,2023-09-12 06:35:13 27 | 소방서9,2023-09-13 06:12:10,2023-09-13 07:49:39 28 | 소방서1,2023-09-20 06:12:26,2023-09-20 07:11:38 29 | 소방서1,2023-09-16 06:11:43,2023-09-16 06:58:26 30 | 소방서5,2023-09-14 06:11:54,2023-09-14 07:05:54 31 | 소방서2,2023-09-11 06:12:30,2023-09-11 07:18:52 32 | 소방서2,2023-09-19 06:12:07,2023-09-19 08:07:07 33 | 소방서9,2023-09-14 06:12:30,2023-09-14 07:23:14 34 | 소방서7,2023-09-20 06:12:14,2023-09-20 06:46:44 35 | 소방서1,2023-09-12 06:12:13,2023-09-12 07:06:35 36 | 소방서1,2023-09-10 06:11:54,2023-09-10 06:33:50 37 | 소방서7,2023-09-19 06:11:49,2023-09-19 08:04:32 38 | 소방서3,2023-09-15 06:11:52,2023-09-15 06:43:39 39 | 소방서2,2023-09-10 06:12:31,2023-09-10 07:51:04 40 | 소방서7,2023-09-12 06:12:15,2023-09-12 06:32:13 41 | 소방서5,2023-09-14 06:12:11,2023-09-14 06:29:45 42 | 소방서6,2023-09-12 06:12:27,2023-09-12 07:33:37 43 | 소방서10,2023-09-14 06:12:07,2023-09-14 06:20:54 44 | 소방서3,2023-09-13 06:11:52,2023-09-13 06:15:14 45 | 소방서8,2023-09-10 06:12:06,2023-09-10 06:51:33 46 | 소방서1,2023-09-18 06:12:01,2023-09-18 06:54:56 47 | 소방서9,2023-09-12 06:11:59,2023-09-12 06:30:20 48 | 소방서9,2023-09-19 06:12:00,2023-09-19 07:45:23 49 | 
소방서4,2023-09-11 06:12:13,2023-09-11 06:24:30 50 | 소방서9,2023-09-17 06:12:21,2023-09-17 08:06:41 51 | 소방서5,2023-09-14 06:12:32,2023-09-14 07:17:37 52 | 소방서4,2023-09-12 06:12:22,2023-09-12 07:51:19 53 | 소방서1,2023-09-17 06:12:17,2023-09-17 07:25:31 54 | 소방서2,2023-09-11 06:12:14,2023-09-11 06:23:35 55 | 소방서9,2023-09-12 06:12:26,2023-09-12 08:12:22 56 | 소방서9,2023-09-18 06:11:44,2023-09-18 08:08:14 57 | 소방서8,2023-09-14 06:11:45,2023-09-14 06:16:01 58 | 소방서10,2023-09-13 06:12:13,2023-09-13 07:04:11 59 | 소방서4,2023-09-12 06:11:35,2023-09-12 06:29:14 60 | 소방서1,2023-09-11 06:11:56,2023-09-11 07:16:17 61 | 소방서10,2023-09-19 06:11:50,2023-09-19 06:19:13 62 | 소방서8,2023-09-15 06:11:52,2023-09-15 08:02:18 63 | 소방서6,2023-09-15 06:12:21,2023-09-15 07:51:49 64 | 소방서6,2023-09-17 06:11:42,2023-09-17 06:36:06 65 | 소방서1,2023-09-14 06:12:10,2023-09-14 07:17:26 66 | 소방서3,2023-09-12 06:12:10,2023-09-12 07:00:14 67 | 소방서5,2023-09-19 06:11:36,2023-09-19 06:46:08 68 | 소방서10,2023-09-15 06:12:31,2023-09-15 07:01:34 69 | 소방서10,2023-09-14 06:12:15,2023-09-14 06:26:44 70 | 소방서10,2023-09-15 06:12:19,2023-09-15 07:35:47 71 | 소방서5,2023-09-13 06:11:58,2023-09-13 07:26:20 72 | 소방서4,2023-09-13 06:12:02,2023-09-13 07:26:59 73 | 소방서5,2023-09-15 06:12:31,2023-09-15 06:36:38 74 | 소방서1,2023-09-14 06:11:52,2023-09-14 07:16:38 75 | 소방서3,2023-09-13 06:12:00,2023-09-13 06:18:30 76 | 소방서9,2023-09-18 06:12:14,2023-09-18 08:04:37 77 | 소방서4,2023-09-11 06:11:39,2023-09-11 06:17:08 78 | 소방서9,2023-09-11 06:11:45,2023-09-11 06:49:30 79 | 소방서10,2023-09-20 06:12:14,2023-09-20 07:46:38 80 | 소방서4,2023-09-19 06:11:39,2023-09-19 06:44:57 81 | 소방서3,2023-09-14 06:12:12,2023-09-14 07:07:14 82 | 소방서10,2023-09-20 06:12:03,2023-09-20 07:39:13 83 | 소방서4,2023-09-20 06:12:17,2023-09-20 07:20:27 84 | 소방서9,2023-09-17 06:12:15,2023-09-17 07:17:10 85 | 소방서8,2023-09-17 06:12:16,2023-09-17 07:20:21 86 | 소방서2,2023-09-13 06:11:55,2023-09-13 07:22:30 87 | 소방서5,2023-09-17 06:12:01,2023-09-17 08:11:05 88 | 소방서7,2023-09-14 06:11:43,2023-09-14 
07:54:11 89 | 소방서2,2023-09-20 06:11:58,2023-09-20 07:08:46 90 | 소방서7,2023-09-12 06:12:12,2023-09-12 06:32:43 91 | 소방서3,2023-09-17 06:11:35,2023-09-17 06:39:50 92 | 소방서1,2023-09-17 06:11:36,2023-09-17 06:38:40 93 | 소방서7,2023-09-20 06:12:34,2023-09-20 07:06:15 94 | 소방서6,2023-09-12 06:12:33,2023-09-12 06:19:32 95 | 소방서4,2023-09-14 06:11:50,2023-09-14 06:50:49 96 | 소방서3,2023-09-16 06:12:27,2023-09-16 06:50:09 97 | 소방서6,2023-09-17 06:12:02,2023-09-17 06:16:49 98 | 소방서6,2023-09-18 06:12:05,2023-09-18 06:41:31 99 | 소방서4,2023-09-12 06:11:52,2023-09-12 08:10:19 100 | 소방서7,2023-09-12 06:12:22,2023-09-12 07:02:26 101 | 소방서8,2023-09-20 06:12:16,2023-09-20 06:30:46 102 | -------------------------------------------------------------------------------- /part4/ch6/data6-1-2.csv: -------------------------------------------------------------------------------- 1 | 학교명,교사수,1학년,2학년,3학년,4학년,5학년,6학년 2 | 학교1,20,118,72,154,118,77,59 3 | 학교2,31,187,193,135,137,63,90 4 | 학교3,23,136,193,135,112,91,50 5 | 학교4,32,72,146,67,51,130,164 6 | 학교5,22,61,73,86,82,55,124 7 | 학교6,32,196,172,117,170,59,128 8 | 학교7,29,183,173,102,184,194,130 9 | 학교8,19,151,181,161,187,174,183 10 | 학교9,40,157,145,180,58,96,71 11 | 학교10,34,117,93,135,136,191,150 12 | 학교11,21,166,149,196,136,54,98 13 | 학교12,37,72,143,53,140,108,153 14 | 학교13,36,178,65,123,176,109,189 15 | 학교14,34,95,89,187,163,146,168 16 | 학교15,20,52,84,166,99,55,115 17 | 학교16,30,119,161,157,50,77,66 18 | 학교17,35,140,72,71,160,52,187 19 | 학교18,31,68,189,153,57,132,117 20 | 학교19,34,58,193,135,156,139,56 21 | 학교20,39,91,157,66,86,145,110 22 | 학교21,42,70,64,182,149,187,128 23 | 학교22,23,124,149,144,82,186,136 24 | 학교23,35,80,92,200,148,160,65 25 | 학교24,42,84,166,55,139,165,165 26 | 학교25,28,159,57,90,183,133,182 27 | 학교26,35,192,85,137,178,76,189 28 | 학교27,42,144,107,195,137,73,119 29 | 학교28,35,112,102,132,117,151,61 30 | 학교29,35,70,100,179,101,54,99 31 | 학교30,27,194,109,71,131,71,137 32 | 학교31,36,187,84,186,84,173,176 33 | 학교32,42,151,57,115,197,174,137 34 | 
학교33,31,107,178,82,51,176,137 35 | 학교34,41,199,81,63,142,167,142 36 | 학교35,40,157,130,141,105,164,137 37 | 학교36,25,96,147,65,156,146,178 38 | 학교37,27,94,193,146,144,67,125 39 | 학교38,22,117,178,132,64,147,63 40 | 학교39,37,149,126,77,169,196,133 41 | 학교40,26,164,118,162,194,196,138 42 | 학교41,26,166,178,197,120,136,62 43 | 학교42,24,81,55,159,177,83,172 44 | 학교43,36,60,170,188,86,133,144 45 | 학교44,26,58,168,179,123,198,53 46 | 학교45,29,148,108,66,114,122,93 47 | 학교46,32,141,158,61,53,177,92 48 | 학교47,25,61,118,184,92,185,99 49 | 학교48,28,177,169,107,122,185,73 50 | 학교49,22,163,189,192,121,99,183 51 | 학교50,21,150,154,164,78,65,195 52 | 학교51,32,161,182,141,89,176,84 53 | 학교52,25,104,111,59,112,103,68 54 | 학교53,31,56,52,155,132,190,57 55 | 학교54,20,113,161,124,97,127,78 56 | 학교55,28,133,104,181,56,142,89 57 | 학교56,33,107,156,106,180,161,150 58 | 학교57,36,98,149,69,177,60,123 59 | 학교58,23,171,77,82,81,167,140 60 | 학교59,36,54,163,183,140,123,142 61 | 학교60,25,149,62,77,180,74,133 62 | 학교61,42,71,102,148,96,157,82 63 | 학교62,34,85,51,91,72,134,58 64 | 학교63,22,87,106,167,160,101,153 65 | 학교64,20,64,88,79,168,73,84 66 | 학교65,40,60,141,162,102,111,110 67 | 학교66,32,137,86,72,52,124,133 68 | 학교67,40,152,138,108,65,64,190 69 | 학교68,21,78,113,123,110,82,192 70 | 학교69,19,189,117,157,192,144,144 71 | 학교70,39,83,156,58,124,139,130 72 | 학교71,41,50,139,179,50,125,70 73 | 학교72,31,97,133,163,76,186,100 74 | 학교73,31,68,99,102,141,159,173 75 | 학교74,28,191,189,91,149,56,116 76 | 학교75,42,57,91,99,143,97,75 77 | 학교76,23,75,119,81,68,142,64 78 | 학교77,27,142,83,114,155,73,116 79 | 학교78,35,191,80,186,80,134,123 80 | 학교79,39,128,162,110,51,73,78 81 | 학교80,38,75,162,149,141,96,83 82 | 학교81,31,50,156,103,111,51,156 83 | 학교82,23,71,94,187,118,157,93 84 | 학교83,31,128,66,56,81,195,150 85 | 학교84,27,195,63,189,98,129,129 86 | 학교85,38,190,136,55,87,128,122 87 | 학교86,25,159,104,177,61,158,66 88 | 학교87,23,93,62,190,106,66,164 89 | 학교88,37,98,152,195,161,186,143 90 | 학교89,21,105,72,162,60,186,129 91 | 
학교90,41,57,103,59,89,184,75 92 | 학교91,41,62,155,170,148,135,96 93 | 학교92,39,153,159,76,72,61,105 94 | 학교93,23,84,199,127,143,180,157 95 | 학교94,41,117,122,155,97,186,185 96 | 학교95,40,190,73,50,198,86,183 97 | 학교96,38,75,166,113,191,172,57 98 | 학교97,30,125,169,145,122,187,194 99 | 학교98,37,136,100,93,60,145,63 100 | 학교99,37,74,175,145,157,95,76 101 | 학교100,40,68,109,196,148,84,179 102 | -------------------------------------------------------------------------------- /part4/ch6/data6-1-3.csv: -------------------------------------------------------------------------------- 1 | 날짜,강력범죄,절도범죄,폭력범죄,지능범죄,풍속범죄,교통범죄,경찰서명 2 | 2020년 01월,22,102,86,62,28,212,B경찰서 3 | 2020년 02월,26,138,80,61,31,183,E경찰서 4 | 2020년 03월,14,129,76,60,29,202,C경찰서 5 | 2020년 04월,26,142,83,71,33,182,B경찰서 6 | 2020년 05월,28,131,80,72,28,212,B경찰서 7 | 2020년 06월,22,104,84,72,34,201,A경찰서 8 | 2020년 07월,14,134,84,72,32,182,A경찰서 9 | 2020년 08월,27,101,71,67,31,198,B경찰서 10 | 2020년 09월,21,108,87,70,33,187,B경찰서 11 | 2020년 10월,21,114,89,61,32,212,E경찰서 12 | 2020년 11월,22,134,79,64,35,198,D경찰서 13 | 2020년 12월,13,118,82,76,37,236,C경찰서 14 | 2021년 01월,16,104,82,60,25,206,C경찰서 15 | 2021년 02월,19,145,88,60,30,204,B경찰서 16 | 2021년 03월,12,136,79,72,32,233,B경찰서 17 | 2021년 04월,26,106,87,71,28,216,C경찰서 18 | 2021년 05월,13,140,75,80,29,187,E경찰서 19 | 2021년 06월,12,131,78,65,30,201,E경찰서 20 | 2021년 07월,12,116,79,65,31,202,D경찰서 21 | 2021년 08월,25,129,77,69,33,185,A경찰서 22 | 2021년 09월,28,107,71,71,32,203,D경찰서 23 | 2021년 10월,18,130,71,77,30,206,C경찰서 24 | 2021년 11월,12,140,82,73,32,187,A경찰서 25 | 2021년 12월,27,108,90,89,20,213,E경찰서 26 | 2022년 01월,23,140,83,60,26,219,A경찰서 27 | 2022년 02월,11,111,73,60,31,211,B경찰서 28 | 2022년 03월,17,125,87,76,25,220,C경찰서 29 | 2022년 04월,10,112,78,78,28,185,E경찰서 30 | 2022년 05월,20,122,76,74,30,192,B경찰서 31 | 2022년 06월,24,139,87,77,29,192,E경찰서 32 | 2022년 07월,19,107,79,80,30,193,E경찰서 33 | 2022년 08월,27,124,81,62,29,187,C경찰서 34 | 2022년 09월,25,108,76,66,28,182,B경찰서 35 | 2022년 10월,25,134,71,75,33,189,C경찰서 36 | 2022년 
11월,26,122,72,62,31,180,E경찰서 37 | 2022년 12월,22,104,87,80,31,181,C경찰서 38 | 2023년 01월,28,127,90,73,33,202,B경찰서 39 | 2023년 02월,10,102,96,73,33,166,A경찰서 40 | 2023년 03월,29,140,70,79,34,211,C경찰서 41 | 2023년 04월,17,123,100,62,32,201,B경찰서 42 | 2023년 05월,11,138,70,75,35,190,B경찰서 43 | 2023년 06월,30,128,78,74,34,216,C경찰서 44 | 2023년 07월,30,144,74,73,31,248,D경찰서 45 | 2023년 08월,26,120,86,76,33,188,C경찰서 46 | 2023년 09월,11,117,86,65,33,192,C경찰서 47 | 2023년 10월,12,117,86,67,28,200,C경찰서 48 | 2023년 11월,22,103,83,72,33,196,B경찰서 49 | 2023년 12월,28,124,83,61,27,209,A경찰서 50 | 2024년 01월,10,140,90,65,31,184,B경찰서 51 | 2024년 02월,16,137,72,80,30,180,C경찰서 52 | 2024년 03월,17,144,84,76,34,197,C경찰서 53 | 2024년 04월,14,150,85,75,31,181,C경찰서 54 | 2024년 05월,22,119,73,74,34,200,B경찰서 55 | 2024년 06월,30,122,70,70,24,161,B경찰서 56 | 2024년 07월,21,115,76,62,35,151,C경찰서 57 | 2024년 08월,17,104,98,85,24,180,E경찰서 58 | 2024년 09월,21,128,84,69,31,181,B경찰서 59 | 2024년 10월,20,105,80,77,33,188,E경찰서 60 | 2024년 11월,25,148,90,67,33,210,B경찰서 61 | 2024년 12월,21,142,77,62,35,183,E경찰서 62 | -------------------------------------------------------------------------------- /part4/ch6/data6-3-2.csv: -------------------------------------------------------------------------------- 1 | solar,wind,o3,temperature 2 | 89.14,6.28,33.52,23.0 3 | 109.97,1.04,27.01,20.7 4 | 102.83,6.42,41.0,20.5 5 | 84.94,10.2,33.44,22.2 6 | 94.21,4.95,29.97,21.4 7 | 116.51,5.07,28.97,24.7 8 | 75.73,5.36,29.57,19.9 9 | 95.71,1.28,25.42,23.5 10 | 112.66,5.85,29.52,22.6 11 | 91.33,1.79,31.39,21.7 12 | 93.21,4.14,32.9,24.1 13 | 99.05,7.49,32.9,22.9 14 | 114.91,3.53,28.63,20.2 15 | 93.61,6.0,22.92,22.8 16 | 95.56,7.03,26.65,24.2 17 | 95.66,5.56,38.06,21.5 18 | 122.06,2.26,34.48,22.1 19 | 121.87,4.34,31.85,21.0 20 | 110.04,8.92,26.19,21.5 21 | 103.86,0.95,30.02,23.7 22 | 107.37,4.45,23.72,20.3 23 | 114.91,3.9,27.24,19.5 24 | 90.64,5.24,28.77,20.8 25 | 111.76,6.5,28.19,19.1 26 | 87.46,8.22,34.78,22.3 27 | 93.62,4.46,22.91,20.8 28 | 109.07,6.62,25.67,18.3 29 | 
85.71,6.0,23.13,20.6 30 | 98.6,5.95,23.81,21.2 31 | 91.38,3.87,30.62,22.6 32 | 97.44,3.01,22.0,21.6 33 | 72.01,2.8,33.77,25.2 34 | 82.28,3.49,28.77,20.5 35 | 93.0,5.64,30.34,21.5 36 | 109.27,6.52,31.61,21.6 37 | 98.26,5.65,27.83,21.1 38 | 100.03,3.9,35.16,22.2 39 | 106.88,8.61,29.03,22.6 40 | 91.2,8.04,32.97,24.0 41 | 102.84,4.29,29.0,18.8 42 | 91.95,3.35,31.45,20.8 43 | 82.72,5.26,31.4,20.5 44 | 96.09,7.53,31.25,23.4 45 | 105.74,5.67,25.13,21.1 46 | 103.39,6.11,32.18,18.0 47 | 99.88,4.58,28.41,22.0 48 | 123.92,5.91,33.15,22.2 49 | 104.13,8.09,19.24,21.8 50 | 109.79,4.52,22.67,19.9 51 | 122.38,5.29,31.82,25.7 52 | 87.06,5.51,39.31,20.9 53 | 89.61,5.57,34.18,21.2 54 | 117.44,2.18,26.59,20.6 55 | 92.02,1.25,21.54,20.1 56 | 100.3,2.96,33.71,19.6 57 | 110.69,5.34,29.6,21.2 58 | 108.91,6.11,32.95,20.0 59 | 117.55,3.94,30.58,26.4 60 | 114.96,7.75,30.15,18.0 61 | 110.69,4.71,44.79,19.9 62 | 92.27,5.04,29.97,20.8 63 | 107.95,4.61,29.2,23.3 64 | 103.14,5.27,29.39,21.0 65 | 86.74,6.41,27.08,22.7 66 | 114.17,6.33,34.95,20.0 67 | 108.07,3.2,28.23,22.2 68 | 100.45,8.05,33.18,25.0 69 | 97.67,2.81,31.42,20.1 70 | 88.02,5.16,36.09,22.4 71 | 102.0,4.45,32.1,22.3 72 | 104.68,2.9,23.93,17.5 73 | 91.69,4.85,23.37,20.4 74 | 111.62,3.52,37.04,24.5 75 | 89.03,5.15,26.96,21.8 76 | 78.77,5.81,23.4,18.8 77 | 110.4,7.94,26.65,23.1 78 | 95.97,5.61,36.32,20.8 79 | 98.74,3.78,22.9,22.5 80 | 91.62,4.22,25.67,22.4 81 | 83.94,5.28,26.67,19.2 82 | 112.55,5.19,23.74,23.5 83 | 93.11,7.92,24.08,19.3 84 | 116.61,7.79,22.41,19.2 85 | 108.07,4.28,27.69,20.2 86 | 96.85,3.9,28.23,21.1 87 | 89.14,-0.11,26.59,22.3 88 | 92.68,3.9,21.73,20.1 89 | 87.87,3.04,36.27,23.3 90 | 120.87,4.29,23.35,22.7 91 | 101.64,5.78,31.39,20.4 92 | 111.5,5.35,24.63,23.7 93 | 87.33,4.94,33.34,23.9 94 | 101.81,5.4,34.78,21.6 95 | 111.78,4.75,25.61,21.6 96 | 96.65,5.39,20.38,18.0 97 | 110.31,-1.46,33.48,21.9 98 | 89.15,4.46,39.38,21.1 99 | 86.37,4.78,32.08,22.1 100 | 103.79,4.32,30.8,21.6 101 | 96.21,4.56,34.1,22.3 102 | 
-------------------------------------------------------------------------------- /part4/ch6/energy_test.csv: -------------------------------------------------------------------------------- 1 | Compac,Surf_Area,Wall_Area,Roof,Height,Orient,Glaze_Area,Glaze_Distr,Cool_Load 2 | 0.64,784.0,343.0,220.5,Short,South,0.4,4,22.25 3 | 0.82,612.5,318.5,Large,Tall,North,0.4,3,32.43 4 | 0.76,661.5,416.5,Medium,Tall,South,0.1,5,33.64 5 | 0.74,686.0,245.0,220.5,Short,East,0.4,1,17.25 6 | 0.64,784.0,343.0,220.5,Short,North,0.25,5,20.13 7 | 0.69,735.0,294.0,220.5,Short,South,0.1,2,13.32 8 | 0.74,686.0,245.0,220.5,Short,East,0.1,2,13.72 9 | 0.71,710.5,269.5,220.5,Short,South,0.4,5,15.33 10 | 0.76,661.5,416.5,Medium,Tall,South,0.1,4,33.89 11 | 0.66,759.5,318.5,220.5,Short,East,0.4,4,17.82 12 | 0.79,637.0,343.0,Large,Tall,North,0.1,5,34.99 13 | 0.64,784.0,343.0,220.5,Short,North,0.4,2,21.72 14 | 0.74,686.0,245.0,220.5,Short,West,0.25,3,14.76 15 | 0.82,612.5,318.5,Large,Tall,North,0.25,1,30.17 16 | 0.76,661.5,416.5,Medium,Tall,East,0.4,4,39.67 17 | 0.64,784.0,343.0,220.5,Short,West,0.4,5,21.4 18 | 0.74,686.0,245.0,220.5,Short,East,0.25,2,15.1 19 | 0.69,735.0,294.0,220.5,Short,West,0.4,1,17.2 20 | 0.71,710.5,269.5,220.5,Short,North,0.1,4,13.67 21 | 0.62,808.5,367.5,220.5,Short,East,0.1,3,14.23 22 | 0.64,784.0,343.0,220.5,Short,South,0.0,0,16.75 23 | 0.79,637.0,343.0,Large,Tall,West,0.4,4,39.56 24 | 0.76,661.5,416.5,Medium,Tall,North,0.1,1,33.87 25 | 0.66,759.5,318.5,220.5,Short,North,0.1,4,14.86 26 | 0.82,612.5,318.5,Large,Tall,South,0.4,4,31.14 27 | 0.64,784.0,343.0,220.5,Short,West,0.25,2,20.43 28 | 0.9,563.5,318.5,Medium,Tall,South,0.1,3,30.08 29 | 0.98,514.5,294.0,Small,Tall,West,0.4,5,34.01 30 | 0.64,784.0,343.0,220.5,Short,East,0.4,5,20.21 31 | 0.66,759.5,318.5,220.5,Short,North,0.4,3,17.04 32 | 0.86,588.0,294.0,Large,Tall,East,0.25,3,32.93 33 | 0.71,710.5,269.5,220.5,Short,West,0.25,3,14.94 34 | 0.64,784.0,343.0,220.5,Short,North,0.1,5,19.24 35 | 
0.82,612.5,318.5,Large,Tall,West,0.25,4,27.34 36 | 0.79,637.0,343.0,Large,Tall,South,0.4,3,38.81 37 | 0.86,588.0,294.0,Large,Tall,South,0.4,4,36.21 38 | 0.79,637.0,343.0,Large,Tall,East,0.1,5,34.18 39 | 0.79,637.0,343.0,Large,Tall,North,0.25,2,43.86 40 | 0.62,808.5,367.5,220.5,Short,West,0.4,3,16.0 41 | 0.79,637.0,343.0,Large,Tall,East,0.25,4,45.48 42 | 0.74,686.0,245.0,220.5,Short,West,0.25,2,15.44 43 | 0.86,588.0,294.0,Large,Tall,North,0.1,3,31.73 44 | 0.74,686.0,245.0,220.5,Short,South,0.0,0,10.94 45 | 0.76,661.5,416.5,Medium,Tall,South,0.25,1,37.45 46 | 0.98,514.5,294.0,Small,Tall,West,0.4,4,33.88 47 | 0.74,686.0,245.0,220.5,Short,West,0.4,3,16.6 48 | 0.71,710.5,269.5,220.5,Short,North,0.1,1,13.8 49 | 0.62,808.5,367.5,220.5,Short,West,0.25,1,15.76 50 | 0.71,710.5,269.5,220.5,Short,North,0.4,1,17.1 51 | 0.9,563.5,318.5,Medium,Tall,West,0.25,3,32.46 52 | 0.86,588.0,294.0,Large,Tall,North,0.25,5,28.02 53 | 0.98,514.5,294.0,Small,Tall,East,0.1,4,25.72 54 | 0.74,686.0,245.0,220.5,Short,North,0.1,5,13.65 55 | 0.74,686.0,245.0,220.5,Short,East,0.4,5,16.62 56 | 0.64,784.0,343.0,220.5,Short,North,0.0,0,16.78 57 | 0.86,588.0,294.0,Large,Tall,East,0.25,1,28.61 58 | 0.64,784.0,343.0,220.5,Short,East,0.0,0,16.8 59 | 0.9,563.5,318.5,Medium,Tall,East,0.1,3,32.85 60 | 0.76,661.5,416.5,Medium,Tall,North,0.4,5,39.37 61 | 0.9,563.5,318.5,Medium,Tall,North,0.0,0,28.28 62 | 0.74,686.0,245.0,220.5,Short,North,0.25,4,14.92 63 | 0.62,808.5,367.5,220.5,Short,South,0.25,4,15.07 64 | 0.71,710.5,269.5,220.5,Short,North,0.25,1,15.42 65 | 0.98,514.5,294.0,Small,Tall,South,0.25,3,30.1 66 | 0.69,735.0,294.0,220.5,Short,North,0.25,3,14.92 67 | 0.64,784.0,343.0,220.5,Short,North,0.4,4,21.68 68 | 0.9,563.5,318.5,Medium,Tall,West,0.4,1,40.99 69 | 0.66,759.5,318.5,220.5,Short,North,0.4,2,17.85 70 | 0.62,808.5,367.5,220.5,Short,South,0.1,3,14.14 71 | 0.79,637.0,343.0,Large,Tall,North,0.1,3,43.12 72 | 0.76,661.5,416.5,Medium,Tall,South,0.1,1,34.14 73 | 0.71,710.5,269.5,220.5,Short,East,0.4,2,17.37 
74 | 0.64,784.0,343.0,220.5,Short,West,0.4,3,21.53 75 | 0.98,514.5,294.0,Small,Tall,West,0.1,5,26.18 76 | 0.74,686.0,245.0,220.5,Short,West,0.1,2,13.79 77 | 0.86,588.0,294.0,Large,Tall,North,0.4,4,31.53 78 | 0.98,514.5,294.0,Small,Tall,East,0.4,5,33.23 79 | 0.76,661.5,416.5,Medium,Tall,East,0.1,1,34.07 80 | 0.71,710.5,269.5,220.5,Short,South,0.1,5,14.26 81 | 0.62,808.5,367.5,220.5,Short,South,0.1,5,13.99 82 | 0.98,514.5,294.0,Small,Tall,North,0.25,4,29.61 83 | 0.62,808.5,367.5,220.5,Short,West,0.25,2,15.3 84 | 0.82,612.5,318.5,Large,Tall,East,0.1,1,24.91 85 | 0.71,710.5,269.5,220.5,Short,South,0.4,3,16.7 86 | 0.64,784.0,343.0,220.5,Short,North,0.1,2,19.23 87 | 0.66,759.5,318.5,220.5,Short,East,0.1,3,13.7 88 | 0.66,759.5,318.5,220.5,Short,South,0.4,2,18.36 89 | 0.64,784.0,343.0,220.5,Short,South,0.25,5,20.19 90 | 0.98,514.5,294.0,Small,Tall,East,0.4,2,33.13 91 | 0.69,735.0,294.0,220.5,Short,East,0.0,0,12.05 92 | 0.71,710.5,269.5,220.5,Short,North,0.25,4,14.67 93 | 0.79,637.0,343.0,Large,Tall,North,0.25,3,45.13 94 | 0.76,661.5,416.5,Medium,Tall,South,0.1,2,34.17 95 | 0.66,759.5,318.5,220.5,Short,West,0.1,4,14.83 96 | 0.9,563.5,318.5,Medium,Tall,South,0.1,2,29.36 97 | 0.9,563.5,318.5,Medium,Tall,East,0.25,2,32.64 98 | 0.98,514.5,294.0,Small,Tall,North,0.25,1,29.79 99 | 0.66,759.5,318.5,220.5,Short,North,0.25,5,15.83 100 | 0.82,612.5,318.5,Large,Tall,South,0.1,3,25.35 101 | 0.64,784.0,343.0,220.5,Short,West,0.4,2,21.93 102 | 0.98,514.5,294.0,Small,Tall,East,0.25,1,29.79 103 | 0.64,784.0,343.0,220.5,Short,West,0.25,5,20.29 104 | 0.76,661.5,416.5,Medium,Tall,North,0.0,0,29.79 105 | 0.82,612.5,318.5,Large,Tall,East,0.4,1,29.13 106 | 0.76,661.5,416.5,Medium,Tall,North,0.25,3,36.07 107 | 0.86,588.0,294.0,Large,Tall,East,0.1,4,31.76 108 | 0.82,612.5,318.5,Large,Tall,North,0.25,5,26.53 109 | 0.86,588.0,294.0,Large,Tall,West,0.4,5,35.71 110 | 0.86,588.0,294.0,Large,Tall,West,0.0,0,27.87 111 | 0.74,686.0,245.0,220.5,Short,West,0.25,5,14.03 112 | 
0.71,710.5,269.5,220.5,Short,East,0.25,1,15.85 113 | 0.62,808.5,367.5,220.5,Short,North,0.1,1,14.34 114 | 0.82,612.5,318.5,Large,Tall,North,0.1,5,25.11 115 | 0.79,637.0,343.0,Large,Tall,West,0.25,1,45.52 116 | 0.66,759.5,318.5,220.5,Short,North,0.25,4,15.95 117 | 0.76,661.5,416.5,Medium,Tall,West,0.4,5,38.18 118 | 0.76,661.5,416.5,Medium,Tall,North,0.1,5,34.25 119 | 0.86,588.0,294.0,Large,Tall,South,0.25,3,28.38 120 | 0.79,637.0,343.0,Large,Tall,East,0.25,2,37.41 121 | 0.69,735.0,294.0,220.5,Short,South,0.25,5,15.22 122 | 0.71,710.5,269.5,220.5,Short,West,0.1,4,14.2 123 | 0.74,686.0,245.0,220.5,Short,South,0.4,3,16.57 124 | 0.74,686.0,245.0,220.5,Short,North,0.1,3,13.6 125 | 0.74,686.0,245.0,220.5,Short,East,0.25,5,14.58 126 | 0.9,563.5,318.5,Medium,Tall,West,0.0,0,29.6 127 | 0.62,808.5,367.5,220.5,Short,South,0.4,1,17.15 128 | 0.62,808.5,367.5,220.5,Short,East,0.25,5,14.61 129 | 0.86,588.0,294.0,Large,Tall,East,0.0,0,21.97 130 | 0.62,808.5,367.5,220.5,Short,North,0.4,2,17.36 131 | 0.86,588.0,294.0,Large,Tall,East,0.4,5,31.2 132 | 0.76,661.5,416.5,Medium,Tall,North,0.4,4,40.36 133 | 0.71,710.5,269.5,220.5,Short,East,0.1,5,13.75 134 | 0.74,686.0,245.0,220.5,Short,South,0.1,1,13.48 135 | 0.9,563.5,318.5,Medium,Tall,West,0.1,1,34.33 136 | 0.64,784.0,343.0,220.5,Short,South,0.25,3,20.46 137 | 0.79,637.0,343.0,Large,Tall,North,0.25,1,43.8 138 | 0.79,637.0,343.0,Large,Tall,North,0.25,4,36.26 139 | 0.74,686.0,245.0,220.5,Short,West,0.1,5,13.5 140 | 0.62,808.5,367.5,220.5,Short,East,0.25,3,14.96 141 | 0.74,686.0,245.0,220.5,Short,East,0.25,4,15.24 142 | 0.69,735.0,294.0,220.5,Short,East,0.25,2,15.85 143 | 0.76,661.5,416.5,Medium,Tall,West,0.25,2,36.81 144 | 0.82,612.5,318.5,Large,Tall,East,0.0,0,21.46 145 | 0.62,808.5,367.5,220.5,Short,East,0.1,2,14.57 146 | 0.71,710.5,269.5,220.5,Short,East,0.4,3,15.47 147 | 0.64,784.0,343.0,220.5,Short,East,0.1,4,19.25 148 | 0.9,563.5,318.5,Medium,Tall,South,0.4,4,39.22 149 | 0.74,686.0,245.0,220.5,Short,North,0.4,5,16.69 150 | 
0.69,735.0,294.0,220.5,Short,North,0.25,5,15.14 151 | 0.9,563.5,318.5,Medium,Tall,North,0.1,3,34.14 152 | 0.69,735.0,294.0,220.5,Short,East,0.1,2,13.54 153 | 0.69,735.0,294.0,220.5,Short,North,0.4,3,16.35 154 | 0.79,637.0,343.0,Large,Tall,East,0.4,4,47.59 155 | 0.76,661.5,416.5,Medium,Tall,East,0.1,5,34.35 156 | 0.71,710.5,269.5,220.5,Short,West,0.1,2,14.21 157 | 0.62,808.5,367.5,220.5,Short,North,0.4,4,17.1 158 | 0.9,563.5,318.5,Medium,Tall,South,0.1,4,32.83 159 | 0.98,514.5,294.0,Small,Tall,South,0.1,3,25.84 160 | 0.71,710.5,269.5,220.5,Short,West,0.4,4,17.74 161 | 0.64,784.0,343.0,220.5,Short,West,0.4,1,22.53 162 | 0.66,759.5,318.5,220.5,Short,West,0.25,4,16.14 163 | 0.64,784.0,343.0,220.5,Short,North,0.4,3,20.78 164 | 0.74,686.0,245.0,220.5,Short,West,0.4,1,17.25 165 | 0.71,710.5,269.5,220.5,Short,West,0.4,5,15.31 166 | 0.82,612.5,318.5,Large,Tall,West,0.25,1,31.39 167 | 0.98,514.5,294.0,Small,Tall,East,0.1,1,26.37 168 | 0.74,686.0,245.0,220.5,Short,South,0.4,4,16.9 169 | 0.9,563.5,318.5,Medium,Tall,North,0.25,2,35.56 170 | 0.74,686.0,245.0,220.5,Short,West,0.1,1,13.7 171 | 0.9,563.5,318.5,Medium,Tall,North,0.25,4,32.12 172 | 0.9,563.5,318.5,Medium,Tall,North,0.1,4,29.34 173 | 0.62,808.5,367.5,220.5,Short,West,0.25,5,14.75 174 | 0.79,637.0,343.0,Large,Tall,East,0.1,3,41.22 175 | 0.64,784.0,343.0,220.5,Short,South,0.25,4,20.37 176 | 0.64,784.0,343.0,220.5,Short,West,0.25,1,21.08 177 | 0.76,661.5,416.5,Medium,Tall,North,0.1,2,33.91 178 | 0.76,661.5,416.5,Medium,Tall,East,0.1,2,34.07 179 | 0.82,612.5,318.5,Large,Tall,West,0.1,5,27.69 180 | 0.62,808.5,367.5,220.5,Short,East,0.4,2,17.38 181 | 0.98,514.5,294.0,Small,Tall,North,0.1,5,25.64 182 | 0.82,612.5,318.5,Large,Tall,East,0.4,2,27.93 183 | 0.82,612.5,318.5,Large,Tall,North,0.1,1,27.31 184 | 0.74,686.0,245.0,220.5,Short,East,0.1,1,13.71 185 | 0.62,808.5,367.5,220.5,Short,North,0.25,3,14.89 186 | 0.82,612.5,318.5,Large,Tall,East,0.1,2,25.02 187 | 0.66,759.5,318.5,220.5,Short,North,0.1,2,14.86 188 | 
0.62,808.5,367.5,220.5,Short,South,0.4,3,16.56 189 | 0.64,784.0,343.0,220.5,Short,North,0.4,5,20.82 190 | 0.69,735.0,294.0,220.5,Short,East,0.25,1,15.63 191 | 0.66,759.5,318.5,220.5,Short,South,0.1,2,15.0 192 | 0.98,514.5,294.0,Small,Tall,North,0.4,1,33.37 193 | 0.64,784.0,343.0,220.5,Short,South,0.4,1,22.72 194 | 0.66,759.5,318.5,220.5,Short,North,0.1,5,14.54 195 | 0.62,808.5,367.5,220.5,Short,South,0.1,4,14.28 196 | 0.62,808.5,367.5,220.5,Short,East,0.25,2,15.64 197 | 0.79,637.0,343.0,Large,Tall,South,0.4,1,38.35 198 | 0.74,686.0,245.0,220.5,Short,South,0.1,3,13.65 199 | 0.76,661.5,416.5,Medium,Tall,South,0.4,2,40.47 200 | 0.79,637.0,343.0,Large,Tall,East,0.4,2,39.41 201 | 0.66,759.5,318.5,220.5,Short,East,0.4,2,17.89 202 | 0.82,612.5,318.5,Large,Tall,North,0.0,0,23.77 203 | 0.76,661.5,416.5,Medium,Tall,North,0.4,2,39.48 204 | 0.74,686.0,245.0,220.5,Short,North,0.0,0,10.9 205 | 0.98,514.5,294.0,Small,Tall,North,0.4,5,32.88 206 | 0.69,735.0,294.0,220.5,Short,North,0.1,4,14.29 207 | 0.69,735.0,294.0,220.5,Short,North,0.4,2,16.44 208 | 0.86,588.0,294.0,Large,Tall,North,0.0,0,27.3 209 | 0.69,735.0,294.0,220.5,Short,East,0.4,4,16.88 210 | 0.98,514.5,294.0,Small,Tall,West,0.1,4,25.87 211 | 0.62,808.5,367.5,220.5,Short,East,0.1,1,14.5 212 | 0.82,612.5,318.5,Large,Tall,North,0.1,3,28.68 213 | 0.69,735.0,294.0,220.5,Short,West,0.4,2,17.22 214 | 0.71,710.5,269.5,220.5,Short,East,0.4,4,17.2 215 | 0.62,808.5,367.5,220.5,Short,North,0.1,4,14.37 216 | 0.66,759.5,318.5,220.5,Short,North,0.25,1,16.39 217 | 0.82,612.5,318.5,Large,Tall,North,0.1,4,24.61 218 | 0.66,759.5,318.5,220.5,Short,South,0.0,0,12.4 219 | 0.74,686.0,245.0,220.5,Short,East,0.1,4,13.36 220 | 0.64,784.0,343.0,220.5,Short,South,0.25,2,20.48 221 | 0.86,588.0,294.0,Large,Tall,South,0.4,1,31.7 222 | 0.79,637.0,343.0,Large,Tall,West,0.0,0,39.44 223 | 0.64,784.0,343.0,220.5,Short,East,0.25,2,20.56 224 | 0.64,784.0,343.0,220.5,Short,East,0.25,3,20.03 225 | 0.74,686.0,245.0,220.5,Short,East,0.0,0,11.19 226 | 
0.9,563.5,318.5,Medium,Tall,East,0.25,1,33.17 227 | 0.98,514.5,294.0,Small,Tall,West,0.25,4,30.12 228 | 0.66,759.5,318.5,220.5,Short,East,0.25,5,16.03 229 | 0.82,612.5,318.5,Large,Tall,South,0.1,1,24.61 230 | 0.74,686.0,245.0,220.5,Short,South,0.25,4,15.03 231 | 0.64,784.0,343.0,220.5,Short,North,0.1,3,19.14 232 | 0.79,637.0,343.0,Large,Tall,North,0.1,4,34.25 233 | -------------------------------------------------------------------------------- /part4/ch6/p6_type1.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPPdI+WdJzLJVLF3SZNKApC"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch6/p6_type1.ipynb)"],"metadata":{"id":"MY9rC_Qhnqkc"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713101751899,"user_tz":-540,"elapsed":1575,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"0621c31e-a180-4caa-e978-d9acb4f4358c"},"outputs":[{"output_type":"stream","name":"stdout","text":["81\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"data6-1-1.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-1-1.csv\")\n","\n","# 1) datetime형태로 변경\n","df['도착시간'] = pd.to_datetime(df['도착시간'])\n","df['출동시간'] = pd.to_datetime(df['출동시간'])\n","\n","# 2) 출동시간과 도착시간의 차이를 분으로 계산\n","df['시간차이(분)'] = (df['도착시간'] - df['출동시간']).dt.total_seconds() / 60\n","\n","# 3) 소방서별 평균 시간차이 계산\n","avg_diff 
= df.groupby('소방서')['시간차이(분)'].mean()\n","\n","# 4) 평균 차이가 가장 큰 소방서의 시간을 찾고 출력\n","idx = avg_diff.idxmax()\n","result = avg_diff[idx]\n","print(round(result))"]},{"cell_type":"code","source":["# 참고 (시간 반올림)\n","min = 5.5\n","print(int(min),\"분\")\n","print((min-int(min))*60,\"초\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"J4T5TS-KaMOa","executionInfo":{"status":"ok","timestamp":1692598354240,"user_tz":-540,"elapsed":279,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"1ecb0908-06c2-43a2-f435-f06acacdf287"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["5 분\n","30.0 초\n"]}]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data6-1-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-1-2.csv\")\n","\n","# 1) 총 학생 수 계산\n","df['총학생수'] = df.iloc[:, 2:].sum(axis=1)\n","\n","# 2) 교사 한 명당 맡은 학생 수\n","df['학생/교사'] = df['총학생수'] / df['교사수']\n","\n","# 3) 학생/교사 최대값의 인덱스명\n","idx = df['학생/교사'].idxmax()\n","\n","# 4) 학생/교사 최대값의 학교 교사 수 출력\n","print(df.loc[idx, '교사수'])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1713101761101,"user_tz":-540,"elapsed":534,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"937a7711-535c-4609-bc82-1db54a46f8cb"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["19\n"]}]},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data6-1-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-1-2.csv\")\n","\n","# 1) 교사 한 명당 맡은 학생 수\n","df['학생/교사'] = (df['1학년'] + df['2학년'] + df['3학년'] + df['4학년'] + df['5학년'] + df['6학년']) / df['교사수']\n","\n","# 2) 학생/교사 컬럼을 내림차순으로 정렬\n","df 
= df.sort_values('학생/교사', ascending=False)\n","\n","# 3) 최상단 행의 교사수 값 출력\n","print(df.iloc[0, 1])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qk3l6Y6WjwPU","executionInfo":{"status":"ok","timestamp":1713101789183,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"cfb10b97-90c3-41df-9f93-d148337ea056"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["19\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data6-1-3.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-1-3.csv\")\n","\n","# 1) 총 범죄 건수 계산\n","df['총 범죄 건수'] = df.iloc[:, 1:7].sum(axis=1)\n","\n","# 2) 연도 슬라이싱\n","df['연도'] = df[\"날짜\"].str[:4]\n","\n","# 3) 연도별 총 범죄 건수 합 계산\n","result = df['총 범죄 건수'].groupby(df[\"연도\"]).sum()\n","\n","# 4) 가장 큰 값의 월 평균 계산\n","print(round(result.max()/12))"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1719908294893,"user_tz":-540,"elapsed":311,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"aeee2772-0a12-4d56-d8e9-10987054ba81"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["533\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"61CtkR_Dg1t9"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /part4/ch6/p6_type3.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPkyAXeTKMY80JLb1CoEssb"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 
실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch6/p6_type3.ipynb)"],"metadata":{"id":"6OZW46EDn6tl"}},{"cell_type":"markdown","source":["## 작업형3"],"metadata":{"id":"aH-zAFBloETg"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"xcmkuNKkYZyN"}},{"cell_type":"code","source":["import pandas as pd\n","df = pd.DataFrame({\n"," \"항암약\":[4,4,3,4,1,4,1,4,1,4,4,2,1,4,2,3,2,4,4,4]\n"," })\n","print(df.head(3))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2aa_3YEGOnoq","executionInfo":{"status":"ok","timestamp":1722874971322,"user_tz":-540,"elapsed":339,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"2c4f0767-55f3-4080-ebb6-be5109d52479"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":[" 항암약\n","0 4\n","1 4\n","2 3\n"]}]},{"cell_type":"code","source":["# 1) 이상 없음(4)의 빈도 계산\n","cnt = sum(df['항암약']==4)\n","\n","# 2) 항암약을 투여 받은 환자 중 '이상 없음' 비율 계산\n","ratio = cnt / len(df)\n","print(ratio)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"oxcQ8AZndVzC","executionInfo":{"status":"ok","timestamp":1722874971322,"user_tz":-540,"elapsed":3,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"bcc76e1f-74e2-4fbd-f1de-3a3c80a6e9cf"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["0.55\n"]}]},{"cell_type":"code","source":["# 1) 카테고리별 비율 계산\n","print(df['항암약'].value_counts(normalize=True))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"i_iSXeDgkYy7","executionInfo":{"status":"ok","timestamp":1722874971322,"user_tz":-540,"elapsed":2,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"ea7cccec-c4df-448a-8575-e7b5473a25ab"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["항암약\n","4 0.55\n","1 0.20\n","2 0.15\n","3 0.10\n","Name: proportion, dtype: 
float64\n"]}]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cpGaGRyGAZAF","executionInfo":{"status":"ok","timestamp":1722874973077,"user_tz":-540,"elapsed":1757,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"1d6cdeb8-fd90-4a5f-d5e9-3144d8d29856"},"outputs":[{"output_type":"stream","name":"stdout","text":["데이터 수: 20\n","[2.0, 1.0, 3.0, 14.0]\n","[2.0, 1.0, 3.0, 14.0]\n"]}],"source":["from scipy.stats import chisquare\n","\n","# 1) 각 카테고리의 비율을 리스트로 만들기\n","prob = [0.1, 0.05, 0.15, 0.7]\n","\n","# 2-1) 기대 빈도수 계산\n","print(\"데이터 수: \", len(df))\n","expected_counts = [0.1*20, 0.05*20, 0.15*20, 0.7*20]\n","print(expected_counts)\n","\n","# 2-2)기대 빈도수 계산 (다른 방법)\n","expected_counts = [x*len(df) for x in prob]\n","print(expected_counts)"]},{"cell_type":"code","source":["# 3) 관찰 빈도수 계산\n","observed_counts = df['항암약'].value_counts().sort_index().to_list()\n","print(observed_counts)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"K0KzbM8ijM4a","executionInfo":{"status":"ok","timestamp":1722874973077,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"de2bc798-af8b-4f06-94da-211a9e2e9c6a"},"execution_count":5,"outputs":[{"output_type":"stream","name":"stdout","text":["[4, 3, 2, 11]\n"]}]},{"cell_type":"code","source":["# 4) 카이제곱 검정 수행\n","print(chisquare(f_obs=observed_counts, f_exp=expected_counts))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"naDXhxrCjPdL","executionInfo":{"status":"ok","timestamp":1722874973077,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"a982c52f-a3d7-49d3-eebe-e5885f42fd60"},"execution_count":6,"outputs":[{"output_type":"stream","name":"stdout","text":["Power_divergenceResult(statistic=6.976190476190476, pvalue=0.07266054733847573)\n"]}]},{"cell_type":"code","source":["# 참고 - f_obs, f_exp 생략 
가능\n","print(chisquare(observed_counts, expected_counts))"],"metadata":{"id":"NmCzsf-UfZz4","executionInfo":{"status":"ok","timestamp":1722874973077,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"e473187a-50c8-42a7-d3d7-ce6fc35d60b0","colab":{"base_uri":"https://localhost:8080/"}},"execution_count":7,"outputs":[{"output_type":"stream","name":"stdout","text":["Power_divergenceResult(statistic=6.976190476190476, pvalue=0.07266054733847573)\n"]}]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"t6IB5WUEq-SI"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data6-3-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-3-2.csv\")\n","\n","print(df.head(3))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Hy0NI7InTB5F","executionInfo":{"status":"ok","timestamp":1722874973585,"user_tz":-540,"elapsed":511,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"473d57d8-9067-4942-ffd8-aed054c4d8da"},"execution_count":8,"outputs":[{"output_type":"stream","name":"stdout","text":[" solar wind o3 temperature\n","0 89.14 6.28 33.52 23.0\n","1 109.97 1.04 27.01 20.7\n","2 102.83 6.42 41.00 20.5\n"]}]},{"cell_type":"code","source":["from statsmodels.formula.api import ols\n","\n","# 1) R스타일 formula\n","formula = 'temperature ~ solar + wind + o3'\n","\n","# 2) 회귀 모델 학습\n","model= ols(formula, data=df).fit()\n","\n","# 3) 회귀 모델 요약 정보\n","summary = model.summary()\n","print(summary)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2bjnOPN6gCLw","executionInfo":{"status":"ok","timestamp":1722874974443,"user_tz":-540,"elapsed":860,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"d08030f1-9f49-44bd-969a-2df9134841f8"},"execution_count":9,"outputs":[{"output_type":"stream","name":"stdout","text":[" OLS Regression Results 
\n","==============================================================================\n","Dep. Variable: temperature R-squared: 0.044\n","Model: OLS Adj. R-squared: 0.014\n","Method: Least Squares F-statistic: 1.464\n","Date: Mon, 05 Aug 2024 Prob (F-statistic): 0.229\n","Time: 16:22:54 Log-Likelihood: -195.45\n","No. Observations: 100 AIC: 398.9\n","Df Residuals: 96 BIC: 409.3\n","Df Model: 3 \n","Covariance Type: nonrobust \n","==============================================================================\n"," coef std err t P>|t| [0.025 0.975]\n","------------------------------------------------------------------------------\n","Intercept 19.0507 1.994 9.555 0.000 15.093 23.008\n","solar 0.0039 0.015 0.251 0.802 -0.027 0.035\n","wind -0.0252 0.090 -0.280 0.780 -0.204 0.153\n","o3 0.0749 0.036 2.079 0.040 0.003 0.146\n","==============================================================================\n","Omnibus: 0.654 Durbin-Watson: 2.328\n","Prob(Omnibus): 0.721 Jarque-Bera (JB): 0.672\n","Skew: 0.187 Prob(JB): 0.715\n","Kurtosis: 2.855 Cond. No. 1.20e+03\n","==============================================================================\n","\n","Notes:\n","[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n","[2] The condition number is large, 1.2e+03. This might indicate that there are\n","strong multicollinearity or other numerical problems.\n"]}]},{"cell_type":"code","source":["# 2-1. o3에 대한 회귀계수\n","print(\"2-1. o3의 회귀계수:\", model.params['o3'])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"xELLjouUFXoq","executionInfo":{"status":"ok","timestamp":1722874974443,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"09b7896f-9d5a-4dfd-e198-76dcdfee296d"},"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["2-1. o3의 회귀계수: 0.0749385437813658\n"]}]},{"cell_type":"code","source":["# 2-2. wind에 대한 p-value\n","print(\"2-2. 
wind의 p-value:\", model.pvalues['wind'])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9M316CMLTsjQ","executionInfo":{"status":"ok","timestamp":1722874974907,"user_tz":-540,"elapsed":466,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"7ca51dca-c1a0-447e-84b2-10aa6a4e7120"},"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["2-2. wind의 p-value: 0.7797177202071661\n"]}]},{"cell_type":"code","source":["# 2-3. 예측값\n","# 1) 새 데이터를 데이터프레임으로 만들기\n","new_data = pd.DataFrame({\n"," 'solar': [100],\n"," 'wind': [5],\n"," 'o3': [30]\n","})\n","print(new_data)"],"metadata":{"id":"RqikMfLeTvYZ","executionInfo":{"status":"ok","timestamp":1722874974907,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"colab":{"base_uri":"https://localhost:8080/"},"outputId":"6c788931-5572-4607-c7d0-0dda481bb748"},"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":[" solar wind o3\n","0 100 5 30\n"]}]},{"cell_type":"code","source":["# 2) 구축된 모델을 사용해 예측\n","pred = model.predict(new_data)\n","print(pred)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jGr4psT2ESrc","executionInfo":{"status":"ok","timestamp":1722874974907,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"2be9cf97-3a21-4fc4-a75b-28e4e038caf4"},"execution_count":13,"outputs":[{"output_type":"stream","name":"stdout","text":["0 21.56163\n","dtype: float64\n"]}]}]} -------------------------------------------------------------------------------- /part4/ch7/p7_type1.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyMewqKFn73wqBc13lAIZSRT"},"kernelspec":{"name":"python3","display_name":"Python 
3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch7/p7_type1.ipynb)"],"metadata":{"id":"zNG24pNSof-O"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"ImS-bayEojdA"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"JO-2zLFZsse1"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QfWBB-yjCuX8","executionInfo":{"status":"ok","timestamp":1713104075940,"user_tz":-540,"elapsed":308,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"fcef071e-5a6b-4ff3-d92c-00c98c0f4fa1"},"outputs":[{"output_type":"stream","name":"stdout","text":["2.183\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"student_assessment.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/student_assessment.csv\")\n","\n","# 1) 결측치 제거\n","# print(df.shape)\n","df = df.dropna()\n","# print(df.shape)\n","\n","# 2) 가장 많이 수강한 과목 필터링\n","id = df['id_assessment'].value_counts().idxmax()\n","cond = df['id_assessment'] == id\n","df = df[cond]\n","\n","# 3) 과목 점수 스탠다드 스케일\n","from sklearn.preprocessing import StandardScaler\n","scaler = StandardScaler()\n","df['score'] = scaler.fit_transform(df[['score']])\n","\n","# 4) 가장 큰 값\n","print(round(df['score'].max(), 3))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"sHC4blpRswAJ"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"stock_market.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/stock_market.csv\")\n","\n","# 1) close와의 상관관계(절대값)\n","df_corr = df.corr()['close'].abs()\n","\n","# 2) 상관관계가 높은 변수명\n","col = df_corr.loc['DE1':'DE77'].idxmax()\n","\n","# 3) '2)'에서 구한 변수명의 평균값\n","print(round(df[col].mean(), 
4))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"usqyPcI4J0eL","executionInfo":{"status":"ok","timestamp":1713104076541,"user_tz":-540,"elapsed":316,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"f64aefee-35d8-40e0-8dc4-3b3e144cc73a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["-0.0004\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"E9QDz8l-sy4D"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"air_quality.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/air_quality.csv\")\n","\n","# 1) IQR 계산\n","Q1 = df['CO2'].quantile(0.25)\n","Q3 = df['CO2'].quantile(0.75)\n","IQR = Q3 - Q1\n","\n","# 2) 상한 및 하한 계산\n","upper = Q3 + 1.5 * IQR\n","lower = Q1 - 1.5 * IQR\n","\n","# 3) 이상치 식별\n","outliers = df[(df['CO2'] < lower) | (df['CO2'] > upper)]\n","\n","# 4) 이상치 수\n","print(len(outliers))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"LipHWwikUBL-","executionInfo":{"status":"ok","timestamp":1713104076542,"user_tz":-540,"elapsed":3,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"7887d4ce-375f-46de-8a51-978d5ff6af97"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["304\n"]}]}]} -------------------------------------------------------------------------------- /part4/ch7/p7_type3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "[구글 코랩(Colab)에서 
실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch7/p7_type3.ipynb)" 21 | ], 22 | "metadata": { 23 | "id": "znZZZKmko0kX" 24 | } 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "source": [ 29 | "## 작업형3" 30 | ], 31 | "metadata": { 32 | "id": "H3N9Ou_Oo2mV" 33 | } 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "source": [ 38 | "### 문제1-1" 39 | ], 40 | "metadata": { 41 | "id": "paHiKoCrtKnv" 42 | } 43 | }, 44 | { 45 | "cell_type": "code", 46 | "source": [ 47 | "import pandas as pd\n", 48 | "# df = pd.read_csv(\"clam.csv\")\n", 49 | "df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/clam.csv\")\n", 50 | "\n", 51 | "# 데이터셋 분할\n", 52 | "print(df.shape)\n", 53 | "train = df.iloc[:210]\n", 54 | "test = df.iloc[210:]\n", 55 | "print(train.shape)\n", 56 | "\n", 57 | "print(train.head())" 58 | ], 59 | "metadata": { 60 | "colab": { 61 | "base_uri": "https://localhost:8080/" 62 | }, 63 | "id": "H38dKQCTrB_5", 64 | "outputId": "733154f4-f39c-4261-d606-77a8023ae92d" 65 | }, 66 | "execution_count": 6, 67 | "outputs": [ 68 | { 69 | "output_type": "stream", 70 | "name": "stdout", 71 | "text": [ 72 | "(300, 6)\n", 73 | "(210, 6)\n", 74 | " age length diameter height weight gender\n", 75 | "0 6 0.474627 0.211352 0.178189 78.971766 1\n", 76 | "1 1 0.465847 0.339388 0.170522 98.781960 1\n", 77 | "2 4 0.122807 0.238691 0.106924 88.792625 0\n", 78 | "3 4 0.204579 0.360543 0.034261 1.028847 0\n", 79 | "4 8 0.243458 0.358037 0.128080 6.503367 0\n" 80 | ] 81 | } 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "source": [ 87 | "from statsmodels.formula.api import logit\n", 88 | "import numpy as np\n", 89 | "\n", 90 | "# 1) 로지스틱 회귀 모델 생성 및 학습\n", 91 | "model = logit(\"gender ~ weight\", data=train).fit()\n", 92 | "\n", 93 | "# 2) 오즈비 계산\n", 94 | "odds_ratio = np.exp(model.params['weight'])\n", 95 | "print(round(odds_ratio, 4))" 96 | ], 97 | "metadata": { 98 | "colab": { 99 | "base_uri": 
"https://localhost:8080/" 100 | }, 101 | "id": "m9Y_2uBYFnot", 102 | "outputId": "13a98535-a066-4be7-a676-4fc793b836ba" 103 | }, 104 | "execution_count": 7, 105 | "outputs": [ 106 | { 107 | "output_type": "stream", 108 | "name": "stdout", 109 | "text": [ 110 | "Optimization terminated successfully.\n", 111 | " Current function value: 0.690045\n", 112 | " Iterations 4\n", 113 | "1.0047\n" 114 | ] 115 | } 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "source": [ 121 | "### 문제1-2" 122 | ], 123 | "metadata": { 124 | "id": "yeKVhetBtS8N" 125 | } 126 | }, 127 | { 128 | "cell_type": "code", 129 | "source": [ 130 | "# 1) 로지스틱 회귀 모델 생성 및 학습\n", 131 | "formula = \"gender ~ age + length + diameter + height + weight\"\n", 132 | "model = logit(formula, data=train).fit()\n", 133 | "\n", 134 | "# 2) 잔차이탈도\n", 135 | "print(round(-2 * model.llf,2))" 136 | ], 137 | "metadata": { 138 | "colab": { 139 | "base_uri": "https://localhost:8080/" 140 | }, 141 | "id": "4rCVKX-t_L61", 142 | "outputId": "2b6b377e-7d1a-4952-8021-75e4c1c12fea" 143 | }, 144 | "execution_count": 8, 145 | "outputs": [ 146 | { 147 | "output_type": "stream", 148 | "name": "stdout", 149 | "text": [ 150 | "Optimization terminated successfully.\n", 151 | " Current function value: 0.683173\n", 152 | " Iterations 4\n", 153 | "286.93\n" 154 | ] 155 | } 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "source": [ 161 | "### 문제1-3" 162 | ], 163 | "metadata": { 164 | "id": "x4u7dfFTtUs9" 165 | } 166 | }, 167 | { 168 | "cell_type": "code", 169 | "source": [ 170 | "from sklearn.metrics import accuracy_score\n", 171 | "\n", 172 | "# 1) test데이터를 사용해 예측 (0.5 미만: 0, 0.5 이상 1)\n", 173 | "model = logit(\"gender ~ weight\", data=train).fit()\n", 174 | "target = test.pop('gender')\n", 175 | "pred = model.predict(test)\n", 176 | "pred = (pred > 0.5).astype(int)\n", 177 | "\n", 178 | "# 2) 실제 값과 예측값을 사용하여 정확도 계산\n", 179 | "accuracy = accuracy_score(target, pred)\n", 180 | "\n", 181 | "# 3) 오류율 계산\n", 182 | 
"error_rate = 1 - accuracy\n", 183 | "print(round(error_rate, 3))\n", 184 | "# 0.478" 185 | ], 186 | "metadata": { 187 | "colab": { 188 | "base_uri": "https://localhost:8080/" 189 | }, 190 | "id": "7x8XLDu4HksT", 191 | "outputId": "5a5e5d32-f0ea-4e62-9cbc-7765ae478b6e" 192 | }, 193 | "execution_count": 9, 194 | "outputs": [ 195 | { 196 | "output_type": "stream", 197 | "name": "stdout", 198 | "text": [ 199 | "Optimization terminated successfully.\n", 200 | " Current function value: 0.690045\n", 201 | " Iterations 4\n", 202 | "0.478\n" 203 | ] 204 | } 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "source": [ 210 | "### 문제2-1" 211 | ], 212 | "metadata": { 213 | "id": "4MSQ0eB1tWsh" 214 | } 215 | }, 216 | { 217 | "cell_type": "code", 218 | "source": [ 219 | "import pandas as pd\n", 220 | "\n", 221 | "# df = pd.read_csv(\"system_cpu.csv\")\n", 222 | "df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/system_cpu.csv\")\n", 223 | "\n", 224 | "print(df.head())" 225 | ], 226 | "metadata": { 227 | "colab": { 228 | "base_uri": "https://localhost:8080/" 229 | }, 230 | "id": "OlvzgzRDcNt8", 231 | "outputId": "424000fc-c180-4157-8975-81a9f87ce6cd" 232 | }, 233 | "execution_count": null, 234 | "outputs": [ 235 | { 236 | "output_type": "stream", 237 | "name": "stdout", 238 | "text": [ 239 | " ERP Feature1 Feature2 Feature3 CPU\n", 240 | "0 30.6 235.1 44.5 44.0 112.3\n", 241 | "1 40.3 36.6 46.4 36.1 58.6\n", 242 | "2 57.7 52.2 66.5 2.0 55.3\n", 243 | "3 128.3 196.2 59.8 57.4 103.2\n", 244 | "4 80.3 75.2 59.6 58.2 104.1\n" 245 | ] 246 | } 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "source": [ 252 | "import pandas as pd\n", 253 | "# df = pd.read_csv(\"system_cpu.csv\")\n", 254 | "df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/system_cpu.csv\")\n", 255 | "\n", 256 | "# 1) ERP와 각 변수 사이의 상관계수 계산\n", 257 | "corr_matrix = df.corr()\n", 258 | "\n", 259 | "# 2) 
ERP와 다른 변수들과의 상관계수 출력\n", 260 | "erp_corr = corr_matrix['ERP'].sort_values(ascending=False)\n", 261 | "print(erp_corr)" 262 | ], 263 | "metadata": { 264 | "colab": { 265 | "base_uri": "https://localhost:8080/" 266 | }, 267 | "id": "6POPUMi3Axb6", 268 | "outputId": "f33741aa-f50a-4364-80a6-e544491fc239" 269 | }, 270 | "execution_count": null, 271 | "outputs": [ 272 | { 273 | "output_type": "stream", 274 | "name": "stdout", 275 | "text": [ 276 | "ERP 1.000000\n", 277 | "Feature3 0.882194\n", 278 | "CPU 0.092455\n", 279 | "Feature2 0.092432\n", 280 | "Feature1 -0.053848\n", 281 | "Name: ERP, dtype: float64\n" 282 | ] 283 | } 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "source": [ 289 | "### 문제2-2, 2-3" 290 | ], 291 | "metadata": { 292 | "id": "EVAJRDHi6gAG" 293 | } 294 | }, 295 | { 296 | "cell_type": "code", 297 | "source": [ 298 | "from statsmodels.formula.api import ols\n", 299 | "\n", 300 | "# 1) CPU가 100 미만인 데이터 필터링\n", 301 | "filtered_df = df[df['CPU'] < 100]\n", 302 | "\n", 303 | "# 2) 선형회귀 모델 생성: ERP를 종속 변수로, 나머지 변수들을 독립 변수로 설정\n", 304 | "model = ols('ERP ~ Feature1 + Feature2 + Feature3 + CPU', data=filtered_df).fit()\n", 305 | "\n", 306 | "# 3) 모델 요약 정보 출력\n", 307 | "print(model.summary())" 308 | ], 309 | "metadata": { 310 | "colab": { 311 | "base_uri": "https://localhost:8080/" 312 | }, 313 | "id": "QW0SyhImA1l0", 314 | "outputId": "e654d8d5-ada0-4f4d-f30e-a8e555466034" 315 | }, 316 | "execution_count": null, 317 | "outputs": [ 318 | { 319 | "output_type": "stream", 320 | "name": "stdout", 321 | "text": [ 322 | " OLS Regression Results \n", 323 | "==============================================================================\n", 324 | "Dep. Variable: ERP R-squared: 0.755\n", 325 | "Model: OLS Adj. R-squared: 0.736\n", 326 | "Method: Least Squares F-statistic: 39.30\n", 327 | "Date: Mon, 05 Aug 2024 Prob (F-statistic): 5.36e-15\n", 328 | "Time: 16:26:10 Log-Likelihood: -260.40\n", 329 | "No. 
Observations: 56 AIC: 530.8\n", 330 | "Df Residuals: 51 BIC: 540.9\n", 331 | "Df Model: 4 \n", 332 | "Covariance Type: nonrobust \n", 333 | "==============================================================================\n", 334 | " coef std err t P>|t| [0.025 0.975]\n", 335 | "------------------------------------------------------------------------------\n", 336 | "Intercept 51.4133 19.112 2.690 0.010 13.045 89.782\n", 337 | "Feature1 -0.0242 0.059 -0.409 0.684 -0.143 0.094\n", 338 | "Feature2 0.0602 0.106 0.569 0.572 -0.152 0.273\n", 339 | "Feature3 1.4126 0.113 12.458 0.000 1.185 1.640\n", 340 | "CPU -0.4651 0.234 -1.985 0.053 -0.936 0.005\n", 341 | "==============================================================================\n", 342 | "Omnibus: 3.758 Durbin-Watson: 1.762\n", 343 | "Prob(Omnibus): 0.153 Jarque-Bera (JB): 2.757\n", 344 | "Skew: 0.436 Prob(JB): 0.252\n", 345 | "Kurtosis: 3.648 Cond. No. 780.\n", 346 | "==============================================================================\n", 347 | "\n", 348 | "Notes:\n", 349 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" 350 | ] 351 | } 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "source": [], 357 | "metadata": { 358 | "id": "vf9FgVSOZKhd" 359 | }, 360 | "execution_count": null, 361 | "outputs": [] 362 | } 363 | ] 364 | } -------------------------------------------------------------------------------- /part4/ch7/system_cpu.csv: -------------------------------------------------------------------------------- 1 | ERP,Feature1,Feature2,Feature3,CPU 2 | 30.6,235.1,44.5,44.0,112.3 3 | 40.3,36.6,46.4,36.1,58.6 4 | 57.7,52.2,66.5,2.0,55.3 5 | 128.3,196.2,59.8,57.4,103.2 6 | 80.3,75.2,59.6,58.2,104.1 7 | 49.8,183.5,25.7,7.0,113.8 8 | -14.6,97.1,49.6,4.3,122.6 9 | 113.3,57.4,38.9,66.7,147.6 10 | -27.1,199.9,54.2,-24.0,101.7 11 | 56.5,186.7,48.5,28.1,82.4 12 | 107.5,201.9,63.7,22.4,129.5 13 | 90.8,177.1,54.8,72.0,77.2 14 | -0.6,69.6,61.8,-4.9,94.0 15 
| 50.7,203.7,43.0,37.4,57.9 16 | -7.2,94.7,35.8,5.6,52.6 17 | 10.1,149.2,43.8,25.4,146.3 18 | 76.0,180.1,49.7,31.5,133.6 19 | 12.2,87.7,55.7,9.2,119.6 20 | 34.7,138.8,83.9,25.7,91.0 21 | -4.5,131.8,49.4,10.8,67.4 22 | 100.2,68.9,35.7,67.4,65.7 23 | 67.7,54.8,44.8,26.7,75.1 24 | 74.2,133.1,43.0,27.1,105.0 25 | 130.2,155.5,57.2,85.5,121.5 26 | -24.3,108.6,26.9,-42.7,116.1 27 | 82.2,63.0,50.9,64.7,78.1 28 | 98.1,77.9,52.3,44.5,145.5 29 | 38.5,194.3,53.5,19.2,123.8 30 | 119.3,166.3,41.0,49.4,105.5 31 | 7.2,151.0,46.4,2.9,111.2 32 | 99.2,62.3,28.6,68.7,92.0 33 | 72.9,136.1,42.6,48.0,74.8 34 | 34.8,46.5,41.9,24.8,85.7 35 | 50.5,62.1,56.2,26.6,125.8 36 | 100.9,59.3,32.7,28.8,51.5 37 | 16.6,139.0,61.7,-4.4,61.7 38 | 60.0,155.2,72.4,24.9,54.7 39 | 48.8,113.0,19.0,31.3,54.2 40 | 87.9,113.2,56.4,45.0,135.6 41 | 16.8,36.0,60.2,23.2,120.4 42 | 13.1,-0.0,40.4,13.7,97.5 43 | 86.5,94.9,44.0,44.1,59.9 44 | 84.2,74.7,48.0,76.2,99.2 45 | 82.6,172.7,45.5,47.4,97.4 46 | 87.4,216.1,45.4,37.2,67.4 47 | -3.5,141.3,24.9,26.1,93.4 48 | 133.5,26.6,67.3,84.8,89.9 49 | 127.8,174.8,66.2,91.1,111.6 50 | 43.9,-2.0,37.8,20.3,113.5 51 | 129.9,71.3,28.0,79.6,54.6 52 | 73.2,78.5,57.8,14.8,87.5 53 | 127.2,198.5,41.4,75.4,112.6 54 | 137.5,50.3,52.1,66.4,100.4 55 | 111.3,32.6,45.2,35.3,135.7 56 | 95.3,94.2,60.4,63.1,115.9 57 | -13.5,73.4,60.4,-33.9,66.4 58 | 4.7,161.0,39.1,-26.3,57.1 59 | 45.1,48.8,29.2,28.0,114.3 60 | 121.1,26.5,26.3,69.1,52.7 61 | 41.5,69.5,59.2,35.3,108.6 62 | 35.7,60.2,32.2,29.0,144.0 63 | 106.6,195.1,42.4,75.4,107.6 64 | 82.6,134.1,41.1,51.8,88.9 65 | 103.7,70.1,49.2,49.9,114.4 66 | 73.6,38.6,21.0,35.7,95.9 67 | 91.8,136.7,52.8,41.7,104.6 68 | 35.7,14.4,57.9,22.3,144.2 69 | -12.3,27.4,51.3,-22.6,88.7 70 | 93.2,144.8,45.3,51.7,146.1 71 | 25.2,117.8,51.5,11.3,140.5 72 | 19.1,163.4,56.0,13.7,69.7 73 | 16.7,119.5,8.4,12.5,57.0 74 | 5.9,168.2,79.3,-33.8,60.2 75 | 92.0,40.8,55.9,32.5,51.9 76 | 58.5,53.2,40.2,27.6,59.5 77 | 98.8,122.4,44.1,48.6,118.3 78 | 97.9,40.0,57.4,52.8,57.2 79 | 
67.8,51.9,48.3,38.4,82.0 80 | 11.6,69.6,19.5,28.5,134.5 81 | 123.2,102.0,81.0,66.3,52.4 82 | 51.1,57.9,48.3,20.9,131.5 83 | 38.8,45.1,65.3,-4.6,78.3 84 | 48.4,75.2,39.6,3.7,61.9 85 | 118.8,-48.6,73.0,67.4,119.7 86 | -1.9,162.9,54.3,15.0,112.9 87 | 46.5,53.0,59.1,46.4,137.8 88 | 92.8,65.0,34.3,34.0,123.5 89 | 29.2,99.2,68.2,10.0,130.4 90 | 93.9,39.4,60.3,50.6,78.3 91 | 29.8,202.9,69.5,45.6,67.8 92 | 15.3,23.4,40.6,9.4,125.1 93 | 38.9,138.6,42.8,42.7,130.7 94 | 72.1,102.1,84.6,38.6,149.1 95 | 54.5,57.6,34.1,44.5,91.3 96 | 131.9,134.9,48.0,81.3,87.3 97 | 51.9,105.0,67.1,28.4,127.7 98 | 139.1,141.1,51.5,54.2,84.1 99 | 35.0,179.4,58.7,13.5,143.1 100 | 24.7,217.2,44.0,34.7,135.9 101 | 34.9,178.9,55.6,30.6,93.0 102 | 211.7,171.5,57.3,99.3,125.1 103 | 128.7,206.7,-47.8,59.2,125.5 104 | 206.2,196.7,-36.0,77.1,60.4 105 | 225.6,223.5,223.7,134.5,140.3 106 | 158.6,19.0,43.7,92.6,100.6 107 | 197.6,21.5,-10.5,96.7,132.7 108 | 159.7,-18.8,118.4,82.9,82.1 109 | 241.0,-37.2,-34.9,87.9,139.6 110 | 137.2,97.0,238.0,61.3,89.0 111 | 120.3,-6.4,72.3,80.2,51.2 112 | 207.9,178.6,32.4,118.7,140.5 113 | 150.7,215.0,8.3,80.3,59.2 114 | 186.3,68.0,-7.2,92.9,82.0 115 | 141.0,73.0,158.5,82.4,145.0 116 | 155.8,35.9,249.6,73.7,145.1 117 | -------------------------------------------------------------------------------- /part4/ch8/chem.csv: -------------------------------------------------------------------------------- 1 | sample,co,nmhc,etc 2 | 샘플1,79,54,31 3 | 샘플2,84,57,58 4 | 샘플3,109,74,113 5 | 샘플4,15,77,21 6 | 샘플5,65,77,115 7 | 샘플6,51,19,64 8 | 샘플7,49,93,108 9 | 샘플8,63,31,44 10 | 샘플9,108,46,87 11 | 샘플10,18,97,102 12 | 샘플11,57,80,23 13 | 샘플12,92,98,96 14 | 샘플13,36,98,55 15 | 샘플14,88,22,106 16 | 샘플15,15,68,81 17 | 샘플16,25,75,89 18 | 샘플17,58,49,107 19 | 샘플18,73,97,63 20 | 샘플19,38,56,52 21 | 샘플20,74,98,31 22 | 샘플21,17,91,104 23 | 샘플22,77,47,30 24 | 샘플23,50,35,74 25 | 샘플24,109,87,57 26 | 샘플25,82,82,48 27 | 샘플26,97,19,22 28 | 샘플27,61,30,47 29 | 샘플28,35,90,103 30 | 샘플29,96,79,109 31 | 샘플30,65,89,43 
32 | 샘플31,42,57,73 33 | 샘플32,29,74,71 34 | 샘플33,56,92,66 35 | 샘플34,73,98,40 36 | 샘플35,80,59,73 37 | 샘플36,51,39,49 38 | 샘플37,25,29,87 39 | 샘플38,101,29,55 40 | 샘플39,58,24,59 41 | 샘플40,26,49,29 42 | 샘플41,17,42,93 43 | 샘플42,66,75,61 44 | 샘플43,95,19,43 45 | 샘플44,47,67,23 46 | 샘플45,69,42,66 47 | 샘플46,15,41,110 48 | 샘플47,53,84,70 49 | 샘플48,34,33,23 50 | 샘플49,61,45,51 51 | 샘플50,57,85,29 52 | 샘플51,71,65,30 53 | 샘플52,75,38,47 54 | 샘플53,92,44,65 55 | 샘플54,45,10,91 56 | 샘플55,39,10,59 57 | 샘플56,17,46,81 58 | 샘플57,18,63,105 59 | 샘플58,109,15,117 60 | 샘플59,28,48,64 61 | 샘플60,55,27,54 62 | 샘플61,87,89,54 63 | 샘플62,34,14,108 64 | 샘플63,87,52,53 65 | 샘플64,41,68,25 66 | 샘플65,81,41,56 67 | 샘플66,67,11,20 68 | 샘플67,82,75,95 69 | 샘플68,76,51,54 70 | 샘플69,29,67,89 71 | 샘플70,19,45,73 72 | 샘플71,82,21,100 73 | 샘플72,26,56,82 74 | 샘플73,101,92,28 75 | 샘플74,92,10,81 76 | 샘플75,90,24,21 77 | 샘플76,71,63,101 78 | 샘플77,31,22,55 79 | 샘플78,39,52,111 80 | 샘플79,44,94,60 81 | 샘플80,36,85,56 82 | 샘플81,40,78,68 83 | 샘플82,95,16,45 84 | 샘플83,75,78,87 85 | 샘플84,76,57,55 86 | 샘플85,98,13,50 87 | 샘플86,48,86,49 88 | 샘플87,47,62,53 89 | 샘플88,85,88,38 90 | 샘플89,100,25,37 91 | 샘플90,46,30,113 92 | 샘플91,28,68,104 93 | 샘플92,86,33,22 94 | 샘플93,71,89,89 95 | 샘플94,39,23,32 96 | 샘플95,94,95,64 97 | 샘플96,56,58,86 98 | 샘플97,33,59,111 99 | 샘플98,55,79,105 100 | 샘플99,69,51,59 101 | 샘플100,94,45,59 -------------------------------------------------------------------------------- /part4/ch8/drinks.csv: -------------------------------------------------------------------------------- 1 | country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent 2 | Afghanistan,0,0,0,0.0,Asia 3 | Albania,89,132,54,4.9,Europe 4 | Algeria,25,0,14,0.7,Africa 5 | Andorra,245,138,312,12.4,Europe 6 | Angola,217,57,45,5.9,Africa 7 | Antigua & Barbuda,102,128,45,4.9,North America 8 | Argentina,193,25,221,8.3,South America 9 | Armenia,21,179,11,3.8,Europe 10 | Australia,261,72,212,10.4,Oceania 11 | Austria,279,75,191,9.7,Europe 12 | 
Azerbaijan,21,46,5,1.3,Europe 13 | Bahamas,122,176,51,6.3,North America 14 | Bahrain,42,63,7,2.0,Asia 15 | Bangladesh,0,0,0,0.0,Asia 16 | Barbados,143,173,36,6.3,North America 17 | Belarus,142,373,42,14.4,Europe 18 | Belgium,295,84,212,10.5,Europe 19 | Belize,263,114,8,6.8,North America 20 | Benin,34,4,13,1.1,Africa 21 | Bhutan,23,0,0,0.4,Asia 22 | Bolivia,167,41,8,3.8,South America 23 | Bosnia-Herzegovina,76,173,8,4.6,Europe 24 | Botswana,173,35,35,5.4,Africa 25 | Brazil,245,145,16,7.2,South America 26 | Brunei,31,2,1,0.6,Asia 27 | Bulgaria,231,252,94,10.3,Europe 28 | Burkina Faso,25,7,7,4.3,Africa 29 | Burundi,88,0,0,6.3,Africa 30 | Cote d'Ivoire,37,1,7,4.0,Africa 31 | Cabo Verde,144,56,16,4.0,Africa 32 | Cambodia,57,65,1,2.2,Asia 33 | Cameroon,147,1,4,5.8,Africa 34 | Canada,240,122,100,8.2,North America 35 | Central African Republic,17,2,1,1.8,Africa 36 | Chad,15,1,1,0.4,Africa 37 | Chile,130,124,172,7.6,South America 38 | China,79,192,8,5.0,Asia 39 | Colombia,159,76,3,4.2,South America 40 | Comoros,1,3,1,0.1,Africa 41 | Congo,76,1,9,1.7,Africa 42 | Cook Islands,0,254,74,5.9,Oceania 43 | Costa Rica,149,87,11,4.4,North America 44 | Croatia,230,87,254,10.2,Europe 45 | Cuba,93,137,5,4.2,North America 46 | Cyprus,192,154,113,8.2,Europe 47 | Czech Republic,361,170,134,11.8,Europe 48 | North Korea,0,0,0,0.0,Asia 49 | DR Congo,32,3,1,2.3,Africa 50 | Denmark,224,81,278,10.4,Europe 51 | Djibouti,15,44,3,1.1,Africa 52 | Dominica,52,286,26,6.6,North America 53 | Dominican Republic,193,147,9,6.2,North America 54 | Ecuador,162,74,3,4.2,South America 55 | Egypt,6,4,1,0.2,Africa 56 | El Salvador,52,69,2,2.2,North America 57 | Equatorial Guinea,92,0,233,5.8,Africa 58 | Eritrea,18,0,0,0.5,Africa 59 | Estonia,224,194,59,9.5,Europe 60 | Ethiopia,20,3,0,0.7,Africa 61 | Fiji,77,35,1,2.0,Oceania 62 | Finland,263,133,97,10.0,Europe 63 | France,127,151,370,11.8,Europe 64 | Gabon,347,98,59,8.9,Africa 65 | Gambia,8,0,1,2.4,Africa 66 | Georgia,52,100,149,5.4,Europe 67 | 
Germany,346,117,175,11.3,Europe 68 | Ghana,31,3,10,1.8,Africa 69 | Greece,133,112,218,8.3,Europe 70 | Grenada,199,438,28,11.9,North America 71 | Guatemala,53,69,2,2.2,North America 72 | Guinea,9,0,2,0.2,Africa 73 | Guinea-Bissau,28,31,21,2.5,Africa 74 | Guyana,93,302,1,7.1,South America 75 | Haiti,1,326,1,5.9,North America 76 | Honduras,69,98,2,3.0,North America 77 | Hungary,234,215,185,11.3,Europe 78 | Iceland,233,61,78,6.6,Europe 79 | India,9,114,0,2.2,Asia 80 | Indonesia,5,1,0,0.1,Asia 81 | Iran,0,0,0,0.0,Asia 82 | Iraq,9,3,0,0.2,Asia 83 | Ireland,313,118,165,11.4,Europe 84 | Israel,63,69,9,2.5,Asia 85 | Italy,85,42,237,6.5,Europe 86 | Jamaica,82,97,9,3.4,North America 87 | Japan,77,202,16,7.0,Asia 88 | Jordan,6,21,1,0.5,Asia 89 | Kazakhstan,124,246,12,6.8,Asia 90 | Kenya,58,22,2,1.8,Africa 91 | Kiribati,21,34,1,1.0,Oceania 92 | Kuwait,0,0,0,0.0,Asia 93 | Kyrgyzstan,31,97,6,2.4,Asia 94 | Laos,62,0,123,6.2,Asia 95 | Latvia,281,216,62,10.5,Europe 96 | Lebanon,20,55,31,1.9,Asia 97 | Lesotho,82,29,0,2.8,Africa 98 | Liberia,19,152,2,3.1,Africa 99 | Libya,0,0,0,0.0,Africa 100 | Lithuania,343,244,56,12.9,Europe 101 | Luxembourg,236,133,271,11.4,Europe 102 | Madagascar,26,15,4,0.8,Africa 103 | Malawi,8,11,1,1.5,Africa 104 | Malaysia,13,4,0,0.3,Asia 105 | Maldives,0,0,0,0.0,Asia 106 | Mali,5,1,1,0.6,Africa 107 | Malta,149,100,120,6.6,Europe 108 | Marshall Islands,0,0,0,0.0,Oceania 109 | Mauritania,0,0,0,0.0,Africa 110 | Mauritius,98,31,18,2.6,Africa 111 | Mexico,238,68,5,5.5,North America 112 | Micronesia,62,50,18,2.3,Oceania 113 | Monaco,0,0,0,0.0,Europe 114 | Mongolia,77,189,8,4.9,Asia 115 | Montenegro,31,114,128,4.9,Europe 116 | Morocco,12,6,10,0.5,Africa 117 | Mozambique,47,18,5,1.3,Africa 118 | Myanmar,5,1,0,0.1,Asia 119 | Namibia,376,3,1,6.8,Africa 120 | Nauru,49,0,8,1.0,Oceania 121 | Nepal,5,6,0,0.2,Asia 122 | Netherlands,251,88,190,9.4,Europe 123 | New Zealand,203,79,175,9.3,Oceania 124 | Nicaragua,78,118,1,3.5,North America 125 | Niger,3,2,1,0.1,Africa 126 | 
Nigeria,42,5,2,9.1,Africa 127 | Niue,188,200,7,7.0,Oceania 128 | Norway,169,71,129,6.7,Europe 129 | Oman,22,16,1,0.7,Asia 130 | Pakistan,0,0,0,0.0,Asia 131 | Palau,306,63,23,6.9,Oceania 132 | Panama,285,104,18,7.2,North America 133 | Papua New Guinea,44,39,1,1.5,Oceania 134 | Paraguay,213,117,74,7.3,South America 135 | Peru,163,160,21,6.1,South America 136 | Philippines,71,186,1,4.6,Asia 137 | Poland,343,215,56,10.9,Europe 138 | Portugal,194,67,339,11.0,Europe 139 | Qatar,1,42,7,0.9,Asia 140 | South Korea,140,16,9,9.8,Asia 141 | Moldova,109,226,18,6.3,Europe 142 | Romania,297,122,167,10.4,Europe 143 | Russian Federation,247,326,73,11.5,Asia 144 | Rwanda,43,2,0,6.8,Africa 145 | St. Kitts & Nevis,194,205,32,7.7,North America 146 | St. Lucia,171,315,71,10.1,North America 147 | St. Vincent & the Grenadines,120,221,11,6.3,North America 148 | Samoa,105,18,24,2.6,Oceania 149 | San Marino,0,0,0,0.0,Europe 150 | Sao Tome & Principe,56,38,140,4.2,Africa 151 | Saudi Arabia,0,5,0,0.1,Asia 152 | Senegal,9,1,7,0.3,Africa 153 | Serbia,283,131,127,9.6,Europe 154 | Seychelles,157,25,51,4.1,Africa 155 | Sierra Leone,25,3,2,6.7,Africa 156 | Singapore,60,12,11,1.5,Asia 157 | Slovakia,196,293,116,11.4,Europe 158 | Slovenia,270,51,276,10.6,Europe 159 | Solomon Islands,56,11,1,1.2,Oceania 160 | Somalia,0,0,0,0.0,Africa 161 | South Africa,225,76,81,8.2,Africa 162 | Spain,284,157,112,10.0,Europe 163 | Sri Lanka,16,104,0,2.2,Asia 164 | Sudan,8,13,0,1.7,Africa 165 | Suriname,128,178,7,5.6,South America 166 | Swaziland,90,2,2,4.7,Africa 167 | Sweden,152,60,186,7.2,Europe 168 | Switzerland,185,100,280,10.2,Europe 169 | Syria,5,35,16,1.0,Asia 170 | Tajikistan,2,15,0,0.3,Asia 171 | Thailand,99,258,1,6.4,Asia 172 | Macedonia,106,27,86,3.9,Europe 173 | Timor-Leste,1,1,4,0.1,Asia 174 | Togo,36,2,19,1.3,Africa 175 | Tonga,36,21,5,1.1,Oceania 176 | Trinidad & Tobago,197,156,7,6.4,North America 177 | Tunisia,51,3,20,1.3,Africa 178 | Turkey,51,22,7,1.4,Asia 179 | Turkmenistan,19,71,32,2.2,Asia 180 | 
Tuvalu,6,41,9,1.0,Oceania 181 | Uganda,45,9,0,8.3,Africa 182 | Ukraine,206,237,45,8.9,Europe 183 | United Arab Emirates,16,135,5,2.8,Asia 184 | United Kingdom,219,126,195,10.4,Europe 185 | Tanzania,36,6,1,5.7,Africa 186 | USA,249,158,84,8.7,North America 187 | Uruguay,115,35,220,6.6,South America 188 | Uzbekistan,25,101,8,2.4,Asia 189 | Vanuatu,21,18,11,0.9,Oceania 190 | Venezuela,333,100,3,7.7,South America 191 | Vietnam,111,2,1,2.0,Asia 192 | Yemen,6,0,0,0.1,Asia 193 | Zambia,32,19,4,2.5,Africa 194 | Zimbabwe,64,18,4,4.7,Africa 195 | -------------------------------------------------------------------------------- /part4/ch8/p8_type1.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"mount_file_id":"1iHXiVOq_xtN9hptWv34tb6d6jd_TcLNT","authorship_tag":"ABX9TyN1U0z3LW6NfwEWG86wbE4e"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch8/p8_type1.ipynb)"],"metadata":{"id":"i49zMTluEvHo"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"j9jYUsDdNwKF"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"YAyh1xITWurF"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"drinks.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/drinks.csv\")\n","\n","# 1) 대륙별 맥주 소비량의 평균\n","continent = df.groupby(\"continent\")['beer_servings'].mean() # Europe\n","top_continent = continent.idxmax()\n","\n","# 2) 국가별 맥주 소비량\n","cond = df['continent'] == top_continent\n","df = df[cond]\n","df = df.sort_values('beer_servings', ascending=False)\n","df.iloc[4, 1] # 또는 
df.iloc[4]['beer_servings']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cKnyz8hjYNp0","executionInfo":{"status":"ok","timestamp":1722875838970,"user_tz":-540,"elapsed":1697,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"f9445be8-e018-4656-9c7a-8c1573420b7c"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["313"]},"metadata":{},"execution_count":1}]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"HPyFlnMZWwFs"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","# df = pd.read_csv(\"tourist.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/tourist.csv\")\n","\n","# 1) 방문객 합계 및 관광객 비율 계산\n","df['방문객합계'] = df['관광'] + df['공무'] + df['사업'] + df['기타']\n","df['관광객비율'] = df['관광'] / df['방문객합계']\n","\n","# 2) 조건에 맞는 값 찾기\n","a = df.sort_values('관광객비율', ascending=False).iloc[1, 3] # 또는 iloc[1]['사업']\n","b = df.sort_values('관광', ascending=False).iloc[1, 2] # 또는 iloc[1]['공무']\n","\n","print(a+b)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EnVLX3BqKyIZ","executionInfo":{"status":"ok","timestamp":1720249582333,"user_tz":-540,"elapsed":372,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"17395b35-cf31-4980-bfc0-53a9d02a04a7"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["441\n"]}]},{"cell_type":"code","source":["# 방법2\n","import pandas as pd\n","# df = pd.read_csv(\"tourist.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/tourist.csv\")\n","\n","# 1) 방문객 합계 및 관광객 비율 계산\n","df['방문객합계'] = df['관광'] + df['공무'] + df['사업'] + df['기타']\n","df['관광객비율'] = df['관광'] / df['방문객합계']\n","\n","# 2) 조건에 맞는 값 찾기\n","a = df.nlargest(2, '관광객비율').iloc[1]['사업']\n","b = df.nlargest(2, 
'관광').iloc[1]['공무']\n","\n","print(a+b)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8Y0L2dJXYllX","executionInfo":{"status":"ok","timestamp":1720249590584,"user_tz":-540,"elapsed":429,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"440cc59b-59de-44a1-ad0c-37aae19814ee"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["441\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"GE7Ggj0KZMOx"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","from sklearn.preprocessing import MinMaxScaler\n","\n","# df = pd.read_csv(\"chem.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/chem.csv\")\n","\n","# 1) Min-Max 스케일링 (데이터프레임으로 만들기)\n","scaler = MinMaxScaler()\n","df['co_scaled'] = scaler.fit_transform(df[['co']])\n","df['nmhc_scaled'] = scaler.fit_transform(df[['nmhc']])\n","\n","# 2) 표준편차 계산 (판다스 표준편차)\n","co_std = df['co_scaled'].std()\n","nmhc_std = df['nmhc_scaled'].std()\n","print(co_std, nmhc_std)\n","\n","# 3) 표준편차 차이 계산 및 반올림\n","std_diff = round(co_std - nmhc_std, 3)\n","print(std_diff)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"YazPGRcxgrxp","executionInfo":{"status":"ok","timestamp":1720249909533,"user_tz":-540,"elapsed":3,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"496dab78-83ab-4ce6-b393-00af8aa8bb57"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["0.2856516497116944 0.3030617020578397\n","-0.017\n"]}]},{"cell_type":"code","source":["# 방법2\n","import pandas as pd\n","from sklearn.preprocessing import MinMaxScaler\n","\n","# df = pd.read_csv(\"chem.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/chem.csv\")\n","\n","# 1) Min-Max 스케일링 (transform의 결과는 넘파이)\n","scaler = MinMaxScaler()\n","co_scaled = 
scaler.fit_transform(df[['co']])\n","nmhc_scaled = scaler.fit_transform(df[['nmhc']])\n","\n","# 2) 표준편차 계산 (넘파이 표준편차)\n","co_std = co_scaled.std()\n","nmhc_std = nmhc_scaled.std()\n","print(co_std, nmhc_std)\n","\n","# 3) 표준편차 차이 계산 및 반올림\n","std_diff = round(co_std - nmhc_std, 3)\n","print(std_diff)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ctWqLY_1aKn7","executionInfo":{"status":"ok","timestamp":1720249982457,"user_tz":-540,"elapsed":437,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"4091cdb2-30b9-4272-bf06-c80b0a3de6f3"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["0.2842198028519168 0.3015425862157\n","-0.017\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"GqQuCqEBDz1g"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /part4/ch8/p8_type2.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyP+iM3+CLs5xtO4gKriPWUA"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch8/p8_type2.ipynb)"],"metadata":{"id":"QYnF_RFLE3X5"}},{"cell_type":"code","source":["# 문제정의\n","# 평가: MAE\n","# target: TotalCharges\n","# 최종파일: result.csv(컬럼 1개 pred)\n","\n","# 라이브러리 및 데이터 불러오기\n","import pandas as pd\n","# train = pd.read_csv(\"churn_train.csv\")\n","# test = pd.read_csv(\"churn_test.csv\")\n","train = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/churn_train.csv\")\n","test = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/churn_test.csv\")\n","\n","# 탐색적 데이터 분석(EDA)\n","print(\"===== 데이터 
크기 =====\")\n","print(\"Train Shape:\", train.shape)\n","print(\"Test Shape:\", test.shape)\n","\n","print(\"\\n ===== 데이터 정보(자료형) =====\")\n","print(train.info())\n","\n","print(\"\\n ===== train 결측치 수 =====\")\n","print(train.isnull().sum().sum())\n","\n","print(\"\\n ===== test 결측치 수 =====\")\n","print(test.isnull().sum().sum())\n","\n","print(\"\\n ===== customerID unique 수 =====\")\n","print(train['customerID'].nunique())\n","\n","print(\"\\n ===== target 기술 통계 =====\")\n","print(train['TotalCharges'].describe())"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"U_YZY2gvjyvc","executionInfo":{"status":"ok","timestamp":1722876159202,"user_tz":-540,"elapsed":1254,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"cb66c07b-267e-450d-e0cb-a1a29e0eda4d"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["===== 데이터 크기 =====\n","Train Shape: (4116, 19)\n","Test Shape: (1764, 18)\n","\n"," ===== 데이터 정보(자료형) =====\n","\n","RangeIndex: 4116 entries, 0 to 4115\n","Data columns (total 19 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 customerID 4116 non-null object \n"," 1 gender 4116 non-null object \n"," 2 SeniorCitizen 4116 non-null int64 \n"," 3 Partner 4116 non-null object \n"," 4 Dependents 4116 non-null object \n"," 5 tenure 4116 non-null int64 \n"," 6 PhoneService 4116 non-null object \n"," 7 MultipleLines 4116 non-null object \n"," 8 InternetService 4116 non-null object \n"," 9 OnlineSecurity 4116 non-null object \n"," 10 OnlineBackup 4116 non-null object \n"," 11 DeviceProtection 4116 non-null object \n"," 12 TechSupport 4116 non-null object \n"," 13 StreamingTV 4116 non-null object \n"," 14 StreamingMovies 4116 non-null object \n"," 15 Contract 4116 non-null object \n"," 16 PaperlessBilling 4116 non-null object \n"," 17 PaymentMethod 4116 non-null object \n"," 18 TotalCharges 4116 non-null float64\n","dtypes: float64(1), int64(2), 
object(16)\n","memory usage: 611.1+ KB\n","None\n","\n"," ===== train 결측치 수 =====\n","0\n","\n"," ===== test 결측치 수 =====\n","0\n","\n"," ===== customerID unique 수 =====\n","4116\n","\n"," ===== target 기술 통계 =====\n","count 4116.000000\n","mean 2566.580782\n","std 1911.356766\n","min 20.030000\n","25% 1020.922500\n","50% 2144.285000\n","75% 3765.670000\n","max 8589.600000\n","Name: TotalCharges, dtype: float64\n"]}]},{"cell_type":"code","source":["# 데이터 전처리\n","train = train.drop('customerID', axis=1)\n","test = test.drop(['customerID'], axis=1)\n","target = train.pop('TotalCharges')\n","\n","# 레이블 인코딩\n","from sklearn.preprocessing import LabelEncoder\n","cols = train.select_dtypes(include='O').columns\n","\n","for col in cols:\n"," le = LabelEncoder()\n"," train[col] = le.fit_transform(train[col])\n"," test[col] = le.transform(test[col])\n","\n","# 검증데이터 분리\n","from sklearn.model_selection import train_test_split\n","X_tr, X_val, y_tr, y_val = train_test_split(train, target, test_size=0.2, random_state=0)\n","\n","# 랜덤포레스트\n","from sklearn.ensemble import RandomForestRegressor\n","rf = RandomForestRegressor(random_state=0)\n","rf.fit(X_tr, y_tr)\n","pred = rf.predict(X_val)\n","\n","# MAE\n","from sklearn.metrics import mean_absolute_error\n","print(mean_absolute_error(y_val, pred))\n","\n","# LightGBM\n","import lightgbm as lgb\n","lg = lgb.LGBMRegressor(random_state=0, verbose=-1)\n","lg.fit(X_tr, y_tr)\n","pred = lg.predict(X_val)\n","print(mean_absolute_error(y_val, pred))\n","\n","# 최종 제출 파일\n","pred = rf.predict(test)\n","result = pd.DataFrame({\n"," 'pred':pred\n","})\n","result.to_csv(\"result.csv\", index=False)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-VHVlEn7eZ6j","executionInfo":{"status":"ok","timestamp":1722876170198,"user_tz":-540,"elapsed":9640,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"003fa2d7-b489-4a10-9556-90e68a018446"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["951.0960435538027\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/dask/dataframe/__init__.py:42: FutureWarning: \n","Dask dataframe query planning is disabled because dask-expr is not installed.\n","\n","You can install it with `pip install dask[dataframe]` or `conda install dask`.\n","This will raise in a future version.\n","\n"," warnings.warn(msg, FutureWarning)\n"]},{"output_type":"stream","name":"stdout","text":["952.7925407798712\n"]}]},{"cell_type":"code","source":["# 1. pred 행의 수\n","print(pred.shape)\n","\n","# 2. 생성한 csv 확인\n","print(pd.read_csv(\"result.csv\").head())"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"gR0wVy3tkIRU","executionInfo":{"status":"ok","timestamp":1722876170199,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"65d774a0-7227-4f22-eca9-d1f7f0a6bb77"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["(1764,)\n"," pred\n","0 3707.6212\n","1 923.7132\n","2 4057.4078\n","3 952.6143\n","4 1322.1638\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"-JyPFkwlFHzv"},"execution_count":null,"outputs":[]}]} -------------------------------------------------------------------------------- /part4/ch8/piq.csv: -------------------------------------------------------------------------------- 1 | PIQ,Brain,Height,Weight 2 | 132,85.78,62.5,127 3 | 96,86.54,68.0,135 4 | 84,90.49,66.3,134 5 | 134,79.06,62.0,122 6 | 86,88.91,70.0,180 7 | 102,83.18,63.0,114 8 | 128,107.95,70.0,151 9 | 128,107.95,70.0,151 10 | 131,93.55,72.0,171 11 | 94,89.4,64.5,139 12 | 131,93.55,72.0,171 13 | 128,96.54,68.8,172 14 | 128,95.5,68.0,132 15 | 150,103.84,73.3,143 16 | 131,93.55,72.0,171 17 | 137,94.96,67.0,191 18 | 89,93.59,75.5,179 19 | 
150,103.84,73.3,143 20 | 134,79.06,62.0,122 21 | 81,83.43,66.5,143 22 | 124,92.41,69.0,155 23 | 128,95.5,68.0,132 24 | 84,79.86,68.0,140 25 | 72,79.35,63.0,106 26 | 124,86.67,66.5,159 27 | 120,85.22,68.5,127 28 | 96,86.54,68.0,135 29 | 128,96.54,68.8,172 30 | 74,93.0,74.0,148 31 | 98,85.43,66.0,175 32 | 134,79.06,62.0,122 33 | 147,95.55,68.8,172 34 | 84,80.8,66.3,136 35 | 134,95.15,65.0,147 36 | 84,79.86,68.0,140 37 | 90,87.89,66.0,146 38 | 147,95.55,68.8,172 39 | 110,106.25,77.0,187 40 | 150,103.84,73.3,143 41 | 84,90.59,76.5,186 42 | 124,86.67,66.5,159 43 | 98,85.43,66.0,175 44 | 84,90.49,66.3,134 45 | 124,94.94,70.5,144 46 | 90,87.89,66.0,146 47 | 102,94.51,73.5,178 48 | 94,89.4,64.5,139 49 | 134,95.15,65.0,147 50 | 150,103.84,73.3,143 51 | 131,99.13,64.5,138 52 | -------------------------------------------------------------------------------- /part4/ch8/tourist.csv: -------------------------------------------------------------------------------- 1 | 나라,관광,공무,사업,기타 2 | 국가1,1184,270,380,55 3 | 국가2,1059,184,267,86 4 | 국가3,1129,168,261,50 5 | 국가4,692,106,214,125 6 | 국가5,1335,296,296,84 7 | 국가6,1263,147,204,119 8 | 국가7,1207,227,395,103 9 | 국가8,859,231,339,130 10 | 국가9,509,200,286,112 11 | 국가10,1223,280,321,58 12 | 국가11,777,178,309,111 13 | 국가12,1254,243,275,51 14 | 국가13,1304,248,384,131 15 | 국가14,1099,286,216,85 16 | 국가15,570,123,352,141 17 | 국가16,972,241,357,90 18 | 국가17,1100,217,349,86 19 | 국가18,896,185,310,98 20 | 국가19,814,148,225,75 21 | 국가20,1205,149,388,117 22 | 국가21,986,169,321,85 23 | 국가22,1051,269,318,80 24 | 국가23,587,263,317,79 25 | 국가24,674,292,389,83 26 | 국가25,1100,195,283,68 27 | 국가26,1349,297,361,67 28 | 국가27,1177,194,304,143 29 | 국가28,1037,100,360,134 30 | 국가29,1345,213,321,52 31 | 국가30,572,278,270,119 32 | 국가31,1277,136,231,62 33 | 국가32,1416,262,213,94 34 | 국가33,615,148,271,116 35 | 국가34,1476,193,384,141 36 | 국가35,1255,231,352,135 37 | 국가36,1209,198,279,89 38 | 국가37,1347,142,241,89 39 | 국가38,931,212,218,125 40 | 국가39,948,249,240,72 41 | 
국가40,1350,227,382,80 42 | 국가41,599,100,211,67 43 | 국가42,1484,238,366,120 44 | 국가43,677,214,311,121 45 | 국가44,1255,143,293,68 46 | 국가45,1297,286,329,142 47 | 국가46,1159,227,318,93 48 | 국가47,647,123,244,133 49 | 국가48,1410,287,325,99 50 | 국가49,923,230,224,91 51 | 국가50,788,221,267,143 52 | 국가51,1461,198,203,96 53 | 국가52,765,162,235,71 54 | 국가53,1197,263,389,123 55 | 국가54,1139,223,397,139 56 | 국가55,1044,295,243,146 57 | 국가56,1043,182,232,141 58 | 국가57,1214,274,211,123 59 | 국가58,744,248,304,78 60 | 국가59,651,150,338,131 61 | 국가60,1175,255,382,108 62 | 국가61,1010,114,365,50 63 | 국가62,959,141,325,136 64 | 국가63,1382,158,356,113 65 | 국가64,683,293,311,66 66 | 국가65,528,136,202,86 67 | 국가66,1302,110,227,144 68 | 국가67,628,186,351,74 69 | 국가68,628,143,253,113 70 | 국가69,1432,204,251,117 71 | 국가70,553,111,374,101 72 | 국가71,1401,102,348,58 73 | 국가72,1050,151,381,106 74 | 국가73,988,180,229,141 75 | 국가74,1256,132,267,143 76 | 국가75,773,282,235,137 77 | 국가76,835,228,239,82 78 | 국가77,888,138,337,69 79 | 국가78,1117,119,273,122 80 | 국가79,542,274,241,121 81 | 국가80,942,142,351,137 82 | 국가81,1043,215,331,63 83 | 국가82,1388,284,246,108 84 | 국가83,757,288,378,131 85 | 국가84,821,177,308,105 86 | 국가85,1499,130,203,114 87 | 국가86,1437,124,231,125 88 | 국가87,557,225,209,142 89 | 국가88,791,102,338,86 90 | 국가89,1370,103,227,75 91 | 국가90,619,194,373,82 92 | 국가91,1279,207,399,92 93 | 국가92,930,113,367,64 94 | 국가93,582,212,261,136 95 | 국가94,591,140,285,78 96 | 국가95,1396,172,297,70 97 | 국가96,898,119,244,132 98 | 국가97,1111,195,234,118 99 | 국가98,1065,172,362,72 100 | 국가99,1408,254,288,149 101 | 국가100,1133,294,233,133 102 | --------------------------------------------------------------------------------