├── .github
└── workflows
│ ├── issue-response.yml
│ └── respond_to_issue.py
├── README.md
├── part1
├── ch1
│ └── ch1_python.ipynb
├── ch2
│ └── ch2_pandas.ipynb
└── ch3
│ ├── ch3_ex_type1.ipynb
│ ├── delivery_time.csv
│ ├── school_data.csv
│ ├── school_data_science.csv
│ ├── school_data_social.csv
│ ├── type1_data1.csv
│ └── type1_data2.csv
├── part2
├── ch2
│ ├── ch2_classification.ipynb
│ ├── test.csv
│ └── train.csv
├── ch3
│ └── ch3_metrics.ipynb
├── ch4
│ ├── ch4_regression.ipynb
│ ├── test.csv
│ └── train.csv
├── ch5
│ ├── ch5_multi_class_classification.ipynb
│ ├── test.csv
│ └── train.csv
├── ch6
│ ├── ch6_ex_classification.ipynb
│ ├── creditcard_test.csv
│ ├── creditcard_train.csv
│ ├── diabetes_test.csv
│ ├── diabetes_train.csv
│ ├── hr_test.csv
│ └── hr_train.csv
├── ch7
│ ├── ch7_ex_multi_class_classification.ipynb
│ ├── drug_test.csv
│ ├── drug_train.csv
│ ├── glass_test.csv
│ ├── glass_train.csv
│ ├── score_test.csv
│ └── score_train.csv
└── ch8
│ ├── car_test.csv
│ ├── car_train.csv
│ ├── ch8_ex_regression.ipynb
│ ├── flight_test.csv
│ ├── flight_train.csv
│ ├── laptop_test.csv
│ └── laptop_train.csv
├── part3
├── ch1
│ └── ch1_hypothesis_testing.ipynb
├── ch2
│ ├── ch2_anova.ipynb
│ ├── fertilizer.csv
│ └── tree.csv
├── ch3
│ └── ch3_chi_square.ipynb
├── ch4
│ ├── ch4_linear_regression.ipynb
│ └── study.csv
├── ch5
│ ├── ch5_logistic_regression.ipynb
│ └── health_survey.csv
└── ch6
│ ├── ch6_ex_type3.ipynb
│ ├── customer_travel.csv
│ ├── math.csv
│ └── tomato2.csv
└── part4
├── ch2
├── X_test.csv
├── X_train.csv
├── members.csv
├── p2_type1.ipynb
├── p2_type2.ipynb
└── y_train.csv
├── ch3
├── members.csv
├── p3_type1.ipynb
├── p3_type2.ipynb
├── test.csv
├── train.csv
└── year.csv
├── ch4
├── data4-1.csv
├── data4-2.csv
├── data4-3.csv
├── p4_type1.ipynb
├── p4_type2.ipynb
├── test.csv
└── train.csv
├── ch5
├── data5-1.csv
├── data5-2.csv
├── data5-3.csv
├── p5_type1.ipynb
├── p5_type2.ipynb
├── test.csv
└── train.csv
├── ch6
├── data6-1-1.csv
├── data6-1-2.csv
├── data6-1-3.csv
├── data6-3-2.csv
├── energy_test.csv
├── energy_train.csv
├── p6_type1.ipynb
├── p6_type2.ipynb
└── p6_type3.ipynb
├── ch7
├── air_quality.csv
├── clam.csv
├── mart_test.csv
├── mart_train.csv
├── p7_type1.ipynb
├── p7_type2.ipynb
├── p7_type3.ipynb
├── stock_market.csv
├── student_assessment.csv
└── system_cpu.csv
└── ch8
├── chem.csv
├── churn.csv
├── churn_test.csv
├── churn_train.csv
├── drinks.csv
├── p8_type1.ipynb
├── p8_type2.ipynb
├── p8_type3.ipynb
├── piq.csv
└── tourist.csv
/.github/workflows/issue-response.yml:
--------------------------------------------------------------------------------
# Replies to newly opened issues and to new issue comments by running
# .github/workflows/respond_to_issue.py with the event details in env vars.
name: Issue Response Bot

on:
  issues:
    types: [opened]
  issue_comment:
    types: [created]

permissions:
  issues: write  # write access to issues and their comments
  contents: read  # repository read access, where needed

jobs:
  respond:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |

          pip install openai==0.28.0 requests

      - name: Generate Response
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
          # An issue_comment event also carries the parent issue, so the
          # comment fields must be tried first; otherwise the issue body and
          # issue author would always win and the new comment would be lost.
          COMMENT_BODY: ${{ github.event.comment.body || github.event.issue.body }}
          COMMENT_AUTHOR: ${{ github.event.comment.user.login || github.event.issue.user.login }}
        run: |
          python .github/workflows/respond_to_issue.py
41 |
--------------------------------------------------------------------------------
/.github/workflows/respond_to_issue.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import requests
4 |
5 | # Settings handed over by the workflow as environment variables (os.getenv returns None when a variable is unset)
6 | GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
7 | REPO_NAME = os.getenv('GITHUB_REPOSITORY')
8 | ISSUE_NUMBER = os.getenv('ISSUE_NUMBER')
9 | COMMENT_BODY = os.getenv('COMMENT_BODY')
10 | COMMENT_AUTHOR = os.getenv('COMMENT_AUTHOR')
11 | OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
12 |
13 | # Configure the OpenAI API key
14 | openai.api_key = OPENAI_API_KEY
15 |
def get_issue_and_comments():
    """Fetch the issue's title, body and every comment from the GitHub REST API.

    Reads the module-level ``REPO_NAME``, ``ISSUE_NUMBER`` and ``GITHUB_TOKEN``.

    Returns:
        A ``(title, body, comments)`` tuple:

        * ``(None, None, None)`` when the issue itself cannot be fetched;
        * ``(title, body, None)`` when the issue was fetched but the comments
          could not be;
        * otherwise ``comments`` is a list of ``"<login> said: <body>"``
          strings in the order the API returns them.
    """
    issue_url = f"https://api.github.com/repos/{REPO_NAME}/issues/{ISSUE_NUMBER}"
    comments_url = f"https://api.github.com/repos/{REPO_NAME}/issues/{ISSUE_NUMBER}/comments"
    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
    # Without a timeout a stalled connection would hang the workflow job.
    timeout = 30

    # Fetch the issue itself.
    try:
        issue_response = requests.get(issue_url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch issue data from GitHub API: {exc}")
        return None, None, None
    print(f"Issue API response status: {issue_response.status_code}")

    if issue_response.status_code != 200:
        print("Failed to fetch issue data from GitHub API.")
        return None, None, None

    issue_data = issue_response.json()
    issue_title = issue_data.get('title', '')
    issue_body = issue_data.get('body', '')

    # Fetch all comments. The endpoint is paginated, so request the largest
    # page size and keep following the "next" link until it disappears.
    comments = []
    page_url = comments_url
    params = {"per_page": 100}
    while page_url:
        try:
            comments_response = requests.get(
                page_url, headers=headers, params=params, timeout=timeout
            )
        except requests.RequestException as exc:
            print(f"Failed to fetch comments from GitHub API: {exc}")
            return issue_title, issue_body, None
        print(f"Comments API response status: {comments_response.status_code}")

        if comments_response.status_code != 200:
            print("Failed to fetch comments from GitHub API.")
            return issue_title, issue_body, None

        comments.extend(
            f"{comment['user']['login']} said: {comment['body']}"
            for comment in comments_response.json()
        )
        # The "next" URL already contains the query string, so the explicit
        # params are only sent with the first request.
        page_url = comments_response.links.get("next", {}).get("url")
        params = None

    return issue_title, issue_body, comments
46 |
def get_chatgpt_response(title, body, comments, new_comment):
    """Ask the OpenAI chat model for a reply to the issue or to its newest comment.

    Args:
        title: Issue title.
        body: Issue body.
        comments: List of already formatted comment strings; may be None or
            empty.
        new_comment: Text that triggered this run; may be None or empty. When
            it is just the issue body again (the value the workflow passes for
            a newly opened issue), it is not treated as a comment.

    Returns:
        The model's reply with surrounding whitespace stripped, or None when
        building the prompt or calling the API raised an exception.
    """
    try:
        # Build the prompt: title, body, existing comments, then the request.
        prompt = f"Title: {title}\n\nBody: {body}\n\nComments:\n"
        if comments:
            prompt += "\n".join(comments)

        # For a freshly opened issue the triggering text IS the issue body;
        # presenting it again as a "new comment" would duplicate the body and
        # skip the "Respond to the issue" instruction.
        repeats_issue_body = (
            bool(new_comment) and new_comment.strip() == (body or "").strip()
        )
        if new_comment and not repeats_issue_body:
            prompt += f"\n\nNew comment from {COMMENT_AUTHOR}: {new_comment}\n\nRespond to the new comment:"
        else:
            prompt += "\n\nRespond to the issue:"

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=2024  # adjust as needed
        )
        return response.choices[0].message['content'].strip()
    except Exception as e:
        # Best-effort boundary: report the failure and let the caller skip posting.
        print(f"Error calling OpenAI API: {e}")
        return None
71 |
def comment_on_issue(response):
    """Post ``response`` as a new comment on the current issue.

    Reads the module-level ``REPO_NAME``, ``ISSUE_NUMBER`` and ``GITHUB_TOKEN``.

    Args:
        response: Comment text to post. Nothing is sent when it is falsy.

    Returns:
        None. The outcome is reported on stdout.
    """
    if not response:
        print("No response to post.")
        return

    url = f"https://api.github.com/repos/{REPO_NAME}/issues/{ISSUE_NUMBER}/comments"
    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
    payload = {"body": response}

    try:
        # A timeout keeps a stalled connection from hanging the workflow job.
        post_result = requests.post(url, json=payload, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to post response: {exc}")
        return

    # The result lives in its own name so the ``response`` argument is not shadowed.
    if post_result.status_code == 201:
        print("Response posted successfully.")
    else:
        print(f"Failed to post response. Status code: {post_result.status_code}, Response: {post_result.text}")
87 |
def main():
    """Fetch the issue, generate a reply and post it as a comment.

    A reply is generated only when the issue has both a title and a body;
    otherwise a notice is printed and nothing is posted.
    """
    title, body, history = get_issue_and_comments()

    if not (title and body):
        print("No issue title or body found.")
        return

    reply = get_chatgpt_response(title, body, history, COMMENT_BODY)
    print(f"Generated response: {reply}")  # debugging output
    comment_on_issue(reply)
96 |
97 | if __name__ == "__main__":  # run only when executed as a script, not when imported
98 | main()
99 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [퇴근후딴짓] 빅데이터 분석기사 실기 - 길벗 시나공 시리즈
2 |
3 | []()
4 | []()
5 | []()
6 | []()
7 | []()
8 | [](https://colab.research.google.com/)
9 |
10 | ## 🌱 도서 링크
11 | - [교보문고](https://product.kyobobook.co.kr/detail/S000214299800), [yes24](https://www.yes24.com/Product/Goods/133311973)
12 |
13 |
14 | ## 🌱 정오표:
15 | - https://bit.ly/3YahBcW
16 | - 시험환경 업데이트 9회 준비 기준 (판다스, 사이킷런 등이 코랩과 동일한 버전으로 업데이트 되었어요)
17 |
18 | ## 🌱 목차
19 | - Intro. 시험 응시 전략, 시험 환경 소개, 코드 및 데이터 불러오기, 자주하는 질문 등
20 | - PART1. 작업형1 (파이썬, 판다스, 연습문제)
21 | - PART2. 작업형2 (이진분류, 다중분류, 회귀, 평가지표, 연습문제)
22 | - PART3. 작업형3 (가설검정, 분산 분석, 카이제곱, 회귀, 로지스틱 회귀, 연습문제)
23 | - PART4. 기출유형 (예시문제, 2회 ~ 8회까지)
24 |
25 |
26 | ## 🌱 예제코드 바로 실행하는 방법
27 | - 노트북 선택(part/chapter) -> 구글 코랩에서 실행하기 -> Drive로 복사 -> 실행
28 | 
29 |
30 | ## 🌱 예제코드 전체 다운로드 방법
31 | - "Code" 버튼 클릭 -> 풀다운 메뉴에서 "Download Zip"을 선택
32 | - 입문자는 "예제코드 바로 실행하는 방법"을 추천합니다.
33 |
34 |
35 |
36 | ## 🌱 실습 중 오류가 발생했을 때
37 | - 제공된 최종 노트북 코드와 현재 코드를 비교하여 문제를 파악해보세요. 코드를 복사하여 붙여넣기 한 후 정상적으로 실행되는지 확인
38 | - 문제가 지속될 경우, ChatGPT(https://chat.openai.com/), Claude(https://claude.ai) 를 활용
39 |
40 |
41 | ## 🌱 안내사항
42 | - 아래와 같은 lightgbm 모델에서 학습시 발생하는 워닝은 무시해 주세요. (시험환경에서도 워닝은 무시해도 됨)
43 | ```text
44 | /usr/local/lib/python3.10/dist-packages/dask/dataframe/__init__.py:42: FutureWarning:
45 | Dask dataframe query planning is disabled because dask-expr is not installed.
46 | You can install it with `pip install dask[dataframe]` or `conda install dask`.
47 | This will raise in a future version.
48 | warnings.warn(msg, FutureWarning)
49 | ```
50 |
51 | ## 🌱 커뮤니티
52 | - 9회 스터디(오징어게임) 초대장 : https://bit.ly/3zFDXK0
53 | - 디스코드 입장 링크: https://discord.gg/V8acvTnHhH
54 | - 학습과 관련해 1:1 질의응답은 진행하지 않습니다. 미션을 수행하고, 멤버간 질의응답을 하는 공간입니다.
55 |
56 |
57 | ## 레포지토리 구조
58 | ```text
59 | .
60 | ├── README.md
61 | ├── part1 (작업형1)
62 | │ ├── ch1
63 | │ │ └── ch1_python.ipynb (코드)
64 | │ ├── ch2
65 | │ │ └── ch2_pandas.ipynb (코드)
66 | │ └── ch3
67 | │ ├── ch3_ex_type1.ipynb (코드)
68 | │ ├── delivery_time.csv
69 | │ ├── school_data.csv
70 | │ ├── school_data_science.csv
71 | │ ├── school_data_social.csv
72 | │ ├── type1_data1.csv
73 | │ └── type1_data2.csv
74 | ├── part2 (작업형2)
75 | │ ├── ch2
76 | │ │ ├── ch2_classification.ipynb (코드)
77 | │ │ ├── test.csv
78 | │ │ └── train.csv
79 | │ ├── ch3
80 | │ │ └── ch3_metrics.ipynb (코드)
81 | │ ├── ch4
82 | │ │ ├── ch4_regression.ipynb (코드)
83 | │ │ ├── test.csv
84 | │ │ └── train.csv
85 | │ ├── ch5
86 | │ │ ├── ch5_multi_class_classification.ipynb (코드)
87 | │ │ ├── test.csv
88 | │ │ └── train.csv
89 | │ ├── ch6
90 | │ │ ├── ch6_ex_classification.ipynb (코드)
91 | │ │ ├── creditcard_test.csv
92 | │ │ ├── creditcard_train.csv
93 | │ │ ├── diabetes_test.csv
94 | │ │ ├── diabetes_train.csv
95 | │ │ ├── hr_test.csv
96 | │ │ └── hr_train.csv
97 | │ ├── ch7
98 | │ │ ├── ch7_ex_multi_class_classification.ipynb (코드)
99 | │ │ ├── drug_test.csv
100 | │ │ ├── drug_train.csv
101 | │ │ ├── glass_test.csv
102 | │ │ ├── glass_train.csv
103 | │ │ ├── score_test.csv
104 | │ │ └── score_train.csv
105 | │ └── ch8
106 | │ ├── car_test.csv
107 | │ ├── car_train.csv
108 | │ ├── ch8_ex_regression.ipynb (코드)
109 | │ ├── flight_test.csv
110 | │ ├── flight_train.csv
111 | │ ├── laptop_test.csv
112 | │ └── laptop_train.csv
113 | ├── part3 (작업형3)
114 | │ ├── ch1
115 | │ │ └── ch1_hypothesis_testing.ipynb (코드)
116 | │ ├── ch2
117 | │ │ ├── ch2_anova.ipynb (코드)
118 | │ │ ├── fertilizer.csv
119 | │ │ └── tree.csv
120 | │ ├── ch3
121 | │ │ └── ch3_chi_square.ipynb (코드)
122 | │ ├── ch4
123 | │ │ ├── ch4_linear_regression.ipynb (코드)
124 | │ │ └── study.csv
125 | │ ├── ch5
126 | │ │ ├── ch5_logistic_regression.ipynb (코드)
127 | │ │ └── health_survey.csv
128 | │ └── ch6
129 | │ ├── ch6_ex_type3.ipynb (코드)
130 | │ ├── math.csv
131 | │ └── tomato2.csv
132 | └── part4 (기출유형)
133 | ├── ch2
134 | │ ├── X_test.csv
135 | │ ├── X_train.csv
136 | │ ├── members.csv
137 | │ ├── p2_type1.ipynb (작업형1 코드)
138 | │ ├── p2_type2.ipynb (작업형2 코드)
139 | │ └── y_train.csv
140 | ├── ch3
141 | │ ├── members.csv
142 | │ ├── p3_type1.ipynb (작업형1 코드)
143 | │ ├── p3_type2.ipynb (작업형2 코드)
144 | │ ├── test.csv
145 | │ ├── train.csv
146 | │ └── year.csv
147 | ├── ch4
148 | │ ├── data4-1.csv
149 | │ ├── data4-2.csv
150 | │ ├── data4-3.csv
151 | │ ├── p4_type1.ipynb (작업형1 코드)
152 | │ ├── p4_type2.ipynb (작업형2 코드)
153 | │ ├── test.csv
154 | │ └── train.csv
155 | ├── ch5
156 | │ ├── data5-1.csv
157 | │ ├── data5-2.csv
158 | │ ├── data5-3.csv
159 | │ ├── p5_type1.ipynb (작업형1 코드)
160 | │ ├── p5_type2.ipynb (작업형2 코드)
161 | │ ├── test.csv
162 | │ └── train.csv
163 | ├── ch6
164 | │ ├── data6-1-1.csv
165 | │ ├── data6-1-2.csv
166 | │ ├── data6-1-3.csv
167 | │ ├── data6-3-2.csv
168 | │ ├── energy_test.csv
169 | │ ├── energy_train.csv
170 | │ ├── p6_type1.ipynb (작업형1 코드)
171 | │ ├── p6_type2.ipynb (작업형2 코드)
172 | │ └── p6_type3.ipynb (작업형3 코드)
173 | ├── ch7
174 | │ ├── air_quality.csv
175 | │ ├── clam.csv
176 | │ ├── mart_test.csv
177 | │ ├── mart_train.csv
178 | │ ├── p7_type1.ipynb (작업형1 코드)
179 | │ ├── p7_type2.ipynb (작업형2 코드)
180 | │ ├── p7_type3.ipynb (작업형3 코드)
181 | │ ├── stock_market.csv
182 | │ ├── student_assessment.csv
183 | │ └── system_cpu.csv
184 | └── ch8
185 | ├── chem.csv
186 | ├── churn.csv
187 | ├── churn_test.csv
188 | ├── churn_train.csv
189 | ├── customer_travel.csv
190 | ├── drinks.csv
191 | ├── p8_type1.ipynb (작업형1 코드)
192 | ├── p8_type2.ipynb (작업형2 코드)
193 | ├── p8_type3.ipynb (작업형3 코드)
194 | ├── piq.csv
195 | └── tourist.csv
196 | ```
197 |
198 | 이 레포지토리에 실린 모든 내용의 저작권은 저자에게 있으며, 저자의 허락 없이 이 코드의 일부 또는 전부를 복제, 배포할 수 없습니다.
199 |
--------------------------------------------------------------------------------
/part1/ch3/school_data.csv:
--------------------------------------------------------------------------------
1 | 이름,수학,영어,국어,수학교사,영어교사,국어교사
2 | 강아지,66,61,26,김선생,장선생,최선생
3 | 고양이,92,48,80,김선생,장선생,이선생
4 | 토끼,98,7,6,김선생,장선생,최선생
5 | 사자,17,99,14,김선생,유선생,최선생
6 | 호랑이,83,92,75,박선생,장선생,이선생
7 | 곰,57,52,54,김선생,유선생,이선생
8 | 원숭이,86,97,71,김선생,장선생,이선생
9 | 기린,97,85,1,김선생,유선생,최선생
10 | 코끼리,96,94,43,김선생,유선생,이선생
11 | 판다,47,27,58,박선생,장선생,이선생
12 | 늑대,73,34,55,박선생,장선생,최선생
13 | 여우,32,97,25,박선생,유선생,이선생
14 | 펭귄,46,76,50,김선생,장선생,이선생
15 | 하이에나,96,40,84,김선생,장선생,최선생
16 | 코알라,25,3,56,박선생,장선생,이선생
17 | 강치,83,69,49,박선생,유선생,이선생
18 | 햄스터,78,64,12,김선생,장선생,최선생
19 | 뱀,36,75,18,김선생,장선생,이선생
20 | 독수리,96,34,81,박선생,장선생,이선생
21 | 침팬지,80,58,1,박선생,장선생,이선생
22 | 하마,68,10,51,박선생,유선생,이선생
23 | 두더지,49,22,44,김선생,유선생,최선생
24 | 물소,55,77,48,박선생,장선생,이선생
25 | 캥거루,67,18,56,김선생,장선생,최선생
26 | 참새,2,100,91,김선생,유선생,최선생
27 | 타조,84,15,49,박선생,유선생,이선생
28 | 개구리,39,27,86,박선생,장선생,이선생
29 | 펠리칸,66,30,3,김선생,유선생,이선생
30 | 돌고래,84,52,67,박선생,유선생,최선생
31 | 매,47,70,11,김선생,유선생,이선생
32 |
--------------------------------------------------------------------------------
/part1/ch3/school_data_science.csv:
--------------------------------------------------------------------------------
1 | 이름,과학,과학교사
2 | 강아지,66,황선생
3 | 고양이,92,임선생
4 | 토끼,98,황선생
5 | 사자,17,임선생
6 | 호랑이,83,임선생
7 | 곰,57,임선생
8 | 원숭이,86,황선생
9 | 기린,97,황선생
10 | 코끼리,96,황선생
11 | 판다,47,황선생
12 | 늑대,73,임선생
13 | 여우,32,임선생
14 | 펭귄,46,황선생
15 | 하이에나,96,황선생
16 | 코알라,25,임선생
17 | 강치,83,황선생
18 | 햄스터,78,임선생
19 | 뱀,36,황선생
20 | 독수리,96,황선생
21 | 침팬지,80,임선생
22 | 하마,68,황선생
23 | 두더지,49,임선생
24 | 물소,55,황선생
25 | 캥거루,67,임선생
26 | 참새,2,황선생
27 | 타조,84,황선생
28 | 개구리,39,황선생
29 | 펠리칸,66,황선생
30 | 돌고래,84,임선생
31 | 매,47,황선생
32 |
--------------------------------------------------------------------------------
/part1/ch3/school_data_social.csv:
--------------------------------------------------------------------------------
1 | 이름,사회,사회교사
2 | 기린,47,오선생
3 | 매,61,우선생
4 | 곰,48,우선생
5 | 개구리,7,오선생
6 | 코끼리,99,오선생
7 | 펠리칸,92,오선생
8 | 펭귄,52,오선생
9 | 두더지,97,우선생
10 | 여우,85,오선생
11 | 캥거루,94,우선생
12 | 독수리,27,우선생
13 | 호랑이,34,오선생
14 | 사자,97,우선생
15 | 참새,76,우선생
16 | 햄스터,40,우선생
17 | 코알라,3,오선생
18 | 하마,69,우선생
19 | 판다,64,우선생
20 | 강치,75,우선생
21 | 타조,34,오선생
22 | 강아지,58,오선생
23 | 고양이,10,오선생
24 | 물소,22,우선생
25 | 늑대,77,오선생
26 | 침팬지,18,오선생
27 | 뱀,100,우선생
28 | 원숭이,15,우선생
29 | 돌고래,27,우선생
30 | 토끼,30,오선생
31 | 하이에나,52,우선생
32 |
--------------------------------------------------------------------------------
/part1/ch3/type1_data1.csv:
--------------------------------------------------------------------------------
1 | id,age,city,f1,f2,f3,f4,f5,subscribed,views
2 | id01,2,서울,,0,gold,ENFJ,91.29779092,2024-07-16,6820
3 | id02,9,서울,70,1,,ENFJ,60.33982554,2024-05-12,2534
4 | id03,27,서울,61,1,gold,ISTJ,17.25298557,2024-03-16,7312
5 | id04,75,서울,,2,,INFP,52.66707799,2024-07-21,493
6 | id05,24,서울,85,2,,ISFJ,29.26986926,2024-03-07,1338
7 | id06,22,서울,57,0,vip,INTP,20.1294441,2024-09-12,21550
8 | id07,36.3,서울,60,1,,ISFJ,9.796377581,2024-01-11,61
9 | id08,38,서울,101,1,silver,INFJ,83.68538032,2024-03-06,3260
10 | id09,3.3,서울,35,2,,ESFJ,17.25298557,2024-03-21,2764
11 | id10,95,서울,74,1,gold,ISFP,98.42989897,2024-04-03,9992
12 | id100,47,경기,53,0,vip,ESFP,33.30899901,2024-02-21,15535
13 | id11,40,서울,68,0,gold,ENFP,98.42989897,2024-10-29,6752
14 | id12,20,서울,,0,,ESTP,91.29779092,2024-11-30,1367
15 | id13,15,서울,68,0,gold,ESFJ,83.68538032,2024-12-30,5643
16 | id14,77,서울,50,1,gold,ENTJ,67.8863732,2024-09-19,5700
17 | id15,22,서울,67,1,gold,ENTP,9.796377581,2024-05-26,7676
18 | id16,68,서울,85,0,gold,ESFP,16.2838541,2024-07-25,9472
19 | id17,74,서울,,1,gold,ISTP,67.8863732,2024-10-26,9441
20 | id18,41,서울,87,2,gold,ISFJ,80.13828012,2024-03-03,7933
21 | id19,53,서울,,0,gold,ISFP,83.68538032,2024-12-24,5287
22 | id20,11,서울,51,1,,INTJ,91.29779092,2024-07-16,
23 | id21,90,부산,,1,gold,ISFP,29.26986926,2024-05-03,9690
24 | id22,-6.3,부산,72,1,gold,ENFP,52.66707799,2024-02-09,6147
25 | id23,34,부산,75,1,gold,ISTP,69.73031281,2024-05-21,6236
26 | id24,80,부산,44,0,gold,INFJ,73.58639712,2024-09-11,5976
27 | id25,34,부산,,0,gold,ESTP,60.33982554,2024-07-12,8954
28 | id26,55,부산,57,1,gold,ENFP,83.68538032,2024-05-01,5857
29 | id27,37,부산,60,0,silver,ESTP,73.58639712,2024-10-13,4255
30 | id28,38,부산,34,1,gold,ENTP,80.13828012,2024-10-31,5068
31 | id29,-13.5,부산,47,2,gold,ENTP,67.8863732,2024-08-28,6793
32 | id30,16,부산,,0,,ESTJ,17.25298557,2024-05-28,240
33 | id31,86,부산,77,0,gold,ESFJ,73.58639712,2024-02-11,8014
34 | id32,25,부산,64,0,vip,ISFJ,13.04992129,2024-05-24,17421
35 | id33,47,부산,94,0,silver,ENFJ,17.25298557,2024-04-02,3880
36 | id34,65,부산,,1,silver,INFP,48.43118381,2024-02-01,3163
37 | id35,30,부산,,2,silver,ESTJ,33.30899901,2024-06-10,3084
38 | id36,68,부산,77,1,gold,INTP,13.04992129,2024-07-20,9713
39 | id37,100,부산,,0,silver,ESTP,33.30899901,2024-07-08,4068
40 | id38,87,부산,,1,,ESTP,83.68538032,2024-06-21,1048
41 | id39,56,부산,50,0,,INFJ,33.30899901,2024-12-22,
42 | id40,56,대구,75,0,gold,ENFP,17.25298557,2024-01-22,8481
43 | id41,81,대구,55,0,gold,ENFJ,37.11373918,2024-10-04,8640
44 | id42,65,대구,48,2,gold,ESTP,33.30899901,2024-02-09,5999
45 | id43,23,대구,60,0,silver,ISTP,29.26986926,2024-05-18,3878
46 | id44,44,대구,,0,,INTP,16.2838541,2024-11-10,546
47 | id45,97,대구,88,0,gold,ENFJ,13.04992129,2024-06-21,8317
48 | id46,93,대구,,0,gold,ESTJ,67.8863732,2024-05-23,9711
49 | id47,34.6,대구,75,1,gold,ESTJ,90.49699927,2024-05-28,8628
50 | id48,18,대구,,0,,ENFP,20.1294441,2024-03-25,
51 | id49,75,대구,88,0,gold,INTP,37.11373918,2024-03-31,9737
52 | id50,86,대구,78,1,,ENFP,60.33982554,2024-12-05,1935
53 | id51,36,대구,,0,gold,ISTJ,83.68538032,2024-08-20,7217
54 | id52,97,대구,82,1,gold,ISFJ,90.49699927,2024-05-20,8518
55 | id53,52,대구,50,0,gold,ESTP,20.1294441,2024-09-09,7012
56 | id54,53,대구,,1,gold,ENFJ,69.73031281,2024-06-21,5872
57 | id55,75,대구,63,2,gold,ENTP,13.04992129,2024-02-06,6042
58 | id56,59,대구,,1,vip,ESTJ,73.58639712,2024-04-05,19589
59 | id57,3,대구,111,0,silver,ISFJ,29.26986926,2024-01-12,4421
60 | id58,0,대구,100,2,,ESTP,33.30899901,2024-04-18,1928
61 | id59,64,대구,,1,silver,ESFJ,20.1294441,2024-06-23,4994
62 | id60,56,경기,,0,gold,ESFP,52.66707799,2024-11-24,6794
63 | id61,87,경기,62,2,,INTP,69.73031281,2024-02-03,218
64 | id62,52,경기,,0,,INTP,60.33982554,2024-04-10,2100
65 | id63,88,경기,86,1,silver,ISFJ,73.58639712,2024-12-01,4053
66 | id64,43,경기,62,2,gold,ESFP,73.58639712,2024-02-22,5995
67 | id65,26.5,경기,,0,silver,ISFP,91.29779092,2024-01-10,3336
68 | id66,87,경기,,1,gold,ISFP,17.25298557,2024-08-05,8471
69 | id67,66,경기,52,1,,ISFJ,73.58639712,2024-06-17,1159
70 | id68,35,경기,45,2,gold,ISFP,67.8863732,2024-07-29,8599
71 | id69,75,경기,85,0,,ESTJ,69.73031281,2024-11-14,2708
72 | id70,-9,경기,96,1,silver,ISTP,48.43118381,2024-11-17,4442
73 | id71,35,경기,84,2,gold,ISFP,52.66707799,2024-07-15,8087
74 | id72,8,경기,97,0,,ESTJ,97.38103419,2024-01-30,602
75 | id73,90,경기,,1,,ISFJ,73.58639712,2024-08-12,512
76 | id74,45,경기,98,0,gold,ESTP,52.66707799,2024-05-27,7739
77 | id75,63,경기,47,0,gold,ESTP,20.1294441,2024-06-12,6779
78 | id76,71,경기,12,0,,ENTJ,83.68538032,2024-07-28,2872
79 | id77,77,경기,31,0,,INFP,98.42989897,2024-01-16,1518
80 | id78,92,경기,96,1,gold,INTJ,69.73031281,2024-10-27,7565
81 | id79,30,경기,,0,gold,INTJ,80.13828012,2024-08-14,8777
82 | id80,67,경기,60,0,silver,ISFP,83.68538032,2024-01-14,4381
83 | id81,86,경기,50,1,,ISFJ,37.11373918,2024-09-14,244
84 | id82,48,경기,,0,,ENTJ,37.11373918,2024-10-17,
85 | id83,73,경기,50,1,vip,ENTP,80.13828012,2024-09-26,19139
86 | id84,66,경기,44,0,gold,INTP,83.68538032,2024-12-19,5650
87 | id85,83.6,경기,55,0,gold,INFJ,80.13828012,2024-09-24,6719
88 | id86,2,경기,,0,,ESTP,29.26986926,2024-02-16,2155
89 | id87,19,경기,,1,gold,ISFP,97.38103419,2024-08-30,6516
90 | id88,89,경기,75,0,,ESTJ,60.33982554,2024-01-06,2713
91 | id89,34,경기,66,1,gold,ENTJ,33.30899901,2024-10-14,6119
92 | id90,54,경기,,0,silver,ENTP,29.26986926,2024-04-03,3818
93 | id91,6,경기,72,0,gold,INTP,9.796377581,2024-08-23,8988
94 | id92,97,경기,78,1,gold,INFP,97.38103419,2024-05-08,9625
95 | id93,21.8,경기,57,0,,ISFP,73.58639712,2024-06-07,42
96 | id94,84,경기,,1,silver,ESTJ,90.49699927,2024-08-16,3774
97 | id95,77,경기,43,1,gold,INTJ,91.29779092,2024-05-21,8697
98 | id96,92,경기,53,1,silver,ENTJ,52.66707799,2024-05-06,4336
99 | id97,100,경기,,0,gold,INFP,67.8863732,2024-03-18,6687
100 | id98,39,경기,58,2,,INFP,98.42989897,2024-10-02,865
101 | id99,1,경기,47,0,gold,ESFJ,97.38103419,2024-12-02,6090
102 | id100,47,경기,53,0,vip,ESFP,33.30899901,2024-02-21,15535
103 | id68,35,경기,45,2,gold,ISFP,67.8863732,2024-07-29,8599
104 | id101,36,경기,65,0,silver,ISFP,0,2025-01-29,5735
105 | id102,43,대구,34,0,,ISFP,0,2025-01-30,8765
106 | id103,21,부산,53,0,vip,ISFP,0,2025-02-17,13543
107 | id104,21,서울,13,0,silver,ESFJ,0,2025-02-17,4556
108 | id105,21,경기,24,0,gold,ESFJ,0,2025-02-17,7654
109 | id106,21,대구,65,0,vip,INFP,0,2025-02-17,4212
110 | id107,21,부산,76,1,silver,ESTJ,0,2025-02-17,2346
111 | id108,21,서울,54,1,gold,ESTJ,0,2025-02-17,2178
112 | id109,21,경기,78,1,vip,ESTJ,0,2025-02-17,6790
113 | id110,22,대구,45,2,silver,ESTJ,0,2025-02-17,2112
114 | id111,22,부산,65,2,gold,ENTP,0,2025-02-17,7894
115 | id112,22,서울,45,2,vip,ENTP,0,2025-02-17,9877
116 | id113,22,경기,34,2,silver,ENTP,0,2025-02-17,10346
117 | id114,22,대구,23,0,gold,INTP,0,2025-02-17,9747
118 | id115,23,부산,65,0,vip,ENTP,0,2025-02-17,5628
119 | id116,23,서울,12,1,silver,INFP,0,2025-02-17,1267
120 | id117,23,대구,65,2,gold,INFP,0,2025-02-17,6543
121 | id118,24,부산,94,1,vip,ESFJ,0,2025-02-17,2356
--------------------------------------------------------------------------------
/part1/ch3/type1_data2.csv:
--------------------------------------------------------------------------------
1 | year,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199
2 | 2000,137,74,114,140,80,150,16,133,178,181,106,176,163,81,4,195,111,80,166,161,117,141,134,146,10,191,68,11,200,95,74,104,18,107,68,114,43,4,14,59,173,124,108,18,175,30,174,28,144,100,162,116,71,9,138,162,83,112,66,1,111,35,8,180,191,7,108,174,180,189,83,139,109,73,24,197,28,148,47,194,26,106,103,196,115,200,145,33,182,70,158,112,196,41,27,50,34,19,120,33,103,26,43,140,184,159,16,111,34,191,192,168,93,53,95,141,184,85,68,81,164,129,27,119,128,172,103,105,100,162,55,139,193,114,128,57,165,38,39,175,184,119,98,73,23,62,67,180,163,145,68,48,75,187,17,199,74,48,35,54,117,173,76,51,1,40,81,5,165,175,176,72,103,7,175,178,172,152,104,195,15,153,44,111,176,55,39,40,80,85,124,94,118,12,50,191,137,174,56,128
3 | 2001,176,87,64,110,128,16,8,4,123,87,190,146,53,52,21,55,75,131,76,181,72,82,121,182,97,162,86,179,68,36,77,146,155,13,133,134,28,14,108,4,194,197,153,96,16,53,172,125,57,50,184,122,3,3,168,32,99,189,197,27,7,188,120,181,23,172,56,45,68,191,109,14,66,101,58,1,33,72,74,55,74,87,62,112,14,47,68,15,172,173,196,190,79,117,137,141,171,105,186,128,159,194,29,8,147,24,199,120,94,94,171,158,115,141,92,13,86,54,182,117,193,186,171,198,136,38,77,35,93,80,88,74,199,9,85,78,176,25,137,54,94,181,35,156,82,175,74,15,16,52,122,162,106,36,17,190,135,128,169,78,160,137,147,188,130,146,97,7,37,25,133,134,162,155,19,112,170,154,170,103,197,8,55,131,48,117,112,68,57,117,59,22,3,108,17,104,101,161,156,43
4 | 2002,128,132,123,112,134,160,180,47,115,155,50,152,13,137,89,193,156,122,137,88,173,173,57,59,114,99,77,27,41,4,191,172,11,165,45,39,38,181,178,23,151,76,188,19,110,28,39,89,88,100,174,12,116,142,70,141,91,112,72,147,195,142,89,34,179,3,101,171,144,33,122,196,142,145,88,159,167,44,84,173,52,31,123,187,99,187,5,95,193,101,186,30,176,134,14,164,9,122,29,126,38,137,110,130,153,167,14,22,185,54,162,124,100,155,98,3,18,193,199,168,126,121,152,174,131,61,148,16,171,64,169,95,109,45,139,86,179,17,186,151,88,100,147,74,31,200,91,16,32,142,137,162,74,31,20,131,77,106,96,187,102,166,85,179,178,37,39,158,186,17,36,122,6,125,190,156,90,116,68,146,39,136,123,54,186,103,154,46,22,87,160,111,116,75,48,51,81,101,194,73
5 | 2003,78,45,26,50,177,119,47,72,163,125,5,22,162,177,8,85,199,191,13,162,113,176,9,65,151,51,154,67,146,28,141,150,69,33,159,16,79,190,180,20,29,65,187,41,25,133,70,161,62,194,113,109,175,113,20,7,132,141,126,41,105,43,74,190,12,113,144,126,3,27,25,49,39,102,74,133,124,136,200,154,186,45,36,87,147,30,60,28,132,103,15,85,131,24,100,128,84,173,44,48,189,9,102,62,108,65,122,192,86,15,184,200,70,198,13,88,126,189,123,113,60,17,36,96,197,101,65,57,31,70,154,169,174,75,113,55,85,48,107,13,16,115,96,127,111,120,78,24,144,120,146,84,54,187,134,67,1,177,95,133,107,126,156,60,20,51,70,190,119,144,188,32,135,38,50,149,79,87,109,116,72,176,79,47,26,34,147,186,151,15,163,88,52,79,192,83,5,75,196,119
6 | 2004,1,162,124,12,1,126,58,123,186,161,69,39,56,122,112,134,15,17,123,29,101,124,161,140,80,116,189,67,147,72,159,18,147,36,116,47,122,126,32,180,63,149,167,151,4,108,193,71,185,16,186,156,92,12,141,119,65,179,192,100,198,174,183,92,130,15,60,45,122,132,119,88,127,124,139,131,17,47,5,80,16,37,133,198,182,100,191,188,76,33,111,185,96,24,17,62,60,64,33,151,26,38,43,60,109,109,165,53,111,135,6,83,87,125,132,49,156,93,58,128,176,11,81,1,200,90,183,157,180,37,19,193,1,97,88,200,115,115,181,54,17,139,57,182,80,81,141,22,119,164,114,196,20,129,117,191,117,150,148,12,73,150,65,52,132,62,21,3,19,192,154,164,129,37,31,158,163,177,128,23,34,157,94,86,75,151,43,122,17,157,100,93,86,12,115,72,29,30,88,150
7 |
--------------------------------------------------------------------------------
/part2/ch3/ch3_metrics.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyMCK1VEa3NmQxvmuxyJe94e"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part2/ch3/ch3_metrics.ipynb)"],"metadata":{"id":"zpGaMMdZUki7"}},{"cell_type":"markdown","source":["## 이진분류 평가지표"],"metadata":{"id":"xkQxClqpjqya"}},{"cell_type":"code","source":["# 이진분류 데이터\n","import pandas as pd\n","y_true = pd.DataFrame([1, 1, 1, 0, 0, 1, 1, 1, 1, 0]) #실제값\n","y_pred = pd.DataFrame([1, 0, 1, 1, 0, 0, 0, 1, 1, 0]) #예측값\n","\n","y_true_str = pd.DataFrame(['A', 'A', 'A', 'B', 'B', 'A', 'A', 'A', 'A', 'B']) #실제값\n","y_pred_str = pd.DataFrame(['A', 'B', 'A', 'A', 'B', 'B', 'B', 'A', 'A', 'B']) #예측값"],"metadata":{"id":"qVxYWt82irzy"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"pvCf5mUdihRO","executionInfo":{"status":"ok","timestamp":1711787831407,"user_tz":-540,"elapsed":496,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"840ef927-4cdc-4ef0-c5e6-11b35fef5703"},"outputs":[{"output_type":"stream","name":"stdout","text":["정확도: 0.6\n","정확도: 0.6\n"]}],"source":["# 정확도(Accuracy)\n","from sklearn.metrics import accuracy_score\n","accuracy = accuracy_score(y_true, y_pred)\n","print(\"정확도:\", accuracy)\n","\n","accuracy = accuracy_score(y_true_str, y_pred_str)\n","print(\"정확도:\", accuracy)"]},{"cell_type":"code","source":["# 정밀도(Precision)\n","from sklearn.metrics import precision_score\n","precision = precision_score(y_true, y_pred)\n","print(\"정밀도:\", precision)\n","\n","precision = precision_score(y_true_str, y_pred_str, pos_label='A')\n","print(\"정밀도:\", 
precision)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kYd9ae01jPb4","executionInfo":{"status":"ok","timestamp":1711787831407,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"c4d8207b-c032-419a-b290-4f8a2a07c583"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["정밀도: 0.8\n","정밀도: 0.8\n"]}]},{"cell_type":"code","source":["# 재현율(Recall)\n","from sklearn.metrics import recall_score\n","recall = recall_score(y_true, y_pred)\n","print(\"재현율:\", recall)\n","\n","recall = recall_score(y_true_str, y_pred_str, pos_label='A')\n","print(\"재현율:\", recall)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_m3LRdF7jPgD","executionInfo":{"status":"ok","timestamp":1711787831709,"user_tz":-540,"elapsed":305,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"d36cafd1-7e82-411d-d9df-e120360da41b"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["재현율: 0.5714285714285714\n","재현율: 0.5714285714285714\n"]}]},{"cell_type":"code","source":["# F1 스코어(F1 Score)\n","from sklearn.metrics import f1_score\n","f1 = f1_score(y_true, y_pred)\n","print(\"F1 스코어:\", f1)\n","\n","f1 = f1_score(y_true_str, y_pred_str, pos_label='A')\n","print(\"F1 스코어:\", f1)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nhbdwRnWjPil","executionInfo":{"status":"ok","timestamp":1711787832084,"user_tz":-540,"elapsed":377,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"366c1a7f-28cb-4788-e731-d077c73faaa4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["F1 스코어: 0.6666666666666666\n","F1 스코어: 0.6666666666666666\n"]}]},{"cell_type":"code","source":["# ROC-AUC\n","from sklearn.metrics import roc_auc_score\n","# 실제값 (0: 음성, 1: 양성)\n","y_true = pd.DataFrame([0, 1, 0, 1, 1, 0, 0, 0, 1, 1])\n","# 예측값 중 양성(1) 확률\n","y_pred_proba = 
pd.DataFrame([0.4, 0.9, 0.1, 0.3, 0.8, 0.6, 0.4, 0.2, 0.7, 0.6])\n","\n","roc_auc = roc_auc_score(y_true, y_pred_proba)\n","print(\"ROC-AUC:\", roc_auc)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"A8ymIAM3jPk-","executionInfo":{"status":"ok","timestamp":1711787832084,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"3f23d2cf-832c-4094-c51e-7ee94abc917c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["ROC-AUC: 0.86\n"]}]},{"cell_type":"code","source":["# 실제값\n","y_true_str = pd.DataFrame(['A', 'B', 'A', 'B', 'B', 'A', 'A', 'A', 'B', 'B'])\n","\n","# 예측값 중 B 확률\n","y_pred_proba_str = pd.DataFrame([0.4, 0.9, 0.1, 0.3, 0.8, 0.6, 0.4, 0.2, 0.7, 0.6])\n","roc_auc = roc_auc_score(y_true_str, y_pred_proba_str)\n","\n","print(\"ROC-AUC:\", roc_auc)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kDHt8BmA82ml","executionInfo":{"status":"ok","timestamp":1711787905418,"user_tz":-540,"elapsed":274,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"ac1ed1b7-2eba-4738-8b79-ae73274cbe97"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["ROC-AUC: 0.86\n"]}]},{"cell_type":"code","source":["# 참고 (실제값을 0과 1로 변경)\n","from sklearn.metrics import roc_auc_score\n","\n","y_true_str = pd.DataFrame(['A', 'B', 'A', 'B', 'B', 'A', 'A', 'A', 'B', 'B'])\n","y_pred_proba_str = pd.DataFrame([0.4, 0.9, 0.1, 0.3, 0.8, 0.6, 0.4, 0.2, 0.7, 0.6])\n","\n","# 'A'를 0, 'B'를 1으로 변환\n","y_true_binary = (y_true_str == 'B').astype(int)\n","\n","roc_auc = roc_auc_score(y_true_binary, y_pred_proba)\n","print(\"ROC-AUC:\", roc_auc)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EGq3WO9rgRGB","executionInfo":{"status":"ok","timestamp":1711787947340,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"4b5bcb0e-bc33-4f98-8c79-89c88f300441"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["ROC-AUC: 0.86\n"]}]},{"cell_type":"markdown","source":["## 다중분류 평가지표"],"metadata":{"id":"CCrgqDtQlWLe"}},{"cell_type":"code","source":["# 다중분류 데이터\n","y_true = pd.DataFrame([1, 2, 3, 3, 2, 1, 3, 3, 2, 1]) # 실제값\n","y_pred = pd.DataFrame([1, 2, 1, 3, 2, 1, 1, 2, 2, 1]) # 예측값\n","\n","y_true_str = pd.DataFrame(['A', 'B', 'C', 'C', 'B', 'A', 'C', 'C', 'B', 'A']) # 실제값\n","y_pred_str = pd.DataFrame(['A', 'B', 'A', 'C', 'B', 'A', 'A', 'B', 'B', 'A']) # 예측값"],"metadata":{"id":"CQhA_jUQjHxW"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# 정확도(Accuracy)\n","from sklearn.metrics import accuracy_score\n","accuracy = accuracy_score(y_true, y_pred)\n","print(\"정확도:\", accuracy)\n","\n","accuracy = accuracy_score(y_true_str, y_pred_str)\n","print(\"정확도:\", accuracy)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"lQGHo-DdlYIp","executionInfo":{"status":"ok","timestamp":1711787832397,"user_tz":-540,"elapsed":316,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"080ebd81-b958-4584-e85e-73a7c4e30c72"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["정확도: 0.7\n","정확도: 0.7\n"]}]},{"cell_type":"code","source":["# 정밀도(Precision)\n","from sklearn.metrics import precision_score\n","precision = precision_score(y_true, y_pred, average='macro') # average= micro, macro, weighted\n","print(\"정밀도:\", precision)\n","\n","precision = precision_score(y_true_str, y_pred_str, average='macro')\n","print(\"정밀도:\", precision)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4mSLzqU3lzTr","executionInfo":{"status":"ok","timestamp":1711787832709,"user_tz":-540,"elapsed":314,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"c816bb9e-d408-457e-bdeb-fc450ac65730"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["정밀도: 0.7833333333333333\n","정밀도: 0.7833333333333333\n"]}]},{"cell_type":"code","source":["# 재현율(Recall)\n","from sklearn.metrics import recall_score\n","recall = recall_score(y_true, y_pred, average='macro') # average= micro, macro, weighted\n","print(\"재현율:\", recall)\n","\n","recall = recall_score(y_true_str, y_pred_str, average='macro')\n","print(\"재현율:\", recall)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"E_n4u8wUl3nS","executionInfo":{"status":"ok","timestamp":1711787832709,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"ea47f782-ef01-4c05-d05e-bf2018516108"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["재현율: 0.75\n","재현율: 0.75\n"]}]},{"cell_type":"code","source":["# F1 스코어(F1 Score)\n","from sklearn.metrics import f1_score\n","f1 = f1_score(y_true, y_pred, average='macro') # average= micro, macro, weighted\n","print(\"F1 스코어:\", f1)\n","\n","f1 = f1_score(y_true_str, y_pred_str, average='macro')\n","print(\"F1 스코어:\", f1)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fPNIshTOmzhA","executionInfo":{"status":"ok","timestamp":1711787833028,"user_tz":-540,"elapsed":322,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"6eb5b091-836d-430c-fb6b-9464563d3084"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["F1 스코어: 0.669047619047619\n","F1 스코어: 0.669047619047619\n"]}]},{"cell_type":"markdown","source":["## 회귀 평가지표"],"metadata":{"id":"CPpvxE1gox0C"}},{"cell_type":"code","source":["# 회귀 데이터\n","import pandas as pd\n","y_true = pd.DataFrame([1, 2, 5, 2, 4, 4, 7, 9]) # 실제값\n","y_pred = pd.DataFrame([1.14, 2.53, 4.87, 3.08, 4.21, 5.53, 7.51, 10.32]) # 
예측값"],"metadata":{"id":"gxaFPg9urVDH"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# MSE(Mean Squared Error)\n","from sklearn.metrics import mean_squared_error\n","mse = mean_squared_error(y_true, y_pred)\n","print(\"MSE:\", mse)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JkPY1sH_ofQc","executionInfo":{"status":"ok","timestamp":1711787833028,"user_tz":-540,"elapsed":8,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"c71cb29e-e6d4-4240-bf3e-abd9cb7aa6d1"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MSE: 0.7339125000000001\n"]}]},{"cell_type":"code","source":["# MAE(Mean Absolute Error)\n","from sklearn.metrics import mean_absolute_error\n","mae = mean_absolute_error(y_true, y_pred)\n","print(\"MAE:\", mae)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"foCZLZSso-Sg","executionInfo":{"status":"ok","timestamp":1711787833028,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"f7492a39-e940-4b34-a869-a40ecd1a78db"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MAE: 0.68125\n"]}]},{"cell_type":"code","source":["# 결정 계수(R-squared)\n","from sklearn.metrics import r2_score\n","r2 = r2_score(y_true, y_pred)\n","print(\"결정 계수:\", r2)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"pijoRvYdo-aD","executionInfo":{"status":"ok","timestamp":1711787833372,"user_tz":-540,"elapsed":349,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"f9172311-68a4-49ee-e5af-8ca8f04b7f88"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["결정 계수: 0.8859941747572815\n"]}]},{"cell_type":"code","source":["# RMSE(Root Mean Squared Error)\n","from sklearn.metrics import mean_squared_error\n","mse = mean_squared_error(y_true, y_pred)\n","rmse = mse ** 0.5\n","print(\"RMSE:\", 
rmse)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0tsm8n_5o-hT","executionInfo":{"status":"ok","timestamp":1711787833372,"user_tz":-540,"elapsed":10,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"dcc7eec9-af0c-4196-9d5b-2ed4634046af"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["RMSE: 0.8566869323154171\n"]}]},{"cell_type":"code","source":["# MSLE(Mean Squared Log Error)\n","from sklearn.metrics import mean_squared_log_error\n","msle = mean_squared_log_error(y_true, y_pred)\n","print(\"MSLE:\", msle)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"RHG3TIHqq1M0","executionInfo":{"status":"ok","timestamp":1711787833372,"user_tz":-540,"elapsed":9,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"e76deb4f-0b93-448f-8f8a-9f29330dab89"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MSLE: 0.027278486182156975\n"]}]},{"cell_type":"code","source":["# RMSLE(Root Mean Squared Log Error)\n","from sklearn.metrics import mean_squared_log_error\n","rmsle = mean_squared_log_error(y_true, y_pred) ** 0.5\n","print(\"RMSLE:\", rmsle)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wsAnyeb1qGyt","executionInfo":{"status":"ok","timestamp":1711787833372,"user_tz":-540,"elapsed":8,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"a5bab992-0e65-4a2f-a405-f522f4042a09"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["RMSLE: 0.1651619998127807\n"]}]},{"cell_type":"code","source":["# MAPE(Mean Absolute Percentage Error)\n","mape = (abs((y_true - y_pred) / y_true)).mean() * 100\n","print(\"MAPE:\", mape)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cZVmLXZJ_pxE","executionInfo":{"status":"ok","timestamp":1711787833705,"user_tz":-540,"elapsed":9,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"20a3d800-3cb7-4410-eda2-3d0c6fefb827"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MAPE: 0 20.319048\n","dtype: float64\n"]}]},{"cell_type":"code","source":["epsilon = 1e-10\n","mape = (abs((y_true - y_pred) / (y_true + epsilon))).mean() * 100\n","print(\"MAPE:\", mape)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"i9gT--uOCW-b","executionInfo":{"status":"ok","timestamp":1711787833705,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"1dfc4141-2fe8-4f2a-f528-3a937db8ad0a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["MAPE: 0 20.319048\n","dtype: float64\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"RGg7hqRkCZJb"},"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------
/part2/ch6/diabetes_test.csv:
--------------------------------------------------------------------------------
1 | Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
2 | 3,102,74,0,0,29.5,0.121,32
3 | 5,104,74,0,0,28.8,0.153,48
4 | 4,95,70,32,0,32.1,0.612,24
5 | 1,88,62,24,44,29.9,0.422,23
6 | 2,68,70,32,66,25.0,0.187,25
7 | 4,173,70,14,168,29.7,0.361,33
8 | 6,111,64,39,0,34.2,0.26,24
9 | 4,136,70,0,0,31.2,1.182,22
10 | 9,112,82,32,175,34.2,0.26,36
11 | 2,93,64,32,160,38.0,0.674,23
12 | 3,111,56,39,0,30.1,0.557,30
13 | 6,117,96,0,0,28.7,0.157,30
14 | 1,112,72,30,176,34.4,0.528,25
15 | 0,167,0,0,0,32.3,0.839,30
16 | 7,129,68,49,125,38.5,0.439,43
17 | 10,101,76,48,180,32.9,0.171,63
18 | 5,158,70,0,0,29.8,0.207,63
19 | 6,99,60,19,54,26.9,0.497,32
20 | 0,177,60,29,478,34.6,1.072,21
21 | 5,139,80,35,160,31.6,0.361,25
22 | 7,106,92,18,0,22.7,0.235,48
23 | 13,158,114,0,0,42.3,0.257,44
24 | 1,95,66,13,38,19.6,0.334,25
25 | 4,197,70,39,744,36.7,2.329,31
26 | 3,130,64,0,0,23.1,0.314,22
27 | 0,93,100,39,72,43.4,1.021,35
28 | 1,136,74,50,204,37.4,0.399,24
29 | 5,117,86,30,105,39.1,0.251,42
30 | 9,170,74,31,0,44.0,0.403,43
31 | 10,162,84,0,0,27.7,0.182,54
32 | 4,116,72,12,87,22.1,0.463,37
33 | 3,173,84,33,474,35.7,0.258,22
34 | 0,146,70,0,0,37.9,0.334,28
35 | 0,95,80,45,92,36.5,0.33,26
36 | 6,147,80,0,0,29.5,0.178,50
37 | 0,95,64,39,105,44.6,0.366,22
38 | 2,114,68,22,0,28.7,0.092,25
39 | 3,61,82,28,0,34.4,0.243,46
40 | 1,99,58,10,0,25.4,0.551,21
41 | 0,120,74,18,63,30.5,0.285,26
42 | 4,146,78,0,0,38.5,0.52,67
43 | 9,156,86,28,155,34.3,1.189,42
44 | 7,147,76,0,0,39.4,0.257,43
45 | 6,80,66,30,0,26.2,0.313,41
46 | 6,195,70,0,0,30.9,0.328,31
47 | 1,117,60,23,106,33.8,0.466,27
48 | 7,62,78,0,0,32.6,0.391,41
49 | 0,165,76,43,255,47.9,0.259,26
50 | 1,97,70,40,0,38.1,0.218,30
51 | 5,166,76,0,0,45.7,0.34,27
52 | 4,99,72,17,0,25.6,0.294,28
53 | 11,135,0,0,0,52.3,0.578,40
54 | 4,123,62,0,0,32.0,0.226,35
55 | 12,100,84,33,105,30.0,0.488,46
56 | 3,180,64,25,70,34.0,0.271,26
57 | 0,126,84,29,215,30.7,0.52,24
58 | 7,133,84,0,0,40.2,0.696,37
59 | 2,90,70,17,0,27.3,0.085,22
60 | 11,120,80,37,150,42.3,0.785,48
61 | 0,137,40,35,168,43.1,2.288,33
62 | 1,135,54,0,0,26.7,0.687,62
63 | 8,126,74,38,75,25.9,0.162,39
64 | 3,99,80,11,64,19.3,0.284,30
65 | 5,116,74,29,0,32.3,0.66,35
66 | 1,97,68,21,0,27.2,1.095,22
67 | 10,129,62,36,0,41.2,0.441,38
68 | 4,132,0,0,0,32.9,0.302,23
69 | 7,168,88,42,321,38.2,0.787,40
70 | 3,111,62,0,0,22.6,0.142,21
71 | 4,151,90,38,0,29.7,0.294,36
72 | 2,100,66,20,90,32.9,0.867,28
73 | 1,124,60,32,0,35.8,0.514,21
74 | 10,111,70,27,0,27.5,0.141,40
75 | 6,119,50,22,176,27.1,1.318,33
76 | 5,132,80,0,0,26.8,0.186,69
77 | 2,56,56,28,45,24.2,0.332,22
78 | 1,114,66,36,200,38.1,0.289,21
79 | 4,146,85,27,100,28.9,0.189,27
80 | 10,101,86,37,0,45.6,1.136,38
81 | 5,155,84,44,545,38.7,0.619,34
82 | 1,80,74,11,60,30.0,0.527,22
83 | 1,90,62,18,59,25.1,1.268,25
84 | 8,133,72,0,0,32.9,0.27,39
85 | 1,106,70,28,135,34.2,0.142,22
86 | 0,100,70,26,50,30.8,0.597,21
87 | 0,119,64,18,92,34.9,0.725,23
88 | 11,138,74,26,144,36.1,0.557,50
89 | 6,114,0,0,0,0.0,0.189,26
90 | 2,118,80,0,0,42.9,0.693,21
91 | 6,92,92,0,0,19.9,0.188,28
92 | 9,120,72,22,56,20.8,0.733,48
93 | 4,95,64,0,0,32.0,0.161,31
94 | 4,97,60,23,0,28.2,0.443,22
95 | 6,134,80,37,370,46.2,0.238,46
96 | 0,125,96,0,0,22.5,0.262,21
97 | 1,95,74,21,73,25.9,0.673,36
98 | 4,96,56,17,49,20.8,0.34,26
99 | 9,145,88,34,165,30.3,0.771,53
100 | 2,88,74,19,53,29.0,0.229,22
101 | 0,119,0,0,0,32.4,0.141,24
102 | 5,108,72,43,75,36.1,0.263,33
103 | 9,130,70,0,0,34.2,0.652,45
104 | 3,126,88,41,235,39.3,0.704,27
105 | 3,128,72,25,190,32.4,0.549,27
106 | 5,168,64,0,0,32.9,0.135,41
107 | 1,138,82,0,0,40.1,0.236,28
108 | 4,99,68,38,0,32.8,0.145,33
109 | 8,120,86,0,0,28.4,0.259,22
110 | 10,168,74,0,0,38.0,0.537,34
111 | 1,130,60,23,170,28.6,0.692,21
112 | 3,182,74,0,0,30.5,0.345,29
113 | 7,81,78,40,48,46.7,0.261,42
114 | 2,90,80,14,55,24.4,0.249,24
115 | 0,137,68,14,148,24.8,0.143,21
116 | 8,120,0,0,0,30.0,0.183,38
117 | 9,140,94,0,0,32.7,0.734,45
118 | 3,191,68,15,130,30.9,0.299,34
119 | 4,158,78,0,0,32.9,0.803,31
120 | 1,90,62,12,43,27.2,0.58,24
121 | 3,99,54,19,86,25.6,0.154,24
122 | 7,142,60,33,190,28.8,0.687,61
123 | 1,91,54,25,100,25.2,0.234,23
124 | 4,110,66,0,0,31.9,0.471,29
125 | 10,75,82,0,0,33.3,0.263,38
126 | 10,115,0,0,0,35.3,0.134,29
127 | 1,143,86,30,330,30.1,0.892,23
128 | 3,87,60,18,0,21.8,0.444,21
129 | 8,125,96,0,0,0.0,0.232,54
130 | 2,112,86,42,160,38.4,0.246,28
131 | 2,92,52,0,0,30.1,0.141,22
132 | 2,121,70,32,95,39.1,0.886,23
133 | 2,146,0,0,0,27.5,0.24,28
134 | 1,79,75,30,0,32.0,0.396,22
135 | 1,73,50,10,0,23.0,0.248,21
136 | 0,94,0,0,0,0.0,0.256,25
137 | 3,150,76,0,0,21.0,0.207,37
138 | 0,104,64,37,64,33.6,0.51,22
139 | 2,128,64,42,0,40.0,1.101,24
140 | 1,95,82,25,180,35.0,0.233,43
141 | 5,86,68,28,71,30.2,0.364,24
142 | 1,133,102,28,140,32.8,0.234,45
143 | 1,196,76,36,249,36.5,0.875,29
144 | 1,189,60,23,846,30.1,0.398,59
145 | 5,106,82,30,0,39.5,0.286,38
146 | 2,120,54,0,0,26.8,0.455,27
147 | 4,122,68,0,0,35.0,0.394,29
148 | 3,121,52,0,0,36.0,0.127,25
149 | 13,153,88,37,140,40.6,1.174,39
150 | 9,91,68,0,0,24.2,0.2,58
151 | 5,147,78,0,0,33.7,0.218,65
152 | 0,105,84,0,0,27.9,0.741,62
153 | 2,101,58,35,90,21.8,0.155,22
154 | 1,144,82,46,180,46.1,0.335,46
155 | 3,141,0,0,0,30.0,0.761,27
156 |
--------------------------------------------------------------------------------
/part2/ch7/drug_test.csv:
--------------------------------------------------------------------------------
1 | Age,Sex,BP,Cholesterol,Na_to_K
2 | 74,F,LOW,HIGH,20.942
3 | 65,M,HIGH,NORMAL,34.997
4 | 58,F,LOW,HIGH,38.247
5 | 34,M,NORMAL,HIGH,22.456
6 | 59,M,HIGH,HIGH,13.935
7 | 47,M,LOW,HIGH,10.114
8 | 24,M,HIGH,NORMAL,9.475
9 | 41,F,LOW,NORMAL,18.739
10 | 39,F,NORMAL,NORMAL,9.709
11 | 60,M,NORMAL,HIGH,15.171
12 | 58,F,HIGH,HIGH,19.416
13 | 61,F,LOW,HIGH,18.043
14 | 69,M,LOW,NORMAL,11.455
15 | 58,M,HIGH,HIGH,18.991
16 | 59,F,LOW,HIGH,10.444
17 | 63,M,NORMAL,HIGH,25.917
18 | 20,F,HIGH,HIGH,11.262
19 | 31,M,HIGH,NORMAL,11.227
20 | 32,F,HIGH,NORMAL,10.292
21 | 38,M,LOW,HIGH,18.295
22 | 20,M,HIGH,NORMAL,35.639
23 | 26,F,HIGH,NORMAL,12.307
24 | 49,M,LOW,NORMAL,11.014
25 | 48,F,LOW,HIGH,15.036
26 | 36,F,NORMAL,HIGH,16.753
27 | 32,F,LOW,HIGH,9.712
28 | 29,F,HIGH,HIGH,29.45
29 | 49,M,LOW,HIGH,10.537
30 | 35,F,HIGH,HIGH,12.894
31 | 41,M,HIGH,NORMAL,15.156
32 | 64,F,LOW,NORMAL,25.741
33 | 43,M,LOW,NORMAL,19.368
34 | 57,F,NORMAL,NORMAL,25.893
35 | 43,M,HIGH,HIGH,13.972
36 | 16,M,LOW,HIGH,12.006
37 | 34,M,HIGH,HIGH,18.703
38 | 28,F,NORMAL,HIGH,12.879
39 | 65,F,LOW,NORMAL,13.769
40 | 21,F,HIGH,NORMAL,28.632
41 | 51,F,LOW,NORMAL,23.003
42 | 54,M,NORMAL,HIGH,24.658
43 | 60,M,HIGH,HIGH,13.934
44 | 55,F,HIGH,HIGH,10.977
45 | 49,M,HIGH,NORMAL,6.269
46 | 36,F,HIGH,HIGH,11.198
47 | 60,F,HIGH,HIGH,13.303
48 | 65,F,HIGH,NORMAL,31.876
49 | 28,M,NORMAL,HIGH,27.064
50 | 39,F,NORMAL,NORMAL,17.225
51 | 74,M,HIGH,NORMAL,15.436
52 | 53,M,LOW,HIGH,22.963
53 | 64,M,NORMAL,HIGH,7.761
54 | 20,F,LOW,NORMAL,11.686
55 | 18,F,NORMAL,NORMAL,8.75
56 | 23,M,NORMAL,HIGH,12.26
57 | 49,M,HIGH,NORMAL,8.7
58 | 68,M,HIGH,HIGH,11.009
59 | 58,F,HIGH,NORMAL,14.239
60 | 15,F,HIGH,NORMAL,16.725
61 | 61,F,LOW,NORMAL,7.34
62 | 23,M,NORMAL,HIGH,16.85
63 | 66,M,HIGH,HIGH,16.347
64 | 70,F,NORMAL,HIGH,20.489
65 | 34,F,HIGH,NORMAL,19.199
66 | 47,F,LOW,HIGH,10.067
67 | 46,M,NORMAL,NORMAL,7.285
68 | 66,F,NORMAL,NORMAL,8.107
69 | 41,F,NORMAL,NORMAL,22.905
70 | 23,M,NORMAL,HIGH,31.686
71 | 39,M,NORMAL,HIGH,15.969
72 | 72,F,LOW,NORMAL,14.642
73 | 50,F,NORMAL,HIGH,12.703
74 | 47,F,LOW,HIGH,11.767
75 | 56,F,LOW,HIGH,11.567
76 | 23,M,LOW,HIGH,7.298
77 | 22,M,LOW,HIGH,8.151
78 | 42,M,LOW,HIGH,20.013
79 | 70,M,HIGH,HIGH,13.967
80 | 40,M,HIGH,HIGH,27.826
81 | 72,M,LOW,HIGH,16.31
82 | 43,M,LOW,HIGH,15.376
83 | 60,M,HIGH,NORMAL,8.621
84 | 46,F,HIGH,HIGH,34.686
85 | 57,M,LOW,NORMAL,19.128
86 | 26,M,LOW,NORMAL,20.909
87 | 37,M,LOW,NORMAL,8.968
88 | 42,F,HIGH,HIGH,21.036
89 | 22,F,NORMAL,HIGH,8.607
90 | 67,M,LOW,NORMAL,20.693
91 | 37,M,LOW,NORMAL,16.724
92 | 28,F,LOW,HIGH,13.127
93 | 22,M,NORMAL,HIGH,11.953
94 | 50,M,NORMAL,NORMAL,15.79
95 | 31,M,HIGH,NORMAL,17.069
96 | 49,F,NORMAL,NORMAL,9.381
97 | 50,F,NORMAL,NORMAL,12.295
98 | 47,M,LOW,HIGH,13.093
99 | 47,M,LOW,NORMAL,33.542
100 | 56,F,HIGH,HIGH,25.395
101 | 56,M,NORMAL,HIGH,8.966
102 |
--------------------------------------------------------------------------------
/part2/ch7/drug_train.csv:
--------------------------------------------------------------------------------
1 | Age,Sex,BP,Cholesterol,Na_to_K,Drug
2 | 70,M,HIGH,HIGH,9.849,drugB
3 | 36,M,LOW,NORMAL,11.424,drugX
4 | 23,F,HIGH,HIGH,25.355,DrugY
5 | 40,F,NORMAL,HIGH,10.103,drugX
6 | 45,M,LOW,NORMAL,10.017,drugX
7 | 67,F,NORMAL,HIGH,15.891,DrugY
8 | 65,M,HIGH,NORMAL,11.34,drugB
9 | 68,M,LOW,HIGH,10.291,drugC
10 | 56,M,LOW,HIGH,15.015,DrugY
11 | 62,M,NORMAL,HIGH,16.594,DrugY
12 | 49,M,LOW,NORMAL,13.598,drugX
13 | 22,F,HIGH,NORMAL,22.818,DrugY
14 | 73,F,NORMAL,HIGH,19.221,DrugY
15 | 16,M,HIGH,NORMAL,19.007,DrugY
16 | 28,F,NORMAL,HIGH,19.675,DrugY
17 | 37,F,LOW,NORMAL,12.006,drugX
18 | 17,M,NORMAL,NORMAL,10.832,drugX
19 | 32,F,LOW,NORMAL,10.84,drugX
20 | 50,F,NORMAL,NORMAL,17.211,DrugY
21 | 72,M,LOW,HIGH,6.769,drugC
22 | 33,F,LOW,HIGH,33.486,DrugY
23 | 31,M,HIGH,NORMAL,11.871,drugA
24 | 68,F,HIGH,NORMAL,10.189,drugB
25 | 39,M,HIGH,HIGH,9.664,drugA
26 | 51,F,NORMAL,HIGH,13.597,drugX
27 | 31,M,HIGH,HIGH,30.366,DrugY
28 | 38,F,LOW,NORMAL,29.875,DrugY
29 | 53,F,HIGH,NORMAL,12.495,drugB
30 | 57,F,NORMAL,HIGH,14.216,drugX
31 | 18,F,HIGH,HIGH,37.188,DrugY
32 | 29,M,HIGH,HIGH,12.856,drugA
33 | 42,F,LOW,NORMAL,29.271,DrugY
34 | 32,M,HIGH,NORMAL,9.445,drugA
35 | 64,M,HIGH,NORMAL,20.932,DrugY
36 | 39,M,LOW,NORMAL,13.938,drugX
37 | 19,F,HIGH,HIGH,13.313,drugA
38 | 37,F,HIGH,HIGH,13.091,drugA
39 | 52,M,NORMAL,HIGH,9.894,drugX
40 | 61,F,HIGH,HIGH,25.475,DrugY
41 | 49,F,NORMAL,HIGH,16.275,DrugY
42 | 19,F,HIGH,NORMAL,25.969,DrugY
43 | 26,F,LOW,HIGH,14.16,drugC
44 | 38,F,HIGH,NORMAL,11.326,drugA
45 | 41,M,LOW,HIGH,11.037,drugC
46 | 51,M,HIGH,NORMAL,11.343,drugB
47 | 24,M,NORMAL,HIGH,25.786,DrugY
48 | 15,M,HIGH,NORMAL,17.206,DrugY
49 | 74,M,HIGH,HIGH,9.567,drugB
50 | 55,M,NORMAL,NORMAL,7.261,drugX
51 | 32,F,HIGH,NORMAL,25.974,DrugY
52 | 67,M,NORMAL,NORMAL,10.898,drugX
53 | 72,M,HIGH,NORMAL,9.677,drugB
54 | 18,F,HIGH,NORMAL,24.276,DrugY
55 | 32,F,NORMAL,HIGH,7.477,drugX
56 | 69,F,NORMAL,HIGH,10.065,drugX
57 | 22,M,HIGH,NORMAL,28.294,DrugY
58 | 43,M,NORMAL,NORMAL,12.859,drugX
59 | 40,F,LOW,NORMAL,11.349,drugX
60 | 34,F,LOW,NORMAL,12.923,drugX
61 | 51,M,HIGH,HIGH,18.295,DrugY
62 | 47,M,HIGH,HIGH,10.403,drugA
63 | 42,M,HIGH,NORMAL,12.766,drugA
64 | 53,M,NORMAL,HIGH,14.133,drugX
65 | 23,M,NORMAL,NORMAL,14.02,drugX
66 | 74,M,LOW,NORMAL,11.939,drugX
67 | 23,M,HIGH,HIGH,8.011,drugA
68 | 68,F,NORMAL,NORMAL,27.05,DrugY
69 | 24,F,NORMAL,HIGH,10.605,drugX
70 | 37,F,HIGH,NORMAL,23.091,DrugY
71 | 35,M,NORMAL,NORMAL,7.845,drugX
72 | 47,F,NORMAL,NORMAL,6.683,drugX
73 | 16,F,HIGH,NORMAL,15.516,DrugY
74 | 58,F,LOW,HIGH,26.645,DrugY
75 | 24,F,HIGH,NORMAL,18.457,DrugY
76 | 25,M,NORMAL,HIGH,19.011,DrugY
77 | 35,M,LOW,NORMAL,9.17,drugX
78 | 20,F,NORMAL,NORMAL,9.281,drugX
79 | 28,F,LOW,HIGH,19.796,DrugY
80 | 45,M,LOW,HIGH,17.951,DrugY
81 | 30,F,NORMAL,HIGH,10.443,drugX
82 | 48,M,HIGH,NORMAL,10.446,drugA
83 | 45,M,LOW,NORMAL,8.37,drugX
84 | 50,M,HIGH,HIGH,7.49,drugA
85 | 39,F,LOW,NORMAL,22.697,DrugY
86 | 60,M,NORMAL,NORMAL,10.091,drugX
87 | 52,M,LOW,NORMAL,32.922,DrugY
88 | 15,M,NORMAL,HIGH,9.084,drugX
89 | 26,F,HIGH,NORMAL,19.161,DrugY
90 | 73,F,HIGH,HIGH,18.348,DrugY
91 | 59,F,NORMAL,HIGH,13.884,drugX
92 | 61,M,NORMAL,HIGH,9.443,drugX
93 | 47,M,LOW,NORMAL,30.568,DrugY
94 | 45,F,HIGH,HIGH,12.854,drugA
95 | 62,M,LOW,NORMAL,27.183,DrugY
96 | 67,M,NORMAL,NORMAL,9.514,drugX
97 | 28,F,NORMAL,HIGH,7.798,drugX
98 | 57,F,HIGH,NORMAL,9.945,drugB
99 | 36,F,HIGH,NORMAL,15.49,DrugY
100 | 28,F,HIGH,NORMAL,18.809,DrugY
101 | 69,M,LOW,HIGH,15.478,DrugY
102 |
--------------------------------------------------------------------------------
/part2/ch7/glass_test.csv:
--------------------------------------------------------------------------------
1 | RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
2 | 1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.0,0.17
3 | 1.52058,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51
4 | 1.52475,11.45,0.0,1.88,72.19,0.81,13.24,0.0,0.34
5 | 1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0.0,0.0
6 | 1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.0,0.0
7 | 1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0
8 | 1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0.0
9 | 1.51784,12.68,3.67,1.16,73.11,0.61,8.7,0.0,0.0
10 | 1.51623,14.2,0.0,2.79,73.46,0.04,9.04,0.4,0.09
11 | 1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22
12 | 1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.0,0.0
13 | 1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0.0,0.0
14 | 1.51852,14.09,2.19,1.66,72.67,0.0,9.32,0.0,0.0
15 | 1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0
16 | 1.51966,14.77,3.75,0.29,72.02,0.03,9.0,0.0,0.0
17 | 1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0.0,0.2
18 | 1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.0,0.11
19 | 1.51732,14.95,0.0,1.8,72.99,0.0,8.61,1.55,0.0
20 | 1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0.0,0.0
21 | 1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0
22 | 1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.0,0.0
23 | 1.52369,13.44,0.0,1.58,72.22,0.32,12.24,0.0,0.0
24 | 1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.0,0.0
25 | 1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0.0,0.0
26 | 1.51646,13.04,3.4,1.26,73.01,0.52,8.58,0.0,0.0
27 | 1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.0,0.0
28 | 1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.0,0.0
29 | 1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0.0,0.0
30 | 1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.0,0.0
31 | 1.51683,14.56,0.0,1.98,73.29,0.0,8.52,1.57,0.07
32 | 1.51969,12.64,0.0,1.65,73.75,0.38,11.53,0.0,0.0
33 | 1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0.0,0.24
34 | 1.53393,12.3,0.0,1.0,70.16,0.12,16.19,0.0,0.24
35 | 1.51916,14.15,0.0,2.09,72.74,0.0,10.88,0.0,0.0
36 | 1.51755,13.0,3.6,1.36,72.99,0.57,8.4,0.0,0.11
37 | 1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.0,0.0
38 | 1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.0,0.0
39 | 1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.0,0.0
40 | 1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0.0,0.1
41 | 1.51806,13.0,3.8,1.08,73.07,0.56,8.38,0.0,0.12
42 | 1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.0,0.0
43 | 1.51709,13.0,3.47,1.79,72.72,0.66,8.18,0.0,0.0
44 | 1.51321,13.0,0.0,3.02,70.7,6.21,6.93,0.0,0.0
45 | 1.51818,13.72,0.0,0.56,74.45,0.0,10.99,0.0,0.0
46 | 1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0.0,0.16
47 | 1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.0,0.0
48 | 1.51623,14.14,0.0,2.88,72.61,0.08,9.18,1.06,0.0
49 | 1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.0
50 | 1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.0,0.0
51 | 1.51685,14.92,0.0,1.99,73.06,0.0,8.4,1.59,0.0
52 | 1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0.0,0.0
53 | 1.51753,12.57,3.47,1.38,73.39,0.6,8.55,0.0,0.06
54 | 1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.0,0.0
55 | 1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0.0,0.0
56 | 1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0.0
57 | 1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0.0,0.17
58 | 1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.0,0.15
59 | 1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0.0,0.0
60 | 1.51687,13.23,3.54,1.48,72.84,0.56,8.1,0.0,0.0
61 | 1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0.0,0.14
62 | 1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0.0,0.0
63 | 1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.0,0.0
64 | 1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0.0,0.0
65 | 1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0.0,0.16
66 | 1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0.0,0.0
67 |
--------------------------------------------------------------------------------
/part2/ch7/glass_train.csv:
--------------------------------------------------------------------------------
1 | RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
2 | 1.51829,14.46,2.24,1.62,72.38,0.0,9.26,0.0,0.0,6
3 | 1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0.0,0.0,3
4 | 1.52172,13.48,3.74,0.9,72.01,0.18,9.61,0.0,0.07,1
5 | 1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0.0,0.0,1
6 | 1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.0,0.0,2
7 | 1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0.0,0.0,1
8 | 1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0.0,0.0,2
9 | 1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.0,0.0,1
10 | 1.519,13.49,3.48,1.35,71.95,0.55,9.0,0.0,0.0,1
11 | 1.52127,14.32,3.9,0.83,71.5,0.0,9.49,0.0,0.0,3
12 | 1.51646,13.41,3.55,1.25,72.81,0.68,8.1,0.0,0.0,2
13 | 1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.0,0.0,1
14 | 1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0.0,0.0,2
15 | 1.51613,13.88,1.78,1.79,73.1,0.0,8.67,0.76,0.0,7
16 | 1.52043,13.38,0.0,1.4,72.25,0.33,12.5,0.0,0.0,5
17 | 1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0.0,0.0,1
18 | 1.51571,12.72,3.46,1.56,73.2,0.67,8.09,0.0,0.24,1
19 | 1.5164,14.37,0.0,2.74,72.85,0.0,9.45,0.54,0.0,7
20 | 1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0.0,0.0,1
21 | 1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.0,0.0,3
22 | 1.51658,14.8,0.0,1.99,73.11,0.0,8.28,1.71,0.0,7
23 | 1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0.0,0.12,2
24 | 1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0.0,0.0,1
25 | 1.51299,14.4,1.74,1.54,74.55,0.0,7.59,0.0,0.0,6
26 | 1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0.0,0.24,1
27 | 1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.0,0.19,2
28 | 1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,2
29 | 1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0.0,0.19,1
30 | 1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0.0,0.0,2
31 | 1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0.0,0.29,2
32 | 1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,2
33 | 1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.0,0.26,1
34 | 1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0.0,0.11,1
35 | 1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0.0,0.0,3
36 | 1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.0,0.14,2
37 | 1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.0,0.0,2
38 | 1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.0,0.0,1
39 | 1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0.0,0.0,2
40 | 1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.0,0.0,1
41 | 1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.0,0.3,1
42 | 1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
43 | 1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.0,0.0,2
44 | 1.51743,12.2,3.25,1.16,73.55,0.62,8.9,0.0,0.24,2
45 | 1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0.0,0.0,7
46 | 1.51609,15.01,0.0,2.51,73.05,0.05,8.83,0.53,0.0,7
47 | 1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0.0,0.0,2
48 | 1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0.0,0.0,5
49 | 1.53125,10.73,0.0,2.1,69.81,0.58,13.3,3.15,0.28,2
50 | 1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
51 | 1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0.0,0.0,2
52 | 1.51514,14.85,0.0,2.42,73.72,0.0,8.39,0.56,0.0,7
53 | 1.51994,13.27,0.0,1.76,73.03,0.47,11.32,0.0,0.0,5
54 | 1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.0,0.31,1
55 | 1.51831,14.39,0.0,1.82,72.86,1.41,6.47,2.88,0.0,7
56 | 1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0.0,0.1,1
57 | 1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.0,0.0,2
58 | 1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.0,0.22,2
59 | 1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0.0,5
60 | 1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0.0,0.0,2
61 | 1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0.0,0.0,1
62 | 1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,2
63 | 1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0.0,0.32,2
64 | 1.52065,14.36,0.0,2.02,73.42,0.0,8.44,1.64,0.0,7
65 | 1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.0,0.17,1
66 | 1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0.0,0.09,2
67 | 1.51645,14.94,0.0,1.87,73.11,0.0,8.67,1.38,0.0,7
68 | 1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,2
69 | 1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.0,0.09,1
70 | 1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.0,0.0,2
71 | 1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.0,7
72 | 1.51556,13.87,0.0,2.54,73.23,0.14,9.41,0.81,0.01,7
73 | 1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.0,0.17,1
74 | 1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0.0,0.0,2
75 | 1.51779,13.64,3.65,0.65,73.0,0.06,8.93,0.0,0.0,3
76 | 1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.0,1
77 | 1.51719,14.75,0.0,2.0,73.02,0.0,8.53,1.59,0.08,7
78 | 1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.0,0.0,1
79 | 1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.0,0.14,2
80 | 1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0.0,0.16,1
81 | 1.51545,14.14,0.0,2.68,73.39,0.08,9.07,0.61,0.05,7
82 | 1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.0,0.0,2
83 | 1.51743,13.3,3.6,1.14,73.09,0.58,8.17,0.0,0.0,1
84 | 1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.0,0.12,2
85 | 1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0.0,0.24,1
86 | 1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.0,0.09,2
87 | 1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.0,0.0,1
88 | 1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0.0,0.0,2
89 | 1.518,13.71,3.93,1.54,71.81,0.54,8.21,0.0,0.15,2
90 | 1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.0,0.21,2
91 | 1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0.0,0.0,1
92 | 1.51602,14.85,0.0,2.38,73.28,0.0,8.76,0.64,0.09,7
93 | 1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.0,0.07,1
94 | 1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0.0,0.11,1
95 | 1.52222,14.43,0.0,1.0,72.67,0.1,11.52,0.0,0.08,2
96 | 1.523,13.31,3.58,0.82,71.99,0.12,10.17,0.0,0.03,1
97 | 1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0.0,0.09,3
98 | 1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0.0,0.18,2
99 | 1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.0,0.0,2
100 | 1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.0,0.37,3
101 | 1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0.0,0.0,2
102 | 1.51316,13.02,0.0,3.04,70.48,6.21,6.96,0.0,0.0,5
103 | 1.51711,14.23,0.0,2.08,73.36,0.0,8.62,1.67,0.0,7
104 | 1.51666,12.86,0.0,1.83,73.88,0.97,10.17,0.0,0.0,5
105 | 1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0.0,0.0,2
106 | 1.51508,15.15,0.0,2.25,73.5,0.0,8.34,0.63,0.0,7
107 | 1.51115,17.38,0.0,0.34,75.41,0.0,6.65,0.0,0.0,6
108 | 1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.0,0.0,1
109 | 1.51651,14.38,0.0,1.94,73.61,0.0,8.48,1.57,0.0,7
110 | 1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0.0,0.28,2
111 | 1.51617,14.95,0.0,2.27,73.3,0.0,8.71,0.67,0.0,7
112 | 1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0.0,0.0,1
113 | 1.51653,11.95,0.0,1.19,75.18,2.7,8.93,0.0,0.0,7
114 | 1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0.0,0.0,2
115 | 1.51618,13.01,3.5,1.48,72.89,0.6,8.12,0.0,0.0,2
116 | 1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.0,0.17,2
117 | 1.52739,11.02,0.0,0.75,73.08,0.0,14.96,0.0,0.0,2
118 | 1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0.0,0.0,1
119 | 1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.0,0.0,5
120 | 1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.0,0.0,1
121 | 1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.0,7
122 | 1.52777,12.64,0.0,0.67,72.02,0.06,14.4,0.0,0.0,2
123 | 1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0.0,0.0,7
124 | 1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0.0,0.0,2
125 | 1.51937,13.79,2.41,1.19,72.76,0.0,9.77,0.0,0.0,6
126 | 1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.0,0.0,1
127 | 1.51784,13.08,3.49,1.28,72.86,0.6,8.49,0.0,0.0,1
128 | 1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0.0,0.0,3
129 | 1.51905,14.0,2.39,1.56,72.37,0.0,9.57,0.0,0.0,6
130 | 1.52227,14.17,3.81,0.78,71.35,0.0,9.69,0.0,0.0,1
131 | 1.52664,11.23,0.0,0.77,73.21,0.0,14.68,0.0,0.0,2
132 | 1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.0,0.0,1
133 | 1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.0,0.0,1
134 | 1.52614,13.7,0.0,1.36,71.24,0.19,13.44,0.0,0.1,2
135 | 1.51727,14.7,0.0,2.34,73.28,0.0,8.95,0.66,0.0,7
136 | 1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0,1
137 | 1.51627,13.0,3.58,1.54,72.83,0.61,8.04,0.0,0.0,2
138 | 1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.0,0.28,5
139 | 1.51888,14.99,0.78,1.74,72.5,0.0,9.95,0.0,0.0,6
140 | 1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.0,0.35,2
141 | 1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0.0,0.0,1
142 | 1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0.0,0.0,2
143 | 1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0.0,0.0,1
144 | 1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0.0,0.1,2
145 | 1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
146 | 1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,3
147 | 1.51969,14.56,0.0,0.56,73.48,0.0,11.22,0.0,0.0,6
148 | 1.51531,14.38,0.0,2.66,73.1,0.04,9.08,0.64,0.0,7
149 | 1.51764,12.98,3.54,1.21,73.0,0.65,8.53,0.0,0.0,1
150 | 1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0.0,0.1,2
151 |
--------------------------------------------------------------------------------
/part2/ch8/laptop_test.csv:
--------------------------------------------------------------------------------
1 | Brand,Model,Series,Processor,Processor_Gen,RAM,Hard_Disk_Capacity,OS,Rating
2 | DELL,Vostro,,i3,10th,8.0,256 GB SSD,Windows 10 Home,4.3
3 | Lenovo,IdeaPad,3,i3,10th,8.0,256 GB SSD,Windows 11 Home,4.3
4 | HP,,,i5,11th,8.0,512 GB SSD,Windows 11 Home,4.4
5 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3
6 | HP,250-G6,,i5,7th,4.0,1 TB HDD,DOS,4.2
7 | DELL,Inspiron,Ryzen,3450U,-,8.0,512 GB SSD,Windows 11 Home,4.3
8 | acer,Aspire,7,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.4
9 | ASUS,ROG,Zephyrus,4800HS,-,8.0,1 TB SSD,Windows 10 Home,4.6
10 | ASUS,ROG,Strix,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.6
11 | ASUS,VivoBook,15,-,(4,4.0,256 GB SSD,Windows 10 Home,4.3
12 | Lenovo,IdeaPad,Flex,i3,11th,8.0,512 GB SSD,Windows 10 Home,4.5
13 | APPLE,MacBook,Air,,,,,,4.5
14 | realme,Book,(Slim),i3,11th,8.0,256 GB SSD,Windows 10 Home,4.4
15 | acer,Travelmate,,i5,11th,16.0,1 TB HDD,256 GB SSD,3.4
16 | DELL,,,i7,11th,16.0,512 GB SSD,Windows 11 Home,4.4
17 | DELL,Vostro,,i5,11th,8.0,1 TB HDD,Windows 10 Home,3.6
18 | HP,Ryzen,3,5300U,-,8.0,512 GB SSD,Windows 11 Home,4.2
19 | DELL,Inspiron,,i5,11th,8.0,1 TB HDD,256 GB SSD,4.2
20 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7
21 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7
22 | HP,Notebook,PC,i3,11th,8.0,1 TB HDD,Windows 10,3.9
23 | ASUS,Vivobook,14,i3,10th,8.0,1 TB HDD,Windows 10 Home,4.2
24 | MICROSOFT,Surface,Laptop,i5,8th,8.0,128 GB SSD,Windows 10 Home,4.5
25 | DELL,Inspiron,Ryzen,3250U,-,8.0,256 GB SSD,Windows 11 Home,4.3
26 | ASUS,,,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.4
27 | Lenovo,IdeaPad,3,i5,11th,8.0,512 GB SSD,Windows 10 Home,4.2
28 | MSI,Katana,GF66,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.4
29 | ASUS,VivoBook,15,i3,10th,4.0,512 GB SSD,Windows 11 Home,4.2
30 | ASUS,,,i3,10th,4.0,1 TB HDD,Windows 10 Home,3.7
31 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7
32 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.2
33 | ASUS,TUF,Gaming,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.5
34 | HP,HP,Pavilion,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.5
35 | HP,,,i3,11th,8.0,512 GB SSD,Windows 10,4.0
36 | DELL,Inspiron,Ryzen,3450U,-,8.0,1 TB HDD,256 GB SSD,3.9
37 | Lenovo,IdeaPad,3,5500U,-,8.0,512 GB SSD,Windows 11 Home,4.4
38 | APPLE,MacBook,Air,,,,,,4.5
39 | DELL,Inspiron,,i5,11th,16.0,512 GB SSD,Windows 10,4.2
40 | Lenovo,IdeaPad,3,i3,10th,8.0,1 TB HDD,Windows 10 Home,4.1
41 |
--------------------------------------------------------------------------------
/part2/ch8/laptop_train.csv:
--------------------------------------------------------------------------------
1 | Brand,Model,Series,Processor,Processor_Gen,RAM,Hard_Disk_Capacity,OS,Rating,Price
2 | ASUS,VivoBook,15,i3,10th,8.0,512 GB SSD,Windows 11 Home,4.3,37940
3 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040
4 | ASUS,VivoBook,15,i7,10th,16.0,512 GB SSD,Windows 11 Home,4.1,57940
5 | DELL,,,i3,10th,8.0,1 TB HDD,Windows 10,3.2,41340
6 | Lenovo,IdeaPad,Slim,i3,11th,8.0,512 GB SSD,Windows 10 Home,4.4,45440
7 | ASUS,TUF,Gaming,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.6,89940
8 | ASUS,VivoBook,Ultra,i3,11th,8.0,512 GB SSD,Windows 11 Home,4.8,42940
9 | HP,,,i3,10th,8.0,512 GB SSD,Windows 10 Home,4.3,42340
10 | APPLE,2020,Macbook,,,,,,4.6,129990
11 | DELL,Inspiron,,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,41540
12 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040
13 | Lenovo,Ideapad,Slim,i5,11th,8.0,512 GB SSD,Windows 10 Home,4.2,56449
14 | acer,Nitro,Ryzen,5600H,-,8.0,1 TB HDD,256 GB SSD,4.5,72940
15 | HP,Ryzen,3,3250U,-,8.0,256 GB SSD,Windows 10 Home,4.3,38940
16 | DELL,Vostro,3405,3450U,-,8.0,256 GB SSD,Windows 10 Home,4.2,44440
17 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,39940
18 | acer,Swift,3,i5,8th,8.0,1 TB HDD,128 GB SSD,4.6,59940
19 | DELL,Inspiron,,i3,11th,8.0,512 GB SSD,Windows 11 Home,4.4,45540
20 | Lenovo,IdeaPad,3,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.3,50940
21 | HP,HP,Pavilion,4600H,-,8.0,512 GB SSD,Windows 10 Home,4.4,59940
22 | DELL,Inspiron,Athlon,3050U,-,4.0,256 GB SSD,Windows 11 Home,4.2,33940
23 | ASUS,Chromebook,Celeron,-,(4,4.0,64 GB EMMC Storage,Chrome OS,4.2,23490
24 | ASUS,ASUS,TUF,i5,11th,8.0,512 GB SSD,Windows 10 Home,4.5,71940
25 | HP,15q,,i3,7th,4.0,1 TB HDD,DOS,4.2,32905
26 | Lenovo,IdeaPad,5,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.6,70940
27 | ASUS,VivoBook,15,i3,10th,4.0,1 TB HDD,Windows 10 Home,3.7,31940
28 | HP,,,i3,11th,8.0,512 GB SSD,Windows 11 Home,4.3,42440
29 | ASUS,Ryzen,3,3250U,3rd,8.0,256 GB SSD,Windows 10 Home,4.3,35940
30 | HP,Pavilion,Ryzen,5600H,-,8.0,512 GB SSD,Windows 11 Home,4.5,59940
31 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,39940
32 | Lenovo,,,i3,11th,4.0,256 GB SSD,Windows 10 Home,4.3,36940
33 | Lenovo,Thinkpad,Ryzen,5600U,-,8.0,512 GB SSD,DOS,4.1,57440
34 | HP,,,i5,11th,8.0,512 GB SSD,Windows 11 Home,4.1,55940
35 | ASUS,ZenBook,Duo,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.4,114940
36 | Lenovo,,,i3,10th,8.0,512 GB SSD,Windows 11 Home,4.3,62140
37 | DELL,Vostro,,i5,11th,8.0,1 TB HDD,Windows 10 Home,3.6,50840
38 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.2,40940
39 | HP,Ryzen,5,-,(8,8.0,512 GB SSD,Windows 11 Home,4.6,48940
40 | HP,Envy,,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.3,74940
41 | DELL,Inspiron,,i5,11th,16.0,512 GB SSD,Windows 10,4.4,67740
42 | ASUS,Vivobook,14,i3,11th,4.0,256 GB SSD,Windows 10 Home,4.2,36940
43 | ASUS,Chromebook,Celeron,-,(4,4.0,32 GB EMMC Storage,Chrome OS,3.6,17640
44 | Lenovo,Ideapad,Slim,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.4,59940
45 | ASUS,Vivobook,15,i3,11th,8.0,1 TB HDD,Windows 10 Home,2.9,42750
46 | APPLE,2020,Macbook,,,,,,4.7,84940
47 | DELL,Inspiron,,i5,11th,16.0,512 GB SSD,Windows 10,4.2,69040
48 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,256 GB SSD,4.1,48940
49 | DELL,3000,,i3,11th,8.0,1 TB HDD,Windows 11 Home,4.2,43850
50 | ASUS,Vivobook,14,-,(8,8.0,256 GB SSD,Windows 11 Home,4.3,32940
51 | ASUS,VivoBook,K15,i5,11th,16.0,1 TB HDD,256 GB SSD,4.7,66940
52 | Lenovo,Ideapad,Gaming,i5,10th,8.0,1 TB HDD,256 GB SSD,4.4,64940
53 | Lenovo,Ideapad,Gaming,5600H,-,8.0,512 GB SSD,Windows 11 Home,4.5,60440
54 | Lenovo,IDEAPAD,3,AMD,Ryzen™,8.0,512 GB SSD,Windows 10,3.7,59850
55 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,40940
56 | Lenovo,v15,,i3,10th,4.0,1 TB HDD,DOS,3.1,39949
57 | ASUS,VivoBook,15,i5,11th,8.0,1 TB HDD,256 GB SSD,4.3,52940
58 | Lenovo,Ideapad,530s,i5,8th,8.0,512 GB SSD,Windows 10 Home,4.4,59949
59 | HP,Pavilion,Gaming,i7,11th,16.0,512 GB SSD,Windows 10 Home,4.4,98140
60 | MSI,,,i3,11th,8.0,512 GB SSD,Windows 10 Home,4.4,48740
61 | DELL,Vostro,,i5,11th,8.0,1 TB HDD,Windows 10 Home,3.6,50840
62 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,256 GB SSD,4.4,45340
63 | Lenovo,IdeaPad,3,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.3,39940
64 | DELL,Vostro,,i3,11th,8.0,256 GB SSD,Windows 10,4.3,41740
65 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040
66 | HP,Pavilion,Gaming,4800H,-,16.0,1 TB HDD,256 GB SSD,4.5,75940
67 | ASUS,VivoBook,K15,i5,11th,16.0,1 TB HDD,256 GB SSD,4.3,65940
68 | APPLE,MacBook,Air,,,,,,4.5,105940
69 | ASUS,VivoBook,15,i3,11th,4.0,256 GB SSD,Windows 10 Home,4.2,36940
70 | ASUS,VivoBook,K15,i3,11th,8.0,256 GB SSD,Windows 11 Home,4.5,46940
71 | DELL,,,i3,10th,8.0,1 TB HDD,256 GB SSD,2.9,46840
72 | APPLE,2020,Macbook,,,,,,4.6,129990
73 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040
74 | DELL,Inspiron,,i5,11th,8.0,512 GB SSD,Windows 11 Home,4.4,59150
75 | Lenovo,Ideapad,Slim,i5,11th,16.0,512 GB SSD,Windows 10 Home,4.4,59940
76 | MSI,GF63,Thin,i5,10th,8.0,512 GB SSD,Windows 10 Home,4.5,65940
77 | ASUS,Vivobook,Gaming,i7,10th,8.0,512 GB SSD,Windows 10 Home,3.7,61940
78 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040
79 | Lenovo,,,i3,10th,8.0,512 GB SSD,Windows 11 Home,4.3,62140
80 | ASUS,Vivobook,14,i3,11th,8.0,256 GB SSD,Windows 10 Home,4.3,37840
81 | ASUS,Ryzen,5,5500U,-,8.0,512 GB SSD,Windows 10 Home,4.5,49940
82 | DELL,Vostro,,i3,10th,8.0,512 GB SSD,Windows 10,3.9,43040
83 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040
84 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040
85 | Lenovo,APU,Dual,A9,A99425,4.0,1 TB HDD,DOS,3.9,21846
86 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,256 GB SSD,4.4,44440
87 | DELL,Vostro,,i3,11th,4.0,1 TB HDD,256 GB SSD,4.1,43126
88 | DELL,Vostro,,i5,11th,8.0,1 TB HDD,Windows 10 Home,3.6,50840
89 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 10,4.0,42540
90 | DELL,Vostro,Ryzen,R3-3250U,-,,,,3.9,37340
91 | DELL,Inspiron,,i3,11th,8.0,1 TB HDD,Windows 11 Home,3.7,39040
92 | APPLE,2020,Macbook,,,,,,4.7,110940
93 |
--------------------------------------------------------------------------------
/part3/ch2/ch2_anova.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOUTcimp/KUlGXgMsT+YVQR"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part3/ch2/ch2_anova.ipynb)"],"metadata":{"id":"8nZruVfngrdb"}},{"cell_type":"markdown","source":["# 1. 일원 분산 분석"],"metadata":{"id":"noR3fjbYSw1_"}},{"cell_type":"markdown","source":["### 1. 기본학습"],"metadata":{"id":"80IBaqHZS1JH"}},{"cell_type":"code","source":["import pandas as pd\n","df = pd.DataFrame({\n"," 'A': [10.5, 11.3, 10.8, 9.6, 11.1, 10.2, 10.9, 11.4, 10.5, 10.3],\n"," 'B': [11.9, 12.4, 12.1, 13.2, 12.5, 11.8, 12.2, 12.9, 12.4, 12.3],\n"," 'C': [11.2, 11.7, 11.6, 10.9, 11.3, 11.1, 10.8, 11.5, 11.4, 11.0],\n"," 'D': [9.8, 9.4, 9.1, 9.5, 9.6, 9.9, 9.2, 9.7, 9.3, 9.4]\n","})\n","print(df.head(2))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8Tix8IOgjleA","executionInfo":{"status":"ok","timestamp":1719884010828,"user_tz":-540,"elapsed":413,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"1bc7425f-2413-4cec-e03f-152883a2959a"},"execution_count":30,"outputs":[{"output_type":"stream","name":"stdout","text":[" A B C D\n","0 10.5 11.9 11.2 9.8\n","1 11.3 12.4 11.7 9.4\n"]}]},{"cell_type":"code","execution_count":31,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ie4pxfxlWEvK","executionInfo":{"status":"ok","timestamp":1719884011190,"user_tz":-540,"elapsed":2,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"4aaa08bb-980a-43a0-f340-ded79e0104b6"},"outputs":[{"output_type":"stream","name":"stdout","text":["=== 정규성 검정 ===\n","ShapiroResult(statistic=0.9649055004119873, pvalue=0.840017557144165)\n","ShapiroResult(statistic=0.9468040466308594, 
pvalue=0.63086998462677)\n","ShapiroResult(statistic=0.9701647162437439, pvalue=0.8923683762550354)\n","ShapiroResult(statistic=0.9752339720726013, pvalue=0.9346861243247986)\n","\n"," === 등분산 검정 ===\n","LeveneResult(statistic=1.9355354288758708, pvalue=0.14127835331346628)\n","\n"," === 일원 분산 분석 ===\n","F_onewayResult(statistic=89.12613851177174, pvalue=1.001838152252373e-16)\n"]}],"source":["from scipy import stats\n","\n","print(\"=== 정규성 검정 ===\")\n","print(stats.shapiro(df['A']))\n","print(stats.shapiro(df['B']))\n","print(stats.shapiro(df['C']))\n","print(stats.shapiro(df['D']))\n","\n","print(\"\\n === 등분산 검정 ===\")\n","print(stats.levene(df['A'], df['B'], df['C'], df['D']))\n","\n","print(\"\\n === 일원 분산 분석 ===\")\n","print(stats.f_oneway(df['A'], df['B'], df['C'], df['D']))"]},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"fertilizer.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part3/ch2/fertilizer.csv\")"],"metadata":{"id":"9mnmdtnzFxd0","executionInfo":{"status":"ok","timestamp":1719884311261,"user_tz":-540,"elapsed":344,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}}},"execution_count":35,"outputs":[]},{"cell_type":"code","source":["from statsmodels.formula.api import ols\n","from statsmodels.stats.anova import anova_lm\n","model = ols('성장 ~ C(비료)', df).fit()\n","print(anova_lm(model))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NT_kg6YdJ96X","executionInfo":{"status":"ok","timestamp":1719884328036,"user_tz":-540,"elapsed":343,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"d5f49195-129e-4c1b-ad94-c02df1c36f9e"},"execution_count":36,"outputs":[{"output_type":"stream","name":"stdout","text":[" df sum_sq mean_sq F PR(>F)\n","C(비료) 3.0 43.21875 14.406250 89.126139 1.001838e-16\n","Residual 36.0 5.81900 0.161639 NaN NaN\n"]}]},{"cell_type":"markdown","source":["# 이원 분산 
분석"],"metadata":{"id":"zE_cfuF4TJD7"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"tree.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part3/ch2/tree.csv\")\n","print(df.sample(10))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"d-87C0HTjR9d","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":566,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"a93b5c03-9cea-49c6-da5e-42fca9d13156"},"execution_count":19,"outputs":[{"output_type":"stream","name":"stdout","text":[" 나무 비료 성장률\n","59 B 3 70.755451\n","106 D 2 86.861859\n","78 C 2 63.917608\n","65 C 1 73.562400\n","3 A 1 65.230299\n","10 A 2 48.365823\n","107 D 2 69.745778\n","118 D 3 82.428228\n","112 D 3 71.602302\n","18 A 2 43.919759\n"]}]},{"cell_type":"code","source":["import statsmodels.api as sm\n","from statsmodels.formula.api import ols\n","\n","model = ols('성장률 ~ 나무 + 비료 + 나무:비료', data=df).fit()\n","anova_table = sm.stats.anova_lm(model)\n","print(anova_table)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"40ToLh4Xe0vA","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":10,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"17cb1d31-be34-4021-acf0-660e8b1c1056"},"execution_count":20,"outputs":[{"output_type":"stream","name":"stdout","text":[" df sum_sq mean_sq F PR(>F)\n","나무 3.0 4783.353938 1594.451313 18.391274 9.016693e-10\n","비료 1.0 873.322002 873.322002 10.073374 1.942421e-03\n","나무:비료 3.0 394.801585 131.600528 1.517952 2.137666e-01\n","Residual 112.0 9709.960792 86.696078 NaN NaN\n"]}]},{"cell_type":"code","source":["import statsmodels.api as sm\n","from statsmodels.formula.api import ols\n","\n","model = ols('성장률 ~ C(나무) + C(비료) + C(나무):C(비료)', data=df).fit()\n","anova_table = 
sm.stats.anova_lm(model)\n","print(anova_table)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"kE4qc4wtkbj9","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"290d2038-c380-49d0-c2d1-394e56ded6af"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":[" df sum_sq mean_sq F PR(>F)\n","C(나무) 3.0 4783.353938 1594.451313 18.855528 6.600012e-10\n","C(비료) 2.0 1127.924259 563.962129 6.669256 1.857612e-03\n","C(나무):C(비료) 6.0 717.520672 119.586779 1.414199 2.157357e-01\n","Residual 108.0 9132.639448 84.561476 NaN NaN\n"]}]},{"cell_type":"code","source":["print(format(6.600012e-10, '.11f'))\n","print(format(1.857612e-03, '.11f'))\n","print(format(2.157357e-01, '.11f'))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"B6lwreZut_JA","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"091d18c5-5f87-4cd1-fdaf-5d3b76407cf3"},"execution_count":22,"outputs":[{"output_type":"stream","name":"stdout","text":["0.00000000066\n","0.00185761200\n","0.21573570000\n"]}]},{"cell_type":"code","source":["model = ols('성장률 ~ C(나무) * C(비료)', data=df).fit()\n","anova_table = sm.stats.anova_lm(model)\n","print(anova_table)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zzz8pEIyjky7","executionInfo":{"status":"ok","timestamp":1719883433968,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"b6c2a751-7bc0-4a49-9b69-876da2c465e0"},"execution_count":23,"outputs":[{"output_type":"stream","name":"stdout","text":[" df sum_sq mean_sq F PR(>F)\n","C(나무) 3.0 4783.353938 1594.451313 18.855528 6.600012e-10\n","C(비료) 2.0 1127.924259 563.962129 6.669256 1.857612e-03\n","C(나무):C(비료) 6.0 717.520672 119.586779 1.414199 2.157357e-01\n","Residual 
108.0 9132.639448 84.561476 NaN NaN\n"]}]}]}
--------------------------------------------------------------------------------
/part3/ch2/fertilizer.csv:
--------------------------------------------------------------------------------
1 | 비료,성장
2 | A,10.5
3 | A,11.3
4 | A,10.8
5 | A,9.6
6 | A,11.1
7 | A,10.2
8 | A,10.9
9 | A,11.4
10 | A,10.5
11 | A,10.3
12 | B,11.9
13 | B,12.4
14 | B,12.1
15 | B,13.2
16 | B,12.5
17 | B,11.8
18 | B,12.2
19 | B,12.9
20 | B,12.4
21 | B,12.3
22 | C,11.2
23 | C,11.7
24 | C,11.6
25 | C,10.9
26 | C,11.3
27 | C,11.1
28 | C,10.8
29 | C,11.5
30 | C,11.4
31 | C,11.0
32 | D,9.8
33 | D,9.4
34 | D,9.1
35 | D,9.5
36 | D,9.6
37 | D,9.9
38 | D,9.2
39 | D,9.7
40 | D,9.3
41 | D,9.4
42 |
--------------------------------------------------------------------------------
/part3/ch2/tree.csv:
--------------------------------------------------------------------------------
1 | 나무,비료,성장률
2 | A,1,54.96714153011233
3 | A,1,48.61735698828815
4 | A,1,56.47688538100692
5 | A,1,65.23029856408026
6 | A,1,47.658466252766644
7 | A,1,47.6586304305082
8 | A,1,65.79212815507391
9 | A,1,57.67434729152909
10 | A,1,45.30525614065048
11 | A,1,55.42560043585965
12 | A,2,48.365823071875376
13 | A,2,48.34270246429743
14 | A,2,55.41962271566034
15 | A,2,33.86719755342202
16 | A,2,35.75082167486967
17 | A,2,47.37712470759027
18 | A,2,42.87168879665576
19 | A,2,56.142473325952736
20 | A,2,43.91975924478789
21 | A,2,38.87696298664709
22 | A,3,70.65648768921554
23 | A,3,53.74223699513465
24 | A,3,56.67528204687924
25 | A,3,41.75251813786544
26 | A,3,50.556172754748175
27 | A,3,57.10922589709866
28 | A,3,44.490064225776976
29 | A,3,59.75698018345672
30 | A,3,49.99361310081195
31 | A,3,53.083062502067236
32 | B,1,48.98293387770603
33 | B,1,73.52278184508938
34 | B,1,54.86502775262066
35 | B,1,44.422890710440996
36 | B,1,63.225449121031886
37 | B,1,42.791563500289776
38 | B,1,57.08863595004755
39 | B,1,35.403298761202244
40 | B,1,41.71813951101569
41 | B,1,56.96861235869123
42 | B,2,65.3846657999541
43 | B,2,59.7136828118997
44 | B,2,56.843517176117594
45 | B,2,54.98896304410711
46 | B,2,43.214780096325725
47 | B,2,50.801557916052914
48 | B,2,53.393612290402125
49 | B,2,68.57122226218915
50 | B,2,61.43618289568462
51 | B,2,40.36959844637266
52 | B,3,64.24083969394795
53 | B,3,57.149177195836835
54 | B,3,54.23077999694041
55 | B,3,67.11676288840869
56 | B,3,71.30999522495951
57 | B,3,70.31280119116198
58 | B,3,52.607824767773614
59 | B,3,57.907876241487855
60 | B,3,64.31263431403565
61 | B,3,70.75545127122359
62 | C,1,55.2082576215471
63 | C,1,58.14341023336183
64 | C,1,48.93665025993972
65 | C,1,48.037933759193294
66 | C,1,68.12525822394198
67 | C,1,73.56240028570824
68 | C,1,59.27989878419666
69 | C,1,70.03532897892023
70 | C,1,63.61636025047634
71 | C,1,53.54880245394876
72 | C,2,66.61395605508415
73 | C,2,78.38036566465969
74 | C,2,62.64173960890049
75 | C,2,78.64643655814007
76 | C,2,36.80254895910255
77 | C,2,71.21902504375224
78 | C,2,63.870470682381715
79 | C,2,60.009926495341325
80 | C,2,63.91760776535502
81 | C,2,43.124310853991076
82 | C,3,63.80328112162488
83 | C,3,69.57112571511746
84 | C,3,80.77894044741517
85 | C,3,60.81729781726352
86 | C,3,57.91506397106812
87 | C,3,60.982429564154636
88 | C,3,75.15402117702074
89 | C,3,69.28751109659684
90 | C,3,60.70239796232961
91 | C,3,71.13267433113356
92 | D,1,65.9707754934804
93 | D,1,74.68644990532889
94 | D,1,57.979469061226474
95 | D,1,61.723378534022316
96 | D,1,61.078918468678424
97 | D,1,50.364850518678814
98 | D,1,67.96120277064577
99 | D,1,67.6105527217989
100 | D,1,65.05113456642461
101 | D,1,62.65412866624853
102 | D,2,53.84629257949586
103 | D,2,63.79354677234641
104 | D,2,64.57285483473231
105 | D,2,59.97722730778381
106 | D,2,66.3871428833399
107 | D,2,72.04050856814538
108 | D,2,86.8618590121053
109 | D,2,69.74577812831839
110 | D,2,70.57550390722764
111 | D,2,67.25554084233832
112 | D,3,51.812287847009586
113 | D,3,70.73486124550783
114 | D,3,71.60230209941027
115 | D,3,95.63242112485287
116 | D,3,69.07639035218878
117 | D,3,74.01547342333612
118 | D,3,70.65288230294757
119 | D,3,59.31321962380468
120 | D,3,82.4282281451502
121 | D,3,78.51933032686775
122 |
--------------------------------------------------------------------------------
/part3/ch4/study.csv:
--------------------------------------------------------------------------------
1 | study_hours,material_type,score
2 | 71,강의,95
3 | 34,독학,63
4 | 91,도서,95
5 | 80,독학,80
6 | 40,강의,79
7 | 94,강의,100
8 | 94,도서,99
9 | 43,독학,63
10 | 22,강의,72
11 | 41,도서,68
12 | 72,강의,99
13 | 21,강의,66
14 | 49,독학,65
15 | 57,강의,93
16 | 21,독학,48
17 | 83,강의,100
18 | 79,도서,89
19 | 40,강의,78
20 | 52,독학,62
21 | 95,독학,87
22 | 77,도서,85
23 | 41,강의,81
24 | 68,강의,91
25 | 78,도서,92
26 | 61,강의,88
27 | 79,강의,98
28 | 99,독학,91
29 | 34,강의,73
30 | 81,강의,100
31 | 81,도서,93
32 | 66,도서,78
33 | 81,독학,80
34 | 70,강의,95
35 | 74,강의,98
36 | 83,강의,98
37 | 22,독학,47
38 | 70,독학,76
39 | 26,도서,63
40 | 40,독학,60
41 | 92,강의,100
42 | 58,강의,87
43 | 37,독학,59
44 | 23,강의,72
45 | 79,강의,97
46 | 33,독학,61
47 | 28,독학,55
48 | 72,강의,93
49 | 21,강의,72
50 | 79,강의,97
51 | 90,도서,96
52 | 63,도서,84
53 | 27,도서,61
54 | 66,독학,75
55 | 54,도서,78
56 | 97,독학,90
57 | 100,독학,94
58 | 55,도서,76
59 | 69,도서,82
60 | 23,도서,59
61 | 21,강의,68
62 | 25,도서,62
63 | 73,강의,97
64 | 23,독학,52
65 | 73,강의,98
66 | 82,도서,91
67 | 37,독학,62
68 | 63,독학,70
69 | 53,독학,73
70 | 93,강의,100
71 | 81,도서,88
72 | 33,독학,53
73 | 67,독학,74
74 | 34,독학,56
75 | 91,강의,100
76 | 97,강의,100
77 | 81,도서,90
78 | 59,강의,87
79 | 99,독학,85
80 | 72,도서,84
81 | 43,독학,63
82 | 45,독학,63
83 | 79,강의,96
84 | 60,도서,80
85 | 48,강의,84
86 | 34,강의,74
87 | 64,도서,82
88 | 84,독학,82
89 | 90,독학,82
90 | 28,도서,64
91 | 20,독학,51
92 | 27,도서,66
93 | 82,도서,93
94 | 30,강의,71
95 | 100,도서,97
96 | 27,강의,74
97 | 54,독학,68
98 | 54,독학,68
99 | 52,독학,75
100 | 24,독학,53
101 | 60,강의,92
102 |
--------------------------------------------------------------------------------
/part3/ch5/ch5_logistic_regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "source": [
20 | "[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part3/ch5/ch5_logistic_regression.ipynb)"
21 | ],
22 | "metadata": {
23 | "id": "nFYUmBR9hu_R"
24 | }
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "source": [
29 | "# 로지스틱 회귀분석"
30 | ],
31 | "metadata": {
32 | "id": "FwzB2swKZo2j"
33 | }
34 | },
35 | {
36 | "cell_type": "code",
37 | "source": [
38 | "import pandas as pd\n",
39 | "# df = pd.read_csv(\"health_survey.csv\")\n",
40 | "df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part3/ch5/health_survey.csv\")\n",
41 | "\n",
42 | "print(df.head())"
43 | ],
44 | "metadata": {
45 | "colab": {
46 | "base_uri": "https://localhost:8080/"
47 | },
48 | "id": "JuSKhrlWfVaI",
49 | "outputId": "d23e18d5-6aa4-4f44-ff22-5c2a6fce1a8e"
50 | },
51 | "execution_count": 1,
52 | "outputs": [
53 | {
54 | "output_type": "stream",
55 | "name": "stdout",
56 | "text": [
57 | " age bmi smoker activity_level disease\n",
58 | "0 62 35.179089 0 0 1\n",
59 | "1 65 18.576042 0 2 1\n",
60 | "2 71 33.178426 0 1 1\n",
61 | "3 18 37.063007 1 2 0\n",
62 | "4 21 17.613266 0 0 0\n"
63 | ]
64 | }
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "source": [
70 | "from statsmodels.formula.api import logit\n",
71 | "\n",
72 | "model = logit('disease ~ age + bmi', data=df).fit()\n",
73 | "print(model.summary())"
74 | ],
75 | "metadata": {
76 | "colab": {
77 | "base_uri": "https://localhost:8080/"
78 | },
79 | "id": "y00i8apYgU5b",
80 | "outputId": "40b4482e-f809-444e-9e38-54161c1190d0"
81 | },
82 | "execution_count": 2,
83 | "outputs": [
84 | {
85 | "output_type": "stream",
86 | "name": "stdout",
87 | "text": [
88 | "Optimization terminated successfully.\n",
89 | " Current function value: 0.643725\n",
90 | " Iterations 5\n",
91 | " Logit Regression Results \n",
92 | "==============================================================================\n",
93 | "Dep. Variable: disease No. Observations: 1000\n",
94 | "Model: Logit Df Residuals: 997\n",
95 | "Method: MLE Df Model: 2\n",
96 | "Date: Mon, 05 Aug 2024 Pseudo R-squ.: 0.04996\n",
97 | "Time: 15:30:22 Log-Likelihood: -643.72\n",
98 | "converged: True LL-Null: -677.58\n",
99 | "Covariance Type: nonrobust LLR p-value: 1.984e-15\n",
100 | "==============================================================================\n",
101 | " coef std err z P>|z| [0.025 0.975]\n",
102 | "------------------------------------------------------------------------------\n",
103 | "Intercept -1.8700 0.289 -6.482 0.000 -2.435 -1.305\n",
104 | "age 0.0177 0.004 4.747 0.000 0.010 0.025\n",
105 | "bmi 0.0563 0.009 6.418 0.000 0.039 0.074\n",
106 | "==============================================================================\n"
107 | ]
108 | }
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "source": [
114 | "import numpy as np\n",
115 | "print(model.params['bmi'])\n",
116 | "print(np.exp(model.params['bmi']))"
117 | ],
118 | "metadata": {
119 | "colab": {
120 | "base_uri": "https://localhost:8080/"
121 | },
122 | "id": "WBltnh1-gU8M",
123 | "outputId": "6bad8637-f1fc-4e26-813e-3d5b55271245"
124 | },
125 | "execution_count": 3,
126 | "outputs": [
127 | {
128 | "output_type": "stream",
129 | "name": "stdout",
130 | "text": [
131 | "0.056333879687088535\n",
132 | "1.057950853075076\n"
133 | ]
134 | }
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "source": [
140 | "print(model.llf)"
141 | ],
142 | "metadata": {
143 | "colab": {
144 | "base_uri": "https://localhost:8080/"
145 | },
146 | "id": "_icMMT0zfOan",
147 | "outputId": "8f53d796-0f9b-49a6-aa81-faeb1e290280"
148 | },
149 | "execution_count": 4,
150 | "outputs": [
151 | {
152 | "output_type": "stream",
153 | "name": "stdout",
154 | "text": [
155 | "-643.7246164682088\n"
156 | ]
157 | }
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "source": [
163 | "print(-2 * model.llf)"
164 | ],
165 | "metadata": {
166 | "colab": {
167 | "base_uri": "https://localhost:8080/"
168 | },
169 | "id": "Xl9ltoyZfYW6",
170 | "outputId": "06aae68f-95ed-4060-f3d7-ef33e6c31520"
171 | },
172 | "execution_count": 5,
173 | "outputs": [
174 | {
175 | "output_type": "stream",
176 | "name": "stdout",
177 | "text": [
178 | "1287.4492329364175\n"
179 | ]
180 | }
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "source": [],
186 | "metadata": {
187 | "id": "aCxBIez0hLB8"
188 | },
189 | "execution_count": 5,
190 | "outputs": []
191 | }
192 | ]
193 | }
--------------------------------------------------------------------------------
/part3/ch6/customer_travel.csv:
--------------------------------------------------------------------------------
1 | age,service,social,booked,target
2 | 34,6,0,1,0
3 | 34,5,1,0,1
4 | 37,3,1,0,0
5 | 30,2,0,0,0
6 | 30,1,0,0,0
7 | 27,1,0,1,1
8 | 34,4,1,1,0
9 | 34,2,1,0,1
10 | 30,3,0,1,0
11 | 36,1,0,0,1
12 | 34,1,1,1,0
13 | 28,2,0,0,1
14 | 35,1,1,1,0
15 | 34,4,0,0,0
16 | 34,5,0,0,0
17 | 37,6,0,1,0
18 | 30,1,1,1,0
19 | 30,1,1,0,0
20 | 31,1,0,1,0
21 | 37,2,1,0,1
22 | 30,4,0,1,0
23 | 31,1,0,0,1
24 | 34,1,1,0,0
25 | 30,2,0,0,0
26 | 34,1,0,1,0
27 | 38,1,0,1,0
28 | 37,3,1,0,0
29 | 30,5,1,0,0
30 | 28,1,1,0,0
31 | 34,1,0,0,0
32 | 33,6,0,1,0
33 | 34,2,0,0,0
34 | 27,3,1,0,0
35 | 35,1,0,0,1
36 | 30,4,0,0,0
37 | 36,2,0,1,0
38 | 34,1,1,1,0
39 | 37,1,1,0,1
40 | 37,3,0,0,0
41 | 36,2,0,0,0
42 | 27,5,0,1,0
43 | 36,4,0,0,0
44 | 28,1,1,1,0
45 | 30,2,0,0,0
46 | 27,3,0,0,0
47 | 37,6,0,1,1
48 | 27,1,1,0,0
49 | 38,2,1,0,0
50 | 30,4,0,1,0
51 | 34,1,0,0,0
52 | 34,3,0,1,1
53 | 31,2,0,0,0
54 | 34,1,1,0,0
55 | 30,5,0,0,0
56 | 31,1,0,1,0
57 | 28,4,1,1,1
58 | 30,3,1,1,0
59 | 37,1,1,0,1
60 | 36,1,0,0,0
61 | 36,2,0,0,0
62 | 34,6,0,1,0
63 | 35,1,0,0,0
64 | 30,4,1,0,0
65 | 29,2,0,0,0
66 | 33,1,1,1,0
67 | 28,1,0,1,1
68 | 33,5,1,1,0
69 | 37,2,1,0,1
70 | 31,3,0,0,0
71 | 34,4,0,0,1
72 | 37,1,0,1,0
73 | 30,2,0,0,0
74 | 30,1,1,1,0
75 | 30,1,1,0,1
76 | 30,3,0,0,0
77 | 37,6,0,1,0
78 | 31,4,1,0,0
79 | 34,1,1,0,0
80 | 34,1,0,1,0
81 | 34,5,0,0,0
82 | 28,3,0,1,0
83 | 27,1,0,0,1
84 | 30,1,1,0,0
85 | 37,4,0,0,0
86 | 35,1,0,1,0
87 | 27,1,0,1,1
88 | 35,3,1,0,0
89 | 30,2,1,0,0
90 | 37,1,0,1,0
91 | 30,1,0,0,0
92 | 29,6,0,1,0
93 | 30,2,1,0,1
94 | 30,5,1,0,0
95 | 36,1,0,0,1
96 | 37,1,0,0,0
97 | 28,2,0,1,1
98 | 30,1,1,1,0
99 | 31,4,1,0,1
100 | 31,3,0,0,0
101 | 31,2,0,0,0
102 | 30,1,1,1,0
103 | 36,1,0,0,0
104 | 31,1,1,1,0
105 | 28,2,0,0,1
106 | 30,4,0,1,0
107 | 30,6,0,1,1
108 | 37,1,1,0,0
109 | 37,2,1,0,0
110 | 28,1,0,1,0
111 | 27,1,1,0,1
112 | 34,3,0,1,0
113 | 30,4,0,0,0
114 | 33,1,1,1,0
115 | 31,1,0,0,0
116 | 31,1,0,1,0
117 | 31,2,0,1,0
118 | 30,3,1,0,0
119 | 34,1,1,0,1
120 | 34,5,1,0,0
121 | 34,2,0,0,0
122 | 30,6,0,1,0
123 | 28,1,0,0,1
124 | 35,3,1,0,0
125 | 29,2,0,0,0
126 | 35,1,0,0,0
127 | 31,4,0,1,0
128 | 35,1,1,1,0
129 | 29,2,1,0,1
130 | 34,3,0,1,0
131 | 31,1,0,0,1
132 | 30,1,0,1,0
133 | 30,5,0,0,0
134 | 30,4,1,1,0
135 | 27,1,0,0,1
136 | 30,3,0,0,0
137 | 30,6,0,1,0
138 | 31,1,1,1,0
139 | 31,1,1,0,0
140 | 30,1,0,1,0
141 | 38,4,0,0,0
142 | 30,3,0,1,0
143 | 27,1,0,0,1
144 | 35,1,1,0,0
145 | 28,2,0,0,1
146 | 34,5,0,1,0
147 | 30,1,1,1,1
148 | 33,4,1,0,0
149 | 34,2,1,0,0
150 | 27,1,0,0,0
151 | 30,1,0,0,0
152 | 30,6,0,1,0
153 | 31,2,0,0,0
154 | 30,3,1,1,0
155 | 27,4,0,0,1
156 | 30,1,1,0,0
157 | 37,2,0,1,0
158 | 30,1,1,1,0
159 | 30,5,1,0,1
160 | 34,3,0,0,0
161 | 29,2,0,0,0
162 | 34,4,0,1,0
163 | 37,1,0,0,0
164 | 37,1,1,1,0
165 | 35,2,1,0,1
166 | 30,3,0,0,0
167 | 36,6,0,1,1
168 | 30,1,1,0,0
169 | 34,4,1,0,0
170 | 29,1,0,1,0
171 | 27,1,0,0,1
172 | 34,5,0,1,0
173 | 30,2,0,0,0
174 | 29,1,1,0,0
175 | 28,1,0,0,1
176 | 37,4,0,1,0
177 | 33,2,0,1,0
178 | 37,3,1,1,1
179 | 30,1,1,0,1
180 | 35,1,0,0,0
181 | 28,2,0,0,1
182 | 30,6,0,1,0
183 | 28,4,1,0,1
184 | 38,3,1,0,0
185 | 27,5,0,0,1
186 | 27,1,0,0,0
187 | 30,1,0,1,0
188 | 30,1,1,1,0
189 | 30,2,1,0,1
190 | 28,4,0,0,0
191 | 28,1,0,0,1
192 | 29,1,1,1,0
193 | 33,2,0,0,0
194 | 30,1,1,1,0
195 | 37,1,0,0,0
196 | 36,3,0,0,0
197 | 27,6,0,1,1
198 | 37,5,1,0,1
199 | 31,1,1,0,0
200 | 27,1,0,1,0
201 | 33,2,1,0,1
202 | 29,3,0,1,0
203 | 34,1,0,0,1
204 | 30,4,1,0,0
205 | 30,2,0,0,0
206 | 35,1,0,1,0
207 | 31,1,0,1,0
208 | 27,3,1,0,0
209 | 34,2,1,0,0
210 | 31,1,1,1,0
211 | 27,5,0,0,1
212 | 30,6,0,1,0
213 | 29,2,0,0,0
214 | 30,3,1,0,0
215 | 28,1,0,0,1
216 | 29,1,0,0,0
217 | 36,2,0,1,0
218 | 37,4,1,1,0
219 | 30,1,1,0,1
220 | 28,3,0,0,0
221 | 34,2,0,0,0
222 | 38,1,0,1,0
223 | 30,1,0,0,0
224 | 30,5,1,1,0
225 | 33,4,0,0,0
226 | 34,3,0,1,0
227 | 34,6,0,1,1
228 | 37,1,1,0,0
229 | 37,2,1,0,0
230 | 37,1,0,1,0
231 | 37,1,0,0,0
232 | 31,4,0,1,0
233 | 34,2,0,0,0
234 | 30,1,1,1,0
235 | 34,1,0,0,0
236 | 37,1,0,1,0
237 | 33,5,1,1,1
238 | 28,3,1,0,0
239 | 33,4,1,0,1
240 | 31,1,0,0,0
241 | 28,2,0,0,1
242 | 29,6,0,1,0
243 | 38,1,0,0,0
244 | 36,3,1,0,0
245 | 28,2,0,0,1
246 | 29,4,1,0,0
247 | 30,1,0,1,0
248 | 34,1,1,1,0
249 | 29,2,1,0,1
250 | 37,5,0,1,1
251 | 30,1,0,0,1
252 | 37,1,0,1,0
253 | 30,4,0,0,0
254 | 31,1,1,1,0
255 | 30,1,1,0,1
256 | 30,3,0,0,0
257 | 37,6,0,1,0
258 | 37,1,1,1,0
259 | 36,1,1,0,0
260 | 31,4,0,1,0
261 | 36,2,0,0,0
262 | 28,3,0,1,0
263 | 30,5,0,0,1
264 | 28,1,1,0,0
265 | 38,2,0,0,0
266 | 31,1,0,1,0
267 | 29,4,0,1,0
268 | 34,3,1,0,0
269 | 36,2,1,0,0
270 | 37,1,0,0,0
271 | 29,1,0,0,0
272 | 37,6,0,1,0
273 | 38,2,1,0,1
274 | 30,4,1,1,0
275 | 37,1,0,0,1
276 | 35,5,0,0,1
277 | 28,2,0,1,1
278 | 31,1,1,1,0
279 | 34,1,1,0,1
280 | 35,3,0,0,0
281 | 35,4,0,0,0
282 | 35,1,1,1,0
283 | 28,1,0,0,1
284 | 28,1,1,1,0
285 | 28,2,0,0,1
286 | 30,3,0,0,0
287 | 36,6,0,1,1
288 | 30,4,1,0,0
289 | 31,5,1,0,0
290 | 31,1,0,1,0
291 | 28,1,1,0,1
292 | 35,3,0,1,0
293 | 37,2,0,0,0
294 | 29,1,1,0,0
295 | 34,4,0,0,0
296 | 31,1,0,1,0
297 | 30,2,0,1,0
298 | 35,3,1,1,0
299 | 38,1,1,0,1
300 | 27,1,1,0,0
301 | 29,2,0,0,0
302 | 35,6,0,1,1
303 | 29,1,0,0,0
304 | 37,3,1,0,1
305 | 34,2,0,0,0
306 | 37,1,0,1,0
307 | 28,1,0,1,1
308 | 36,1,1,1,0
309 | 30,4,1,0,1
310 | 37,3,0,0,0
311 | 36,1,0,0,1
312 | 33,1,0,1,0
313 | 37,2,0,0,0
314 | 35,1,1,1,0
315 | 27,5,0,0,1
316 | 28,4,0,0,0
317 | 30,6,0,1,0
318 | 34,1,1,0,0
319 | 35,1,1,0,0
320 | 37,1,0,1,0
321 | 27,2,0,0,1
322 | 38,3,0,1,0
323 | 30,4,0,0,1
324 | 30,1,1,0,0
325 | 36,2,0,0,0
326 | 30,1,0,1,0
327 | 34,1,1,1,1
328 | 30,5,1,0,0
329 | 37,2,1,0,0
330 | 37,4,0,1,0
331 | 30,1,0,0,0
332 | 38,6,0,1,0
333 | 36,2,0,0,0
334 | 34,3,1,0,0
335 | 34,1,0,0,1
336 | 31,1,1,0,0
337 | 30,4,0,1,0
338 | 35,1,1,1,0
339 | 33,1,1,0,1
340 | 30,3,0,0,0
341 | 31,5,0,0,0
342 | 29,1,0,1,0
343 | 30,1,0,0,0
344 | 30,4,1,1,0
345 | 30,2,1,0,1
346 | 35,3,0,1,1
347 | 31,6,0,1,1
348 | 27,1,1,0,0
349 | 37,2,1,0,0
350 | 34,1,0,1,0
351 | 30,4,0,0,0
352 | 34,3,0,1,0
353 | 31,2,0,0,0
354 | 34,5,1,1,0
355 | 34,1,0,0,0
356 | 27,1,0,1,0
357 | 30,2,0,1,0
358 | 37,4,1,0,0
359 | 37,1,1,0,1
360 | 37,1,0,0,0
361 | 27,2,0,0,1
362 | 30,6,0,1,0
363 | 31,1,1,0,1
364 | 30,3,1,0,0
365 | 37,4,0,0,0
366 | 34,1,0,0,0
367 | 27,5,0,1,1
368 | 30,1,1,1,0
369 | 34,2,1,0,1
370 | 38,3,0,1,0
371 | 35,1,0,0,1
372 | 34,4,1,1,0
373 | 29,2,0,0,0
374 | 31,1,1,1,0
375 | 30,1,0,0,0
376 | 30,3,0,0,0
377 | 30,6,0,1,0
378 | 34,1,1,1,0
379 | 29,4,1,0,0
380 | 27,5,0,1,0
381 | 29,2,1,0,1
382 | 30,3,0,1,0
383 | 31,1,0,0,1
384 | 34,1,1,0,0
385 | 31,2,0,0,0
386 | 36,4,0,1,0
387 | 37,1,0,1,0
388 | 30,3,1,0,0
389 | 30,2,1,0,0
390 | 30,1,1,0,0
391 | 34,1,0,0,0
392 | 36,6,0,1,0
393 | 30,5,0,0,0
394 | 34,3,1,1,0
395 | 31,1,0,0,1
396 | 30,1,0,0,0
397 | 35,2,0,1,0
398 | 36,1,1,1,0
399 | 27,1,1,0,1
400 | 30,4,0,0,0
401 | 38,2,0,0,0
402 | 31,1,0,1,0
403 | 37,1,0,0,0
404 | 34,1,1,1,0
405 | 30,2,0,0,0
406 | 36,5,0,0,1
407 | 35,6,0,1,1
408 | 30,1,1,0,0
409 | 27,2,1,0,1
410 | 34,1,0,1,0
411 | 30,1,0,0,0
412 | 30,3,0,1,0
413 | 30,2,0,0,0
414 | 30,4,1,0,0
415 | 27,1,0,0,1
416 | 36,1,0,1,0
417 | 30,2,1,1,1
418 | 34,3,1,1,0
419 | 34,5,1,0,1
420 | 37,1,0,0,0
421 | 27,4,0,0,1
422 | 37,6,0,1,0
423 | 30,1,0,0,0
424 | 37,3,1,0,0
425 | 27,2,0,0,1
426 | 31,1,1,1,0
427 | 31,1,0,1,0
428 | 27,4,1,1,0
429 | 27,2,1,0,1
430 | 30,3,0,0,0
431 | 35,1,0,0,1
432 | 28,5,0,1,0
433 | 30,2,0,0,0
434 | 37,1,1,1,0
435 | 30,4,1,0,1
436 | 37,3,0,0,0
437 | 37,6,0,1,0
438 | 30,1,1,0,0
439 | 29,1,1,0,0
440 | 36,1,0,1,0
441 | 30,2,0,0,0
442 | 37,4,0,1,0
443 | 37,1,0,0,1
444 | 31,1,1,0,0
445 | 27,5,0,0,1
446 | 30,1,0,1,0
447 | 29,1,0,1,0
448 | 30,3,1,0,0
449 | 31,4,1,0,0
450 | 36,1,0,1,0
451 | 34,1,0,0,0
452 | 34,6,0,1,0
453 | 30,2,1,0,1
454 | 37,3,1,0,0
455 | 34,1,0,0,1
456 | 30,4,0,0,0
457 | 38,2,0,1,0
458 | 28,5,1,1,0
459 | 27,1,1,0,1
460 | 28,3,0,0,0
461 | 37,2,0,0,0
462 | 27,1,1,1,0
463 | 38,4,0,0,0
464 | 28,1,1,1,0
465 | 36,2,0,0,0
466 | 37,3,0,1,0
467 | 28,6,0,1,1
468 | 30,1,1,0,0
469 | 28,2,1,0,1
470 | 30,4,0,1,0
471 | 27,5,1,0,1
472 | 30,3,0,1,0
473 | 28,2,0,0,1
474 | 30,1,1,1,0
475 | 27,1,0,0,1
476 | 34,1,0,1,0
477 | 37,4,0,1,0
478 | 36,3,1,0,0
479 | 37,1,1,0,1
480 | 31,1,1,0,0
481 | 37,2,0,0,0
482 | 37,6,0,1,0
483 | 31,1,0,0,0
484 | 35,5,1,0,1
485 | 30,2,0,0,0
486 | 36,1,0,0,0
487 | 29,1,0,1,0
488 | 36,1,1,1,0
489 | 37,2,1,0,1
490 | 30,3,0,1,0
491 | 33,4,0,0,1
492 | 31,1,0,1,0
493 | 30,2,0,0,0
494 | 37,1,1,1,0
495 | 31,1,0,0,0
496 | 37,3,0,0,0
497 | 34,6,0,1,0
498 | 28,4,1,1,0
499 | 30,1,1,0,0
500 | 31,1,0,1,0
501 | 35,2,0,0,0
502 | 28,3,0,1,0
503 | 34,1,0,0,1
504 | 30,1,1,0,0
505 | 34,4,0,0,0
506 | 31,1,0,1,0
507 | 30,1,1,1,1
508 | 30,3,1,0,0
509 | 35,2,1,0,0
510 | 30,5,0,0,0
511 | 30,1,0,0,0
512 | 34,6,0,1,0
513 | 34,2,0,0,0
514 | 31,3,1,1,0
515 | 29,1,0,0,1
516 | 30,1,1,0,0
517 | 29,2,0,1,0
518 | 34,1,1,1,0
519 | 34,4,1,0,1
520 | 31,3,0,0,0
521 | 27,2,0,0,1
522 | 38,1,0,1,0
523 | 30,5,0,0,0
524 | 38,1,1,1,0
525 | 36,2,1,0,1
526 | 35,4,0,0,0
527 | 28,6,0,1,1
528 | 34,1,1,0,0
529 | 30,2,1,0,0
530 | 27,1,0,1,0
531 | 30,1,0,0,0
532 | 31,3,0,1,0
533 | 30,4,0,0,0
534 | 34,1,1,0,0
535 | 31,1,0,0,0
536 | 29,5,0,1,0
537 | 34,2,0,1,0
538 | 33,3,1,1,0
539 | 31,1,1,0,1
540 | 30,4,0,0,0
541 | 36,2,0,0,0
542 | 28,6,0,1,0
543 | 31,1,1,0,1
544 | 29,3,1,0,0
545 | 37,2,0,0,0
546 | 34,1,0,1,0
547 | 28,4,0,1,1
548 | 30,1,1,1,0
549 | 28,5,1,0,1
550 | 29,3,0,0,0
551 | 30,1,0,0,1
552 | 37,1,1,1,0
553 | 27,2,0,0,1
554 | 29,4,1,1,0
555 | 36,1,0,0,0
556 | 35,3,0,0,1
557 | 28,6,0,1,1
558 | 36,1,1,0,0
559 | 29,1,1,0,0
560 | 37,1,0,1,0
561 | 31,4,1,0,1
562 | 28,5,0,1,0
563 | 31,1,0,0,1
564 | 36,1,1,0,0
565 | 36,2,0,0,0
566 | 31,1,0,1,0
567 | 30,1,0,1,0
568 | 29,4,1,0,0
569 | 38,2,1,0,0
570 | 38,1,1,1,0
571 | 29,1,0,0,0
572 | 27,6,0,1,0
573 | 35,2,0,0,0
574 | 30,3,1,0,0
575 | 29,5,0,0,1
576 | 31,1,0,0,0
577 | 31,2,0,1,0
578 | 34,1,1,1,0
579 | 29,1,1,0,1
580 | 37,3,0,0,0
581 | 36,2,0,0,0
582 | 34,4,0,1,0
583 | 30,1,0,0,0
584 | 31,1,1,1,0
585 | 31,2,0,0,0
586 | 31,3,0,1,0
587 | 37,6,0,1,1
588 | 30,5,1,0,0
589 | 31,4,1,0,0
590 | 38,1,0,1,0
591 | 28,1,0,0,1
592 | 30,3,0,1,0
593 | 27,2,0,0,1
594 | 27,1,1,0,0
595 | 30,1,0,0,0
596 | 36,4,0,1,0
597 | 27,2,1,1,1
598 | 34,3,1,0,1
599 | 37,1,1,0,1
600 | 38,1,0,0,0
601 | 37,5,0,0,1
602 | 37,6,0,1,0
603 | 37,4,0,0,0
604 | 37,3,1,0,0
605 | 36,2,0,0,0
606 | 35,1,1,0,0
607 | 30,1,0,1,0
608 | 30,1,1,1,0
609 | 30,2,1,0,1
610 | 30,4,0,1,0
611 | 30,1,0,0,1
612 | 34,1,0,1,0
613 | 30,2,0,0,0
614 | 30,5,1,1,0
615 | 34,1,1,0,1
616 | 37,3,0,0,0
617 | 30,6,0,1,0
618 | 29,1,1,1,0
619 | 33,1,1,0,0
620 | 34,1,0,1,0
621 | 38,2,0,0,0
622 | 31,3,0,1,0
623 | 30,1,0,0,1
624 | 35,4,1,0,0
625 | 30,2,0,0,0
626 | 37,1,0,1,0
627 | 34,5,0,1,0
628 | 30,3,1,0,0
629 | 29,2,1,0,0
630 | 30,1,0,0,0
631 | 27,4,0,0,1
632 | 30,6,0,1,0
633 | 36,2,1,0,1
634 | 37,3,1,1,0
635 | 30,1,0,0,1
636 | 37,1,0,0,0
637 | 30,2,0,1,0
638 | 27,4,1,1,0
639 | 37,1,1,0,1
640 | 28,5,0,0,0
641 | 34,2,0,0,0
642 | 29,1,1,1,0
643 | 37,1,0,0,0
644 | 28,1,1,1,0
645 | 30,4,0,0,0
646 | 35,3,0,0,0
647 | 30,6,0,1,1
648 | 30,1,1,0,0
649 | 37,2,1,0,0
650 | 30,1,0,1,0
651 | 30,1,1,0,1
652 | 28,4,0,1,0
653 | 36,5,0,0,1
654 | 36,1,1,0,0
655 | 29,1,0,0,0
656 | 35,1,0,1,0
657 | 30,2,0,1,0
658 | 34,3,1,1,0
659 | 30,4,1,0,1
660 | 33,1,1,0,0
661 | 37,2,0,0,0
662 | 30,6,0,1,0
663 | 30,1,0,0,0
664 | 28,3,1,0,0
665 | 35,2,0,0,0
666 | 37,5,0,1,1
667 | 30,1,0,1,0
668 | 30,1,1,1,0
669 | 28,2,1,0,1
670 | 36,3,0,0,0
671 | 28,1,0,0,1
672 | 30,1,0,1,0
673 | 31,4,0,0,0
674 | 29,1,1,1,0
675 | 33,1,0,0,0
676 | 29,3,0,0,0
677 | 29,6,0,1,0
678 | 31,1,1,0,0
679 | 29,5,1,0,0
680 | 27,4,0,1,0
681 | 28,2,0,0,1
682 | 31,3,0,1,0
683 | 30,1,0,0,1
684 | 28,1,1,0,0
685 | 30,2,0,0,0
686 | 27,1,0,1,0
687 | 37,4,1,1,1
688 | 36,3,1,0,0
689 | 30,2,1,0,0
690 | 36,1,0,1,0
691 | 29,1,0,0,0
692 | 36,6,0,1,1
693 | 34,2,0,0,0
694 | 34,4,1,0,0
695 | 29,1,0,0,1
696 | 37,1,1,0,0
697 | 30,2,0,1,0
698 | 35,1,1,1,0
699 | 27,1,1,0,1
700 | 36,3,0,0,0
701 | 35,4,0,0,0
702 | 30,1,0,1,0
703 | 31,1,0,0,0
704 | 30,1,1,1,0
705 | 29,5,1,0,1
706 | 36,3,0,1,0
707 | 29,6,0,1,1
708 | 37,4,1,0,0
709 | 30,2,1,0,0
710 | 37,1,0,1,0
711 | 36,1,0,0,0
712 | 28,3,0,1,0
713 | 37,2,0,0,0
714 | 30,1,1,1,0
715 | 30,4,0,0,0
716 | 30,1,0,1,0
717 | 30,2,0,1,0
718 | 31,5,1,0,0
719 | 30,1,1,0,1
720 | 29,1,0,0,0
721 | 37,2,0,0,0
722 | 29,6,0,1,0
723 | 37,1,1,0,1
724 | 36,3,1,0,1
725 | 31,2,0,0,0
726 | 27,1,0,0,0
727 | 33,1,0,1,0
728 | 27,1,1,1,0
729 | 37,4,1,0,1
730 | 30,3,0,1,0
731 | 29,5,0,0,1
732 | 34,1,1,1,0
733 | 30,2,0,0,0
734 | 33,1,1,1,0
735 | 29,1,0,0,0
736 | 37,4,0,0,0
737 | 30,6,0,1,0
738 | 33,1,1,1,0
739 | 30,1,1,0,0
740 | 30,1,0,1,0
741 | 36,2,1,0,1
742 | 37,3,0,1,0
743 | 37,4,0,0,1
744 | 37,5,1,0,1
745 | 31,2,0,0,0
746 | 31,1,0,1,0
747 | 30,1,0,1,0
748 | 29,3,1,0,0
749 | 37,2,1,0,0
750 | 30,4,1,0,0
751 | 37,1,0,0,0
752 | 38,6,0,1,0
753 | 30,2,0,0,0
754 | 30,3,1,1,0
755 | 36,1,0,0,1
756 | 35,1,0,0,0
757 | 31,5,0,1,0
758 | 30,1,1,1,0
759 | 29,1,1,0,1
760 | 36,3,0,0,0
761 | 37,2,0,0,0
762 | 29,1,0,1,0
763 | 28,1,0,0,1
764 | 30,4,1,1,0
765 | 30,2,0,0,0
766 | 29,3,0,0,0
767 | 31,6,0,1,1
768 | 37,1,1,0,0
769 | 30,2,1,0,0
770 | 37,5,0,1,1
771 | 33,4,0,0,0
772 | 30,3,0,1,0
773 | 31,2,0,0,0
774 | 38,1,1,0,0
775 | 34,1,0,0,0
776 | 30,1,0,1,0
777 | 35,2,1,1,1
778 | 27,4,1,0,0
779 | 29,1,1,0,1
780 | 29,1,0,0,0
781 | 27,2,0,0,1
782 | 34,6,0,1,0
783 | 29,5,0,0,0
784 | 30,3,1,0,0
785 | 31,4,0,0,0
786 | 34,1,1,1,0
787 | 31,1,0,1,0
788 | 31,1,1,1,0
789 | 37,2,1,0,1
790 | 37,3,0,0,0
791 | 30,1,0,0,1
792 | 35,4,0,1,0
793 | 38,2,0,0,0
794 | 30,1,1,1,0
795 | 30,1,1,0,1
796 | 28,5,0,0,0
797 | 35,6,0,1,0
798 | 30,1,1,0,0
799 | 30,4,1,0,0
800 | 37,1,0,1,0
801 | 28,4,0,0,1
--------------------------------------------------------------------------------
/part3/ch6/math.csv:
--------------------------------------------------------------------------------
1 | groups,scores
2 | group_A,85
3 | group_A,88
4 | group_A,90
5 | group_A,82
6 | group_A,87
7 | group_A,89
8 | group_A,92
9 | group_A,86
10 | group_A,88
11 | group_A,90
12 | group_B,78
13 | group_B,80
14 | group_B,79
15 | group_B,81
16 | group_B,80
17 | group_B,78
18 | group_B,77
19 | group_B,79
20 | group_B,78
21 | group_B,82
22 | group_C,80
23 | group_C,82
24 | group_C,85
25 | group_C,84
26 | group_C,81
27 | group_C,86
28 | group_C,82
29 | group_C,81
30 | group_C,83
31 | group_C,80
32 | group_D,85
33 | group_D,84
34 | group_D,86
35 | group_D,87
36 | group_D,85
37 | group_D,86
38 | group_D,84
39 | group_D,85
40 | group_D,87
41 | group_D,86
42 |
--------------------------------------------------------------------------------
/part3/ch6/tomato2.csv:
--------------------------------------------------------------------------------
1 | 비료유형,물주기,수확량
2 | A,1,514
3 | A,1,480
4 | A,1,507
5 | A,2,452
6 | A,2,526
7 | A,2,457
8 | A,3,506
9 | A,3,502
10 | A,3,482
11 | A,4,595
12 | A,4,491
13 | A,4,523
14 | B,1,538
15 | B,1,469
16 | B,1,545
17 | B,2,504
18 | B,2,538
19 | B,2,481
20 | B,3,480
21 | B,3,547
22 | B,3,526
23 | B,4,518
24 | B,4,533
25 | B,4,530
26 | C,1,475
27 | C,1,444
28 | C,1,460
29 | C,2,459
30 | C,2,446
31 | C,2,494
32 | C,3,500
33 | C,3,515
34 | C,3,522
35 | C,4,507
36 | C,4,511
37 | C,4,521
38 |
--------------------------------------------------------------------------------
/part4/ch2/members.csv:
--------------------------------------------------------------------------------
1 | id,age,city,f1,f2,f3,f4,f5,subscribed,views
2 | id01,2,서울,,0,gold,ENFJ,91.29779092,2021-07-16,6820
3 | id02,9,서울,70,1,,ENFJ,60.33982554,2021-05-12,2534
4 | id03,27,서울,61,1,gold,ISTJ,17.25298557,2021-03-16,7312
5 | id04,75,서울,,2,,INFP,52.66707799,2021-07-21,493
6 | id05,24,서울,85,2,,ISFJ,29.26986926,2021-03-07,1338
7 | id06,22,서울,57,0,vip,INTP,20.1294441,2021-09-12,21550
8 | id07,36.3,서울,60,1,,ISFJ,9.796377581,2021-01-11,61
9 | id08,38,서울,101,1,silver,INFJ,83.68538032,2021-03-06,3260
10 | id09,3.3,서울,35,2,,ESFJ,17.25298557,2021-03-21,2764
11 | id10,95,서울,74,1,gold,ISFP,98.42989897,2021-04-03,9992
12 | id100,47,경기,53,0,vip,ESFP,33.30899901,2021-02-21,15535
13 | id11,40,서울,68,0,gold,ENFP,98.42989897,2021-10-29,6752
14 | id12,20,서울,,0,,ESTP,91.29779092,2021-11-30,1367
15 | id13,15,서울,68,0,gold,ESFJ,83.68538032,2021-12-30,5643
16 | id14,77,서울,50,1,gold,ENTJ,67.8863732,2021-09-19,5700
17 | id15,22,서울,67,1,gold,ENTP,9.796377581,2021-05-26,7676
18 | id16,68,서울,85,0,gold,ESFP,16.2838541,2021-07-25,9472
19 | id17,74,서울,,1,gold,ISTP,67.8863732,2021-10-26,9441
20 | id18,41,서울,87,2,gold,ISFJ,80.13828012,2021-03-03,7933
21 | id19,53,서울,,0,gold,ISFP,83.68538032,2021-12-24,5287
22 | id20,11,서울,51,1,,INTJ,91.29779092,2021-07-16,
23 | id21,90,부산,,1,gold,ISFP,29.26986926,2021-05-03,9690
24 | id22,-6.3,부산,72,1,gold,ENFP,52.66707799,2021-02-09,6147
25 | id23,34,부산,75,1,gold,ISTP,69.73031281,2021-05-21,6236
26 | id24,80,부산,44,0,gold,INFJ,73.58639712,2021-09-11,5976
27 | id25,34,부산,,0,gold,ESTP,60.33982554,2021-07-12,8954
28 | id26,55,부산,57,1,gold,ENFP,83.68538032,2021-05-01,5857
29 | id27,37,부산,60,0,silver,ESTP,73.58639712,2021-10-13,4255
30 | id28,38,부산,34,1,gold,ENTP,80.13828012,2021-10-31,5068
31 | id29,-13.5,부산,47,2,gold,ENTP,67.8863732,2021-08-28,6793
32 | id30,16,부산,,0,,ESTJ,17.25298557,2021-05-28,240
33 | id31,86,부산,77,0,gold,ESFJ,73.58639712,2021-02-11,8014
34 | id32,25,부산,64,0,vip,ISFJ,13.04992129,2021-05-24,17421
35 | id33,47,부산,94,0,silver,ENFJ,17.25298557,2021-04-02,3880
36 | id34,65,부산,,1,silver,INFP,48.43118381,2021-02-01,3163
37 | id35,30,부산,,2,silver,ESTJ,33.30899901,2021-06-10,3084
38 | id36,68,부산,77,1,gold,INTP,13.04992129,2021-07-20,9713
39 | id37,100,부산,,0,silver,ESTP,33.30899901,2021-07-08,4068
40 | id38,87,부산,,1,,ESTP,83.68538032,2021-06-21,1048
41 | id39,56,부산,50,0,,INFJ,33.30899901,2021-12-22,
42 | id40,56,대구,75,0,gold,ENFP,17.25298557,2021-01-22,8481
43 | id41,81,대구,55,0,gold,ENFJ,37.11373918,2021-10-04,8640
44 | id42,65,대구,48,2,gold,ESTP,33.30899901,2021-02-09,5999
45 | id43,23,대구,60,0,silver,ISTP,29.26986926,2021-05-18,3878
46 | id44,44,대구,,0,,INTP,16.2838541,2021-11-10,546
47 | id45,97,대구,88,0,gold,ENFJ,13.04992129,2021-06-21,8317
48 | id46,93,대구,,0,gold,ESTJ,67.8863732,2021-05-23,9711
49 | id47,34.6,대구,75,1,gold,ESTJ,90.49699927,2021-05-28,8628
50 | id48,18,대구,,0,,ENFP,20.1294441,2021-03-25,
51 | id49,75,대구,88,0,gold,INTP,37.11373918,2021-03-31,9737
52 | id50,86,대구,78,1,,ENFP,60.33982554,2021-12-05,1935
53 | id51,36,대구,,0,gold,ISTJ,83.68538032,2021-08-20,7217
54 | id52,97,대구,82,1,gold,ISFJ,90.49699927,2021-05-20,8518
55 | id53,52,대구,50,0,gold,ESTP,20.1294441,2021-09-09,7012
56 | id54,53,대구,,1,gold,ENFJ,69.73031281,2021-06-21,5872
57 | id55,75,대구,63,2,gold,ENTP,13.04992129,2021-02-06,6042
58 | id56,59,대구,,1,vip,ESTJ,73.58639712,2021-04-05,19589
59 | id57,3,대구,111,0,silver,ISFJ,29.26986926,2021-01-12,4421
60 | id58,0,대구,100,2,,ESTP,33.30899901,2021-04-18,1928
61 | id59,64,대구,,1,silver,ESFJ,20.1294441,2021-06-23,4994
62 | id60,56,경기,,0,gold,ESFP,52.66707799,2021-11-24,6794
63 | id61,87,경기,62,2,,INTP,69.73031281,2021-02-03,218
64 | id62,52,경기,,0,,INTP,60.33982554,2021-04-10,2100
65 | id63,88,경기,86,1,silver,ISFJ,73.58639712,2021-12-01,4053
66 | id64,43,경기,62,2,gold,ESFP,73.58639712,2021-02-22,5995
67 | id65,26.5,경기,,0,silver,ISFP,91.29779092,2021-01-10,3336
68 | id66,87,경기,,1,gold,ISFP,17.25298557,2021-08-05,8471
69 | id67,66,경기,52,1,,ISFJ,73.58639712,2021-06-17,1159
70 | id68,35,경기,45,2,gold,ISFP,67.8863732,2021-07-29,8599
71 | id69,75,경기,85,0,,ESTJ,69.73031281,2021-11-14,2708
72 | id70,-9,경기,96,1,silver,ISTP,48.43118381,2021-11-17,4442
73 | id71,35,경기,84,2,gold,ISFP,52.66707799,2021-07-15,8087
74 | id72,8,경기,97,0,,ESTJ,97.38103419,2021-01-30,602
75 | id73,90,경기,,1,,ISFJ,73.58639712,2021-08-12,512
76 | id74,45,경기,98,0,gold,ESTP,52.66707799,2021-05-27,7739
77 | id75,63,경기,47,0,gold,ESTP,20.1294441,2021-06-12,6779
78 | id76,71,경기,12,0,,ENTJ,83.68538032,2021-07-28,2872
79 | id77,77,경기,31,0,,INFP,98.42989897,2021-01-16,1518
80 | id78,92,경기,96,1,gold,INTJ,69.73031281,2021-10-27,7565
81 | id79,30,경기,,0,gold,INTJ,80.13828012,2021-08-14,8777
82 | id80,67,경기,60,0,silver,ISFP,83.68538032,2021-01-14,4381
83 | id81,86,경기,50,1,,ISFJ,37.11373918,2021-09-14,244
84 | id82,48,경기,,0,,ENTJ,37.11373918,2021-10-17,
85 | id83,73,경기,50,1,vip,ENTP,80.13828012,2021-09-26,19139
86 | id84,66,경기,44,0,gold,INTP,83.68538032,2021-12-19,5650
87 | id85,83.6,경기,55,0,gold,INFJ,80.13828012,2021-09-24,6719
88 | id86,2,경기,,0,,ESTP,29.26986926,2021-02-16,2155
89 | id87,19,경기,,1,gold,ISFP,97.38103419,2021-08-30,6516
90 | id88,89,경기,75,0,,ESTJ,60.33982554,2021-01-06,2713
91 | id89,34,경기,66,1,gold,ENTJ,33.30899901,2021-10-14,6119
92 | id90,54,경기,,0,silver,ENTP,29.26986926,2021-04-03,3818
93 | id91,6,경기,72,0,gold,INTP,9.796377581,2021-08-23,8988
94 | id92,97,경기,78,1,gold,INFP,97.38103419,2021-05-08,9625
95 | id93,21.8,경기,57,0,,ISFP,73.58639712,2021-06-07,42
96 | id94,84,경기,,1,silver,ESTJ,90.49699927,2021-08-16,3774
97 | id95,77,경기,43,1,gold,INTJ,91.29779092,2021-05-21,8697
98 | id96,92,경기,53,1,silver,ENTJ,52.66707799,2021-05-06,4336
99 | id97,100,경기,,0,gold,INFP,67.8863732,2021-03-18,6687
100 | id98,39,경기,58,2,,INFP,98.42989897,2021-10-02,865
101 | id99,1,경기,47,0,gold,ESFJ,97.38103419,2021-12-02,6090
102 |
--------------------------------------------------------------------------------
/part4/ch2/p2_type1.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyM8nWiAL6LDgoxZWnlkMmm+"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch2/p2_type1.ipynb)\n","\n","\n"],"metadata":{"id":"8B9gqkLvwvAs"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch2/members.csv\")"],"metadata":{"id":"P_qNqyD3W-6P"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713098382739,"user_tz":-540,"elapsed":9,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"3f4ce496-cc1f-4d92-952e-2e97172cd7bf"},"outputs":[{"output_type":"stream","name":"stdout","text":["5674.04\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch2/members.csv\")\n","\n","# 1) views 컬럼 내림차순 정렬\n","df.sort_values('views', ascending=False, inplace=True)\n","# 2) 상위 10개 중에서 10번째 (최소값) 값 구하기\n","min_value = df['views'][:10].min()\n","\n","# 3) 상위 10개 중에서 10번째 (최소값) 값 대체\n","df.iloc[:10,-1] = min_value\n","\n","# 4) age가 80 이상의 views 컬럼 평균\n","cond = df['age'] >= 80\n","result = df[cond]['views'].mean()\n","\n","# 5) 반올림하여 소수 둘째자리까지 계산\n","print(round(result, 2))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["import pandas as 
pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch2/members.csv\")\n","\n","# 1) 앞에서 부터 80% 데이터 슬라이싱\n","line = int(len(df) * 0.8)\n","df = df.iloc[:line]\n","\n","# 2) 결측치 채우기 전 'f1' 컬럼 표준편차 구하기\n","std1 = df['f1'].std()\n","\n","# 3) 중앙값으로 결측치 채우기\n","med=df['f1'].median()\n","df['f1'] = df['f1'].fillna(med)\n","\n","# 4) 결측치를 채운 후 'f1' 컬럼 표준편차 구하기\n","std2 = df['f1'].std()\n","\n","# 5) 두 표준편차 차이 절대값 계산 (반올림하여 소수 둘째자리까지 계산)\n","result = abs(std1-std2)\n","print(round(result,2))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1713098382739,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"8b5e4d6d-4fbf-4465-d485-f92da340375a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["3.56\n"]}]},{"cell_type":"code","source":["# 표본 표준편차(판다스) vs 모 표준편차(넘파이)\n","import numpy as np\n","# 기본값\n","print(\"판다스 표본 표준편차\", df['f1'].std())\n","print(\"넘파이 모 표준편차\", np.std(df['f1']))\n","print(\"---------------------------------- \")\n","# ddof변경\n","print(\"판다스 모 표준편차\", df['f1'].std(ddof=0))\n","print(\"넘파이 표본 표준편차\", np.std(df['f1'],ddof=1))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4pflsVkWnxfy","executionInfo":{"status":"ok","timestamp":1713098382739,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"6aa140b2-2748-4b30-c6fe-69b02d658705"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["판다스 표본 표준편차 17.010788646613268\n","넘파이 모 표준편차 16.90413688272785\n","---------------------------------- \n","판다스 모 표준편차 16.90413688272785\n","넘파이 표본 표준편차 17.010788646613268\n"]}]},{"cell_type":"markdown","source":["### 문제3."],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["import pandas as pd\n","# df = 
pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch2/members.csv\")\n","\n","# 1) 표준편차, 평균값 구하기\n","std = df['age'].std()\n","mean = df['age'].mean()\n","\n","# 2) 이상치 최저, 최고 기준 구하기\n","lower = mean - (std * 1.5)\n","upper = mean + (std * 1.5)\n","\n","# 3) 기준을 벗어나는 이상치(조건) 찾기\n","cond1 = df['age'] < lower\n","cond2 = df['age'] > upper\n","\n","# 4) 조건을 만족하는 이상치의 age 합\n","print(df[cond1|cond2]['age'].sum())"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713098382739,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"ac09c5da-b6d5-4462-a80b-d9d868cfefda"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["473.5\n"]}]}]}
--------------------------------------------------------------------------------
/part4/ch3/members.csv:
--------------------------------------------------------------------------------
1 | id,age,city,f1,f2,f3,f4,f5,subscribed,views
2 | id01,2,서울,,0,gold,ENFJ,91.29779092,2021-07-16,6820
3 | id02,9,서울,70,1,,ENFJ,60.33982554,2021-05-12,2534
4 | id03,27,서울,61,1,gold,ISTJ,17.25298557,2021-03-16,7312
5 | id04,75,서울,,2,,INFP,52.66707799,2021-07-21,493
6 | id05,24,서울,85,2,,ISFJ,29.26986926,2021-03-07,1338
7 | id06,22,서울,57,0,vip,INTP,20.1294441,2021-09-12,21550
8 | id07,36.3,서울,60,1,,ISFJ,9.796377581,2021-01-11,61
9 | id08,38,서울,101,1,silver,INFJ,83.68538032,2021-03-06,3260
10 | id09,3.3,서울,35,2,,ESFJ,17.25298557,2021-03-21,2764
11 | id10,95,서울,74,1,gold,ISFP,98.42989897,2021-04-03,9992
12 | id100,47,경기,53,0,vip,ESFP,33.30899901,2021-02-21,15535
13 | id11,40,서울,68,0,gold,ENFP,98.42989897,2021-10-29,6752
14 | id12,20,서울,,0,,ESTP,91.29779092,2021-11-30,1367
15 | id13,15,서울,68,0,gold,ESFJ,83.68538032,2021-12-30,5643
16 | id14,77,서울,50,1,gold,ENTJ,67.8863732,2021-09-19,5700
17 | id15,22,서울,67,1,gold,ENTP,9.796377581,2021-05-26,7676
18 | id16,68,서울,85,0,gold,ESFP,16.2838541,2021-07-25,9472
19 | id17,74,서울,,1,gold,ISTP,67.8863732,2021-10-26,9441
20 | id18,41,서울,87,2,gold,ISFJ,80.13828012,2021-03-03,7933
21 | id19,53,서울,,0,gold,ISFP,83.68538032,2021-12-24,5287
22 | id20,11,서울,51,1,,INTJ,91.29779092,2021-07-16,
23 | id21,90,부산,,1,gold,ISFP,29.26986926,2021-05-03,9690
24 | id22,-6.3,부산,72,1,gold,ENFP,52.66707799,2021-02-09,6147
25 | id23,34,부산,75,1,gold,ISTP,69.73031281,2021-05-21,6236
26 | id24,80,부산,44,0,gold,INFJ,73.58639712,2021-09-11,5976
27 | id25,34,부산,,0,gold,ESTP,60.33982554,2021-07-12,8954
28 | id26,55,부산,57,1,gold,ENFP,83.68538032,2021-05-01,5857
29 | id27,37,부산,60,0,silver,ESTP,73.58639712,2021-10-13,4255
30 | id28,38,부산,34,1,gold,ENTP,80.13828012,2021-10-31,5068
31 | id29,-13.5,부산,47,2,gold,ENTP,67.8863732,2021-08-28,6793
32 | id30,16,부산,,0,,ESTJ,17.25298557,2021-05-28,240
33 | id31,86,부산,77,0,gold,ESFJ,73.58639712,2021-02-11,8014
34 | id32,25,부산,64,0,vip,ISFJ,13.04992129,2021-05-24,17421
35 | id33,47,부산,94,0,silver,ENFJ,17.25298557,2021-04-02,3880
36 | id34,65,부산,,1,silver,INFP,48.43118381,2021-02-01,3163
37 | id35,30,부산,,2,silver,ESTJ,33.30899901,2021-06-10,3084
38 | id36,68,부산,77,1,gold,INTP,13.04992129,2021-07-20,9713
39 | id37,100,부산,,0,silver,ESTP,33.30899901,2021-07-08,4068
40 | id38,87,부산,,1,,ESTP,83.68538032,2021-06-21,1048
41 | id39,56,부산,50,0,,INFJ,33.30899901,2021-12-22,
42 | id40,56,대구,75,0,gold,ENFP,17.25298557,2021-01-22,8481
43 | id41,81,대구,55,0,gold,ENFJ,37.11373918,2021-10-04,8640
44 | id42,65,대구,48,2,gold,ESTP,33.30899901,2021-02-09,5999
45 | id43,23,대구,60,0,silver,ISTP,29.26986926,2021-05-18,3878
46 | id44,44,대구,,0,,INTP,16.2838541,2021-11-10,546
47 | id45,97,대구,88,0,gold,ENFJ,13.04992129,2021-06-21,8317
48 | id46,93,대구,,0,gold,ESTJ,67.8863732,2021-05-23,9711
49 | id47,34.6,대구,75,1,gold,ESTJ,90.49699927,2021-05-28,8628
50 | id48,18,대구,,0,,ENFP,20.1294441,2021-03-25,
51 | id49,75,대구,88,0,gold,INTP,37.11373918,2021-03-31,9737
52 | id50,86,대구,78,1,,ENFP,60.33982554,2021-12-05,1935
53 | id51,36,대구,,0,gold,ISTJ,83.68538032,2021-08-20,7217
54 | id52,97,대구,82,1,gold,ISFJ,90.49699927,2021-05-20,8518
55 | id53,52,대구,50,0,gold,ESTP,20.1294441,2021-09-09,7012
56 | id54,53,대구,,1,gold,ENFJ,69.73031281,2021-06-21,5872
57 | id55,75,대구,63,2,gold,ENTP,13.04992129,2021-02-06,6042
58 | id56,59,대구,,1,vip,ESTJ,73.58639712,2021-04-05,19589
59 | id57,3,대구,111,0,silver,ISFJ,29.26986926,2021-01-12,4421
60 | id58,0,대구,100,2,,ESTP,33.30899901,2021-04-18,1928
61 | id59,64,대구,,1,silver,ESFJ,20.1294441,2021-06-23,4994
62 | id60,56,경기,,0,gold,ESFP,52.66707799,2021-11-24,6794
63 | id61,87,경기,62,2,,INTP,69.73031281,2021-02-03,218
64 | id62,52,경기,,0,,INTP,60.33982554,2021-04-10,2100
65 | id63,88,경기,86,1,silver,ISFJ,73.58639712,2021-12-01,4053
66 | id64,43,경기,62,2,gold,ESFP,73.58639712,2021-02-22,5995
67 | id65,26.5,경기,,0,silver,ISFP,91.29779092,2021-01-10,3336
68 | id66,87,경기,,1,gold,ISFP,17.25298557,2021-08-05,8471
69 | id67,66,경기,52,1,,ISFJ,73.58639712,2021-06-17,1159
70 | id68,35,경기,45,2,gold,ISFP,67.8863732,2021-07-29,8599
71 | id69,75,경기,85,0,,ESTJ,69.73031281,2021-11-14,2708
72 | id70,-9,경기,96,1,silver,ISTP,48.43118381,2021-11-17,4442
73 | id71,35,경기,84,2,gold,ISFP,52.66707799,2021-07-15,8087
74 | id72,8,경기,97,0,,ESTJ,97.38103419,2021-01-30,602
75 | id73,90,경기,,1,,ISFJ,73.58639712,2021-08-12,512
76 | id74,45,경기,98,0,gold,ESTP,52.66707799,2021-05-27,7739
77 | id75,63,경기,47,0,gold,ESTP,20.1294441,2021-06-12,6779
78 | id76,71,경기,12,0,,ENTJ,83.68538032,2021-07-28,2872
79 | id77,77,경기,31,0,,INFP,98.42989897,2021-01-16,1518
80 | id78,92,경기,96,1,gold,INTJ,69.73031281,2021-10-27,7565
81 | id79,30,경기,,0,gold,INTJ,80.13828012,2021-08-14,8777
82 | id80,67,경기,60,0,silver,ISFP,83.68538032,2021-01-14,4381
83 | id81,86,경기,50,1,,ISFJ,37.11373918,2021-09-14,244
84 | id82,48,경기,,0,,ENTJ,37.11373918,2021-10-17,
85 | id83,73,경기,50,1,vip,ENTP,80.13828012,2021-09-26,19139
86 | id84,66,경기,44,0,gold,INTP,83.68538032,2021-12-19,5650
87 | id85,83.6,경기,55,0,gold,INFJ,80.13828012,2021-09-24,6719
88 | id86,2,경기,,0,,ESTP,29.26986926,2021-02-16,2155
89 | id87,19,경기,,1,gold,ISFP,97.38103419,2021-08-30,6516
90 | id88,89,경기,75,0,,ESTJ,60.33982554,2021-01-06,2713
91 | id89,34,경기,66,1,gold,ENTJ,33.30899901,2021-10-14,6119
92 | id90,54,경기,,0,silver,ENTP,29.26986926,2021-04-03,3818
93 | id91,6,경기,72,0,gold,INTP,9.796377581,2021-08-23,8988
94 | id92,97,경기,78,1,gold,INFP,97.38103419,2021-05-08,9625
95 | id93,21.8,경기,57,0,,ISFP,73.58639712,2021-06-07,42
96 | id94,84,경기,,1,silver,ESTJ,90.49699927,2021-08-16,3774
97 | id95,77,경기,43,1,gold,INTJ,91.29779092,2021-05-21,8697
98 | id96,92,경기,53,1,silver,ENTJ,52.66707799,2021-05-06,4336
99 | id97,100,경기,,0,gold,INFP,67.8863732,2021-03-18,6687
100 | id98,39,경기,58,2,,INFP,98.42989897,2021-10-02,865
101 | id99,1,경기,47,0,gold,ESFJ,97.38103419,2021-12-02,6090
102 |
--------------------------------------------------------------------------------
/part4/ch3/p3_type1.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyOF0eLeFT4xZzvFsbWZ42Bt"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch3/p3_type1.ipynb)\n"],"metadata":{"id":"LNmnV8r3xZYl"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","execution_count":1,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1717333939429,"user_tz":-540,"elapsed":578,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"9871913f-3b0c-47a0-a3ab-9d2726f36123"},"outputs":[{"output_type":"stream","name":"stdout","text":["57\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/members.csv\")\n","\n","# 1) 결측치가 있는 행 제거 (기본값 axis=0)\n","df = df.dropna()\n","\n","# 2) 앞에서부터 70% 데이터 슬라이싱\n","df = df.iloc[:int(len(df)*0.7)]\n","\n","# 3) 1사분위 값 계산(정수 출력)\n","print(int(df['f1'].quantile(.25)))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","# df = pd.read_csv('year.csv', index_col='Unnamed: 0')\n","df = pd.read_csv('https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/year.csv', index_col='Unnamed: 0')\n","\n","# 1) 행을 기준으로 평균을 계산\n","m = df.loc[2000].mean()\n","\n","# 2) 평균보다 큰 값의 합 계산\n","print(sum(df.loc[2000,:] > 
m))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1717333939787,"user_tz":-540,"elapsed":360,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"70c9238f-89cf-41b1-f615-8e46b5408266"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["100\n"]}]},{"cell_type":"code","source":["# 방법2\n","# df = pd.read_csv('year.csv', index_col='Unnamed: 0')\n","df = pd.read_csv('https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/year.csv', index_col='Unnamed: 0')\n","\n","# 1) 행과 열을 변경함\n","df = df.T\n","\n","# 2) 평균 계산\n","m = df[2000].mean()\n","\n","# 3) 평균보다 큰 값의 합 계산\n","print(sum(df[2000] > m))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iYnbTrVNYbjs","executionInfo":{"status":"ok","timestamp":1717333939787,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"671a595f-2ecc-465c-b513-d7e68927bb1d"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["100\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/members.csv\")\n","\n","# 1) 결측치 수 컬럼별로 확인 및 변수에 대입(시리즈)\n","df_cntNull = df.isnull().sum()\n","\n","# 2) 내림차순 정렬\n","df_cntNull = df_cntNull.sort_values(ascending=False)\n","\n","# 3) 가장 상위에 있는 인덱스명 출력\n","print(df_cntNull.index[0])"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1717333939788,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"dd746db0-5cc0-42b6-9225-3a6bed976754"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["f1\n"]}]},{"cell_type":"code","source":["# 방법2\n","# df = pd.read_csv(\"members.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch3/members.csv\")\n","\n","# 1) 결측치 수 컬럼별로 확인 및 변수에 대입(시리즈)\n","df_cntNull = df.isnull().sum()\n","\n","# 2) 인덱스 리셋을 활용해 기존 인덱스를 새로운 컬럼으로 변경\n","df_cntNull = df_cntNull.reset_index()\n","\n","# 3) 출력하고자 하는 값을 찾아 출력\n","print(df_cntNull.loc[3, 'index'])"],"metadata":{"id":"L9JYUM9N9u_z","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1717333939788,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"dcb941dd-cc0d-4b2d-a9d5-0567eaba4cd0"},"execution_count":5,"outputs":[{"output_type":"stream","name":"stdout","text":["f1\n"]}]}]}
--------------------------------------------------------------------------------
/part4/ch3/year.csv:
--------------------------------------------------------------------------------
1 | ,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199
2 | 1999,137,74,114,140,80,150,16,133,178,181,106,176,163,81,4,195,111,80,166,161,117,141,134,146,10,191,68,11,200,95,74,104,18,107,68,114,43,4,14,59,173,124,108,18,175,30,174,28,144,100,162,116,71,9,138,162,83,112,66,1,111,35,8,180,191,7,108,174,180,189,83,139,109,73,24,197,28,148,47,194,26,106,103,196,115,200,145,33,182,70,158,112,196,41,27,50,34,19,120,33,103,26,43,140,184,159,16,111,34,191,192,168,93,53,95,141,184,85,68,81,164,129,27,119,128,172,103,105,100,162,55,139,193,114,128,57,165,38,39,175,184,119,98,73,23,62,67,180,163,145,68,48,75,187,17,199,74,48,35,54,117,173,76,51,1,40,81,5,165,175,176,72,103,7,175,178,172,152,104,195,15,153,44,111,176,55,39,40,80,85,124,94,118,12,50,191,137,174,56,128
3 | 2000,176,87,64,110,128,16,8,4,123,87,190,146,53,52,21,55,75,131,76,181,72,82,121,182,97,162,86,179,68,36,77,146,155,13,133,134,28,14,108,4,194,197,153,96,16,53,172,125,57,50,184,122,3,3,168,32,99,189,197,27,7,188,120,181,23,172,56,45,68,191,109,14,66,101,58,1,33,72,74,55,74,87,62,112,14,47,68,15,172,173,196,190,79,117,137,141,171,105,186,128,159,194,29,8,147,24,199,120,94,94,171,158,115,141,92,13,86,54,182,117,193,186,171,198,136,38,77,35,93,80,88,74,199,9,85,78,176,25,137,54,94,181,35,156,82,175,74,15,16,52,122,162,106,36,17,190,135,128,169,78,160,137,147,188,130,146,97,7,37,25,133,134,162,155,19,112,170,154,170,103,197,8,55,131,48,117,112,68,57,117,59,22,3,108,17,104,101,161,156,43
4 | 2001,128,132,123,112,134,160,180,47,115,155,50,152,13,137,89,193,156,122,137,88,173,173,57,59,114,99,77,27,41,4,191,172,11,165,45,39,38,181,178,23,151,76,188,19,110,28,39,89,88,100,174,12,116,142,70,141,91,112,72,147,195,142,89,34,179,3,101,171,144,33,122,196,142,145,88,159,167,44,84,173,52,31,123,187,99,187,5,95,193,101,186,30,176,134,14,164,9,122,29,126,38,137,110,130,153,167,14,22,185,54,162,124,100,155,98,3,18,193,199,168,126,121,152,174,131,61,148,16,171,64,169,95,109,45,139,86,179,17,186,151,88,100,147,74,31,200,91,16,32,142,137,162,74,31,20,131,77,106,96,187,102,166,85,179,178,37,39,158,186,17,36,122,6,125,190,156,90,116,68,146,39,136,123,54,186,103,154,46,22,87,160,111,116,75,48,51,81,101,194,73
5 |
--------------------------------------------------------------------------------
/part4/ch4/data4-1.csv:
--------------------------------------------------------------------------------
1 | id,age,city,f1,f2,f3,f4,f5
2 | id01,2,서울,,0,,ENFJ,91.29779092
3 | id02,9,서울,70,1,,ENFJ,60.33982554
4 | id03,27,서울,61,1,,ISTJ,17.25298557
5 | id04,75,서울,,2,,INFP,52.66707799
6 | id05,24,서울,85,2,,ISFJ,29.26986926
7 | id06,22,서울,57,0,vip,INTP,20.1294441
8 | id07,36.3,서울,60,1,,ISFJ,9.796377581
9 | id08,38,서울,101,1,,INFJ,83.68538032
10 | id09,3.3,서울,35,2,,ESFJ,17.25298557
11 | id10,95,서울,74,1,,ISFP,98.42989897
12 | id11,40,서울,68,0,,ENFP,98.42989897
13 | id12,20,서울,,0,,ESTP,91.29779092
14 | id13,15,서울,68,0,,ESFJ,83.68538032
15 | id14,77,서울,50,1,,ENTJ,67.8863732
16 | id15,22,서울,67,1,,ENTP,9.796377581
17 | id16,68,서울,85,0,,ESFP,16.2838541
18 | id17,74,서울,,1,,ISTP,67.8863732
19 | id18,41,서울,87,2,,ISFJ,80.13828012
20 | id19,53,서울,,0,,ISFP,83.68538032
21 | id20,11,서울,51,1,,INTJ,91.29779092
22 | id21,90,부산,,1,,ISFP,29.26986926
23 | id22,-6.3,부산,72,1,,ENFP,52.66707799
24 | id23,34,부산,75,1,,ISTP,69.73031281
25 | id24,80,부산,44,0,,INFJ,73.58639712
26 | id25,34,부산,,0,,ESTP,60.33982554
27 | id26,55,부산,57,1,,ENFP,83.68538032
28 | id27,37,부산,60,0,,ESTP,73.58639712
29 | id28,38,부산,34,1,,ENTP,80.13828012
30 | id29,-13.5,부산,47,2,,ENTP,67.8863732
31 | id30,16,부산,,0,,ESTJ,17.25298557
32 | id31,86,부산,77,0,,ESFJ,73.58639712
33 | id32,25,부산,64,0,vip,ISFJ,13.04992129
34 | id33,47,부산,94,0,,ENFJ,17.25298557
35 | id34,65,부산,,1,,INFP,48.43118381
36 | id35,30,부산,,2,,ESTJ,33.30899901
37 | id36,68,부산,77,1,,INTP,13.04992129
38 | id37,100,부산,,0,,ESTP,33.30899901
39 | id38,87,부산,,1,,ESTP,83.68538032
40 | id39,56,부산,50,0,,INFJ,33.30899901
41 | id40,56,대구,75,0,,ENFP,17.25298557
42 | id41,81,대구,55,0,,ENFJ,37.11373918
43 | id42,65,대구,48,2,,ESTP,33.30899901
44 | id43,23,대구,60,0,,ISTP,29.26986926
45 | id44,44,대구,,0,,INTP,16.2838541
46 | id45,97,대구,88,0,,ENFJ,13.04992129
47 | id46,93,대구,,0,,ESTJ,67.8863732
48 | id47,34.6,대구,75,1,,ESTJ,90.49699927
49 | id48,18,대구,,0,,ENFP,20.1294441
50 | id49,75,대구,88,0,,INTP,37.11373918
51 | id50,86,대구,78,1,,ENFP,60.33982554
52 | id51,36,대구,,0,,ISTJ,83.68538032
53 | id52,97,대구,82,1,,ISFJ,90.49699927
54 | id53,52,대구,50,0,,ESTP,20.1294441
55 | id54,53,대구,,1,,ENFJ,69.73031281
56 | id55,75,대구,63,2,,ENTP,13.04992129
57 | id56,59,대구,,1,vip,ESTJ,73.58639712
58 | id57,3,대구,111,0,,ISFJ,29.26986926
59 | id58,0,대구,100,2,,ESTP,33.30899901
60 | id59,64,대구,,1,,ESFJ,20.1294441
61 | id60,56,경기,,0,,ESFP,52.66707799
62 | id61,87,경기,62,2,,INTP,69.73031281
63 | id62,52,경기,,0,,INTP,60.33982554
64 | id63,88,경기,86,1,,ISFJ,73.58639712
65 | id64,43,경기,62,2,,ESFP,73.58639712
66 | id65,26.5,경기,,0,,ISFP,91.29779092
67 | id66,87,경기,,1,,ISFP,17.25298557
68 | id67,66,경기,52,1,,ISFJ,73.58639712
69 | id68,35,경기,45,2,,ISFP,67.8863732
70 | id69,75,경기,85,0,,ESTJ,69.73031281
71 | id70,-9,경기,96,1,,ISTP,48.43118381
72 | id71,35,경기,84,2,,ISFP,52.66707799
73 | id72,8,경기,97,0,,ESTJ,97.38103419
74 | id73,90,경기,,1,,ISFJ,73.58639712
75 | id74,45,경기,98,0,,ESTP,52.66707799
76 | id75,63,경기,47,0,,ESTP,20.1294441
77 | id76,71,경기,12,0,,ENTJ,83.68538032
78 | id77,77,경기,31,0,,INFP,98.42989897
79 | id78,92,경기,96,1,,INTJ,69.73031281
80 | id79,30,경기,,0,,INTJ,80.13828012
81 | id80,67,경기,60,0,,ISFP,83.68538032
82 | id81,86,경기,50,1,,ISFJ,37.11373918
83 | id82,48,경기,,0,,ENTJ,37.11373918
84 | id83,73,경기,50,1,vip,ENTP,80.13828012
85 | id84,66,경기,44,0,,INTP,83.68538032
86 | id85,83.6,경기,55,0,,INFJ,80.13828012
87 | id86,2,경기,,0,,ESTP,29.26986926
88 | id87,19,경기,,1,,ISFP,97.38103419
89 | id88,89,경기,75,0,,ESTJ,60.33982554
90 | id89,34,경기,66,1,,ENTJ,33.30899901
91 | id90,54,경기,,0,,ENTP,29.26986926
92 | id91,6,경기,72,0,,INTP,9.796377581
93 | id92,97,경기,78,1,,INFP,97.38103419
94 | id93,21.8,경기,57,0,,ISFP,73.58639712
95 | id94,84,경기,,1,,ESTJ,90.49699927
96 | id95,77,경기,43,1,,INTJ,91.29779092
97 | id96,92,경기,53,1,,ENTJ,52.66707799
98 | id97,100,경기,,0,,INFP,67.8863732
99 | id98,39,경기,58,2,,INFP,98.42989897
100 | id99,1,경기,47,0,,ESFJ,97.38103419
101 | id100,47,경기,53,0,vip,ESFP,33.30899901
102 |
--------------------------------------------------------------------------------
/part4/ch4/p4_type1.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyNMmI6oMjxT37FgfVx31SGC"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch4/p4_type1.ipynb)"],"metadata":{"id":"LPh19ptmmhPP"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713450424035,"user_tz":-540,"elapsed":1325,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"939c2853-d95d-4d42-cc80-3b6b8cb1f509"},"outputs":[{"output_type":"stream","name":"stdout","text":["50\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"data4-1.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch4/data4-1.csv\")\n","\n","# 1) 3사분위수와 1사분위수의 차이를 절대값으로 계산\n","q1 = df['age'].quantile(0.25)\n","q3 = df['age'].quantile(0.75)\n","result = abs(q1 - q3)\n","\n","# 2) 소수점 이하를 버리고, 정수로 출력\n","print(int(result))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data4-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch4/data4-2.csv\")\n","\n","# 1) 비율이 40%보다 크고 50%보다 작은 조건\n","cond1 = (df['loves'] + df['wows']) / df['reactions'] > 0.4\n","cond2 = (df['loves'] + df['wows']) / df['reactions'] < 0.5\n","\n","# 2) type이 video인 조건\n","cond3 = df['type'] == 'video'\n","\n","# 3) 조건에 맞는 데이터 수 구하기\n","print(len(df[cond1 & cond2 & 
cond3]))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1713450424036,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"717997e1-2e5c-458d-f140-d2ab276cb662"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["90\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","# df = pd.read_csv(\"data4-3.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch4/data4-3.csv\")\n","\n","# 1) datetime으로 형변환\n","df['date_added'] = pd.to_datetime(df['date_added'])\n","\n","# 2) dt를 활용해 year과 month 파생변수 생성\n","df['year'] = df['date_added'].dt.year\n","df['month'] = df['date_added'].dt.month\n","\n","# 3) 조건\n","cond1 = df['country'] == \"United Kingdom\"\n","cond2 = df['year'] == 2018\n","cond3 = df['month'] == 1\n","\n","# 4) 조건에 맞는 데이터 수 출력\n","print(len(df[cond1 & cond2 & cond3]))"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713450424545,"user_tz":-540,"elapsed":512,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"56aafe5e-9d9b-475c-a422-c69552f3a55e"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["6\n"]}]},{"cell_type":"code","source":["# 방법2\n","# df = pd.read_csv(\"data4-3.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch4/data4-3.csv\")\n","\n","# 1) datetime으로 형변환\n","df['date_added'] = pd.to_datetime(df['date_added'])\n","\n","# 2) 조건\n","cond1 = df['country'] == \"United Kingdom\"\n","cond2 = df['date_added'] >= '2018-1-1'\n","cond3 = df['date_added'] <= '2018-1-31'\n","\n","# 3) 조건에 맞는 데이터 수 출력\n","print(len(df[cond1 & cond2 & 
cond3]))"],"metadata":{"id":"L9JYUM9N9u_z","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713450425001,"user_tz":-540,"elapsed":458,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"949eac59-5e2a-449e-a86d-49fb1d08cf32"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["6\n"]}]}]}
--------------------------------------------------------------------------------
/part4/ch5/p5_type1.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyNVXuORZIebZMIq5tLwPhb8"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch5/p5_type1.ipynb)"],"metadata":{"id":"p_I02eB4m9z9"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713100901774,"user_tz":-540,"elapsed":459,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"db3ca790-0d92-4902-9759-bf9a7e7d75a3"},"outputs":[{"output_type":"stream","name":"stdout","text":["118\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"data5-1.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch5/data5-1.csv\")\n","\n","# 1) 조건1.종량제 봉투 종류 ‘규격봉투’\n","cond1 = df['종량제봉투종류'] == '규격봉투'\n","\n","# 2) 조건2.종량제 봉투 용도 ‘음식물쓰레기’\n","cond2 = df['종량제봉투용도'] == '음식물쓰레기'\n","\n","# 3) 조건3. 2l가격이 0이면 제외\n","cond3 = df['2ℓ가격'] != 0\n","\n","# 4) 조건 적용한 데이터\n","df = df[cond1 & cond2 & cond3]\n","\n","# 5) 2l 가격 평균 계산, 반올림, 정수 출력\n","print(round(df['2ℓ가격'].mean()))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data5-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch5/data5-2.csv\")\n","\n","# 1) bmi 파생변수 계산\n","df['bmi'] = df['Weight'] / (df['Height']/100)**2\n","\n","# 2) 조건1. 정상체중 bmi\n","cond1 = (df['bmi'] >= 18.5) & (df['bmi'] < 23)\n","\n","# 3) 조건2. 
위험체중 bmi\n","cond2 = (df['bmi'] >= 23) & (df['bmi'] < 25)\n","\n","# 4) 조건1의 인원과 조건2의 인원 차이 계산, 절대값 처리\n","print(abs(len(df[cond1]) - len(df[cond2])))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1719909009812,"user_tz":-540,"elapsed":333,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"a9ed122d-e44a-44e3-89b4-77b5887cd3cf"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["144\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data5-3.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch5/data5-3.csv\")\n","\n","# 1) '순전입' 파생변수 계산\n","df['순전입'] = df['전입학생수(계)'] - df['전출학생수(계)']\n","\n","# 2) '순전입' 컬럼 기준으로 내림차순 정렬\n","df = df.sort_values('순전입', ascending=False)\n","\n","# 3) 첫번째 행의 전체 학생 수 값 선택\n","print(int(df.iloc[0,-2]))"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713100950339,"user_tz":-540,"elapsed":426,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"36dbc6da-2150-43b9-dc56-21b9dca328e9"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["230\n"]}]}]}
--------------------------------------------------------------------------------
/part4/ch6/data6-1-1.csv:
--------------------------------------------------------------------------------
1 | 소방서,출동시간,도착시간
2 | 소방서1,2023-09-16 06:12:29,2023-09-16 07:51:55
3 | 소방서5,2023-09-19 06:11:41,2023-09-19 08:08:36
4 | 소방서1,2023-09-14 06:12:00,2023-09-14 07:24:21
5 | 소방서6,2023-09-20 06:12:24,2023-09-20 06:30:45
6 | 소방서9,2023-09-15 06:11:50,2023-09-15 06:44:00
7 | 소방서1,2023-09-14 06:11:45,2023-09-14 06:24:41
8 | 소방서10,2023-09-14 06:12:30,2023-09-14 06:13:50
9 | 소방서8,2023-09-19 06:11:37,2023-09-19 06:17:42
10 | 소방서3,2023-09-18 06:11:44,2023-09-18 08:08:45
11 | 소방서5,2023-09-14 06:11:58,2023-09-14 07:14:14
12 | 소방서8,2023-09-20 06:11:45,2023-09-20 06:52:06
13 | 소방서9,2023-09-13 06:12:21,2023-09-13 07:58:59
14 | 소방서9,2023-09-11 06:11:43,2023-09-11 06:52:43
15 | 소방서7,2023-09-10 06:12:02,2023-09-10 07:08:45
16 | 소방서9,2023-09-10 06:11:44,2023-09-10 07:38:22
17 | 소방서8,2023-09-12 06:11:52,2023-09-12 07:06:15
18 | 소방서9,2023-09-20 06:11:40,2023-09-20 07:49:21
19 | 소방서3,2023-09-19 06:11:36,2023-09-19 07:15:18
20 | 소방서5,2023-09-18 06:12:13,2023-09-18 06:56:48
21 | 소방서7,2023-09-19 06:11:48,2023-09-19 07:11:12
22 | 소방서10,2023-09-15 06:12:33,2023-09-15 06:38:10
23 | 소방서2,2023-09-10 06:12:11,2023-09-10 06:15:08
24 | 소방서6,2023-09-17 06:12:09,2023-09-17 07:21:41
25 | 소방서1,2023-09-10 06:12:16,2023-09-10 07:17:04
26 | 소방서4,2023-09-12 06:12:04,2023-09-12 06:35:13
27 | 소방서9,2023-09-13 06:12:10,2023-09-13 07:49:39
28 | 소방서1,2023-09-20 06:12:26,2023-09-20 07:11:38
29 | 소방서1,2023-09-16 06:11:43,2023-09-16 06:58:26
30 | 소방서5,2023-09-14 06:11:54,2023-09-14 07:05:54
31 | 소방서2,2023-09-11 06:12:30,2023-09-11 07:18:52
32 | 소방서2,2023-09-19 06:12:07,2023-09-19 08:07:07
33 | 소방서9,2023-09-14 06:12:30,2023-09-14 07:23:14
34 | 소방서7,2023-09-20 06:12:14,2023-09-20 06:46:44
35 | 소방서1,2023-09-12 06:12:13,2023-09-12 07:06:35
36 | 소방서1,2023-09-10 06:11:54,2023-09-10 06:33:50
37 | 소방서7,2023-09-19 06:11:49,2023-09-19 08:04:32
38 | 소방서3,2023-09-15 06:11:52,2023-09-15 06:43:39
39 | 소방서2,2023-09-10 06:12:31,2023-09-10 07:51:04
40 | 소방서7,2023-09-12 06:12:15,2023-09-12 06:32:13
41 | 소방서5,2023-09-14 06:12:11,2023-09-14 06:29:45
42 | 소방서6,2023-09-12 06:12:27,2023-09-12 07:33:37
43 | 소방서10,2023-09-14 06:12:07,2023-09-14 06:20:54
44 | 소방서3,2023-09-13 06:11:52,2023-09-13 06:15:14
45 | 소방서8,2023-09-10 06:12:06,2023-09-10 06:51:33
46 | 소방서1,2023-09-18 06:12:01,2023-09-18 06:54:56
47 | 소방서9,2023-09-12 06:11:59,2023-09-12 06:30:20
48 | 소방서9,2023-09-19 06:12:00,2023-09-19 07:45:23
49 | 소방서4,2023-09-11 06:12:13,2023-09-11 06:24:30
50 | 소방서9,2023-09-17 06:12:21,2023-09-17 08:06:41
51 | 소방서5,2023-09-14 06:12:32,2023-09-14 07:17:37
52 | 소방서4,2023-09-12 06:12:22,2023-09-12 07:51:19
53 | 소방서1,2023-09-17 06:12:17,2023-09-17 07:25:31
54 | 소방서2,2023-09-11 06:12:14,2023-09-11 06:23:35
55 | 소방서9,2023-09-12 06:12:26,2023-09-12 08:12:22
56 | 소방서9,2023-09-18 06:11:44,2023-09-18 08:08:14
57 | 소방서8,2023-09-14 06:11:45,2023-09-14 06:16:01
58 | 소방서10,2023-09-13 06:12:13,2023-09-13 07:04:11
59 | 소방서4,2023-09-12 06:11:35,2023-09-12 06:29:14
60 | 소방서1,2023-09-11 06:11:56,2023-09-11 07:16:17
61 | 소방서10,2023-09-19 06:11:50,2023-09-19 06:19:13
62 | 소방서8,2023-09-15 06:11:52,2023-09-15 08:02:18
63 | 소방서6,2023-09-15 06:12:21,2023-09-15 07:51:49
64 | 소방서6,2023-09-17 06:11:42,2023-09-17 06:36:06
65 | 소방서1,2023-09-14 06:12:10,2023-09-14 07:17:26
66 | 소방서3,2023-09-12 06:12:10,2023-09-12 07:00:14
67 | 소방서5,2023-09-19 06:11:36,2023-09-19 06:46:08
68 | 소방서10,2023-09-15 06:12:31,2023-09-15 07:01:34
69 | 소방서10,2023-09-14 06:12:15,2023-09-14 06:26:44
70 | 소방서10,2023-09-15 06:12:19,2023-09-15 07:35:47
71 | 소방서5,2023-09-13 06:11:58,2023-09-13 07:26:20
72 | 소방서4,2023-09-13 06:12:02,2023-09-13 07:26:59
73 | 소방서5,2023-09-15 06:12:31,2023-09-15 06:36:38
74 | 소방서1,2023-09-14 06:11:52,2023-09-14 07:16:38
75 | 소방서3,2023-09-13 06:12:00,2023-09-13 06:18:30
76 | 소방서9,2023-09-18 06:12:14,2023-09-18 08:04:37
77 | 소방서4,2023-09-11 06:11:39,2023-09-11 06:17:08
78 | 소방서9,2023-09-11 06:11:45,2023-09-11 06:49:30
79 | 소방서10,2023-09-20 06:12:14,2023-09-20 07:46:38
80 | 소방서4,2023-09-19 06:11:39,2023-09-19 06:44:57
81 | 소방서3,2023-09-14 06:12:12,2023-09-14 07:07:14
82 | 소방서10,2023-09-20 06:12:03,2023-09-20 07:39:13
83 | 소방서4,2023-09-20 06:12:17,2023-09-20 07:20:27
84 | 소방서9,2023-09-17 06:12:15,2023-09-17 07:17:10
85 | 소방서8,2023-09-17 06:12:16,2023-09-17 07:20:21
86 | 소방서2,2023-09-13 06:11:55,2023-09-13 07:22:30
87 | 소방서5,2023-09-17 06:12:01,2023-09-17 08:11:05
88 | 소방서7,2023-09-14 06:11:43,2023-09-14 07:54:11
89 | 소방서2,2023-09-20 06:11:58,2023-09-20 07:08:46
90 | 소방서7,2023-09-12 06:12:12,2023-09-12 06:32:43
91 | 소방서3,2023-09-17 06:11:35,2023-09-17 06:39:50
92 | 소방서1,2023-09-17 06:11:36,2023-09-17 06:38:40
93 | 소방서7,2023-09-20 06:12:34,2023-09-20 07:06:15
94 | 소방서6,2023-09-12 06:12:33,2023-09-12 06:19:32
95 | 소방서4,2023-09-14 06:11:50,2023-09-14 06:50:49
96 | 소방서3,2023-09-16 06:12:27,2023-09-16 06:50:09
97 | 소방서6,2023-09-17 06:12:02,2023-09-17 06:16:49
98 | 소방서6,2023-09-18 06:12:05,2023-09-18 06:41:31
99 | 소방서4,2023-09-12 06:11:52,2023-09-12 08:10:19
100 | 소방서7,2023-09-12 06:12:22,2023-09-12 07:02:26
101 | 소방서8,2023-09-20 06:12:16,2023-09-20 06:30:46
102 |
--------------------------------------------------------------------------------
/part4/ch6/data6-1-2.csv:
--------------------------------------------------------------------------------
1 | 학교명,교사수,1학년,2학년,3학년,4학년,5학년,6학년
2 | 학교1,20,118,72,154,118,77,59
3 | 학교2,31,187,193,135,137,63,90
4 | 학교3,23,136,193,135,112,91,50
5 | 학교4,32,72,146,67,51,130,164
6 | 학교5,22,61,73,86,82,55,124
7 | 학교6,32,196,172,117,170,59,128
8 | 학교7,29,183,173,102,184,194,130
9 | 학교8,19,151,181,161,187,174,183
10 | 학교9,40,157,145,180,58,96,71
11 | 학교10,34,117,93,135,136,191,150
12 | 학교11,21,166,149,196,136,54,98
13 | 학교12,37,72,143,53,140,108,153
14 | 학교13,36,178,65,123,176,109,189
15 | 학교14,34,95,89,187,163,146,168
16 | 학교15,20,52,84,166,99,55,115
17 | 학교16,30,119,161,157,50,77,66
18 | 학교17,35,140,72,71,160,52,187
19 | 학교18,31,68,189,153,57,132,117
20 | 학교19,34,58,193,135,156,139,56
21 | 학교20,39,91,157,66,86,145,110
22 | 학교21,42,70,64,182,149,187,128
23 | 학교22,23,124,149,144,82,186,136
24 | 학교23,35,80,92,200,148,160,65
25 | 학교24,42,84,166,55,139,165,165
26 | 학교25,28,159,57,90,183,133,182
27 | 학교26,35,192,85,137,178,76,189
28 | 학교27,42,144,107,195,137,73,119
29 | 학교28,35,112,102,132,117,151,61
30 | 학교29,35,70,100,179,101,54,99
31 | 학교30,27,194,109,71,131,71,137
32 | 학교31,36,187,84,186,84,173,176
33 | 학교32,42,151,57,115,197,174,137
34 | 학교33,31,107,178,82,51,176,137
35 | 학교34,41,199,81,63,142,167,142
36 | 학교35,40,157,130,141,105,164,137
37 | 학교36,25,96,147,65,156,146,178
38 | 학교37,27,94,193,146,144,67,125
39 | 학교38,22,117,178,132,64,147,63
40 | 학교39,37,149,126,77,169,196,133
41 | 학교40,26,164,118,162,194,196,138
42 | 학교41,26,166,178,197,120,136,62
43 | 학교42,24,81,55,159,177,83,172
44 | 학교43,36,60,170,188,86,133,144
45 | 학교44,26,58,168,179,123,198,53
46 | 학교45,29,148,108,66,114,122,93
47 | 학교46,32,141,158,61,53,177,92
48 | 학교47,25,61,118,184,92,185,99
49 | 학교48,28,177,169,107,122,185,73
50 | 학교49,22,163,189,192,121,99,183
51 | 학교50,21,150,154,164,78,65,195
52 | 학교51,32,161,182,141,89,176,84
53 | 학교52,25,104,111,59,112,103,68
54 | 학교53,31,56,52,155,132,190,57
55 | 학교54,20,113,161,124,97,127,78
56 | 학교55,28,133,104,181,56,142,89
57 | 학교56,33,107,156,106,180,161,150
58 | 학교57,36,98,149,69,177,60,123
59 | 학교58,23,171,77,82,81,167,140
60 | 학교59,36,54,163,183,140,123,142
61 | 학교60,25,149,62,77,180,74,133
62 | 학교61,42,71,102,148,96,157,82
63 | 학교62,34,85,51,91,72,134,58
64 | 학교63,22,87,106,167,160,101,153
65 | 학교64,20,64,88,79,168,73,84
66 | 학교65,40,60,141,162,102,111,110
67 | 학교66,32,137,86,72,52,124,133
68 | 학교67,40,152,138,108,65,64,190
69 | 학교68,21,78,113,123,110,82,192
70 | 학교69,19,189,117,157,192,144,144
71 | 학교70,39,83,156,58,124,139,130
72 | 학교71,41,50,139,179,50,125,70
73 | 학교72,31,97,133,163,76,186,100
74 | 학교73,31,68,99,102,141,159,173
75 | 학교74,28,191,189,91,149,56,116
76 | 학교75,42,57,91,99,143,97,75
77 | 학교76,23,75,119,81,68,142,64
78 | 학교77,27,142,83,114,155,73,116
79 | 학교78,35,191,80,186,80,134,123
80 | 학교79,39,128,162,110,51,73,78
81 | 학교80,38,75,162,149,141,96,83
82 | 학교81,31,50,156,103,111,51,156
83 | 학교82,23,71,94,187,118,157,93
84 | 학교83,31,128,66,56,81,195,150
85 | 학교84,27,195,63,189,98,129,129
86 | 학교85,38,190,136,55,87,128,122
87 | 학교86,25,159,104,177,61,158,66
88 | 학교87,23,93,62,190,106,66,164
89 | 학교88,37,98,152,195,161,186,143
90 | 학교89,21,105,72,162,60,186,129
91 | 학교90,41,57,103,59,89,184,75
92 | 학교91,41,62,155,170,148,135,96
93 | 학교92,39,153,159,76,72,61,105
94 | 학교93,23,84,199,127,143,180,157
95 | 학교94,41,117,122,155,97,186,185
96 | 학교95,40,190,73,50,198,86,183
97 | 학교96,38,75,166,113,191,172,57
98 | 학교97,30,125,169,145,122,187,194
99 | 학교98,37,136,100,93,60,145,63
100 | 학교99,37,74,175,145,157,95,76
101 | 학교100,40,68,109,196,148,84,179
102 |
--------------------------------------------------------------------------------
/part4/ch6/data6-1-3.csv:
--------------------------------------------------------------------------------
1 | 날짜,강력범죄,절도범죄,폭력범죄,지능범죄,풍속범죄,교통범죄,경찰서명
2 | 2020년 01월,22,102,86,62,28,212,B경찰서
3 | 2020년 02월,26,138,80,61,31,183,E경찰서
4 | 2020년 03월,14,129,76,60,29,202,C경찰서
5 | 2020년 04월,26,142,83,71,33,182,B경찰서
6 | 2020년 05월,28,131,80,72,28,212,B경찰서
7 | 2020년 06월,22,104,84,72,34,201,A경찰서
8 | 2020년 07월,14,134,84,72,32,182,A경찰서
9 | 2020년 08월,27,101,71,67,31,198,B경찰서
10 | 2020년 09월,21,108,87,70,33,187,B경찰서
11 | 2020년 10월,21,114,89,61,32,212,E경찰서
12 | 2020년 11월,22,134,79,64,35,198,D경찰서
13 | 2020년 12월,13,118,82,76,37,236,C경찰서
14 | 2021년 01월,16,104,82,60,25,206,C경찰서
15 | 2021년 02월,19,145,88,60,30,204,B경찰서
16 | 2021년 03월,12,136,79,72,32,233,B경찰서
17 | 2021년 04월,26,106,87,71,28,216,C경찰서
18 | 2021년 05월,13,140,75,80,29,187,E경찰서
19 | 2021년 06월,12,131,78,65,30,201,E경찰서
20 | 2021년 07월,12,116,79,65,31,202,D경찰서
21 | 2021년 08월,25,129,77,69,33,185,A경찰서
22 | 2021년 09월,28,107,71,71,32,203,D경찰서
23 | 2021년 10월,18,130,71,77,30,206,C경찰서
24 | 2021년 11월,12,140,82,73,32,187,A경찰서
25 | 2021년 12월,27,108,90,89,20,213,E경찰서
26 | 2022년 01월,23,140,83,60,26,219,A경찰서
27 | 2022년 02월,11,111,73,60,31,211,B경찰서
28 | 2022년 03월,17,125,87,76,25,220,C경찰서
29 | 2022년 04월,10,112,78,78,28,185,E경찰서
30 | 2022년 05월,20,122,76,74,30,192,B경찰서
31 | 2022년 06월,24,139,87,77,29,192,E경찰서
32 | 2022년 07월,19,107,79,80,30,193,E경찰서
33 | 2022년 08월,27,124,81,62,29,187,C경찰서
34 | 2022년 09월,25,108,76,66,28,182,B경찰서
35 | 2022년 10월,25,134,71,75,33,189,C경찰서
36 | 2022년 11월,26,122,72,62,31,180,E경찰서
37 | 2022년 12월,22,104,87,80,31,181,C경찰서
38 | 2023년 01월,28,127,90,73,33,202,B경찰서
39 | 2023년 02월,10,102,96,73,33,166,A경찰서
40 | 2023년 03월,29,140,70,79,34,211,C경찰서
41 | 2023년 04월,17,123,100,62,32,201,B경찰서
42 | 2023년 05월,11,138,70,75,35,190,B경찰서
43 | 2023년 06월,30,128,78,74,34,216,C경찰서
44 | 2023년 07월,30,144,74,73,31,248,D경찰서
45 | 2023년 08월,26,120,86,76,33,188,C경찰서
46 | 2023년 09월,11,117,86,65,33,192,C경찰서
47 | 2023년 10월,12,117,86,67,28,200,C경찰서
48 | 2023년 11월,22,103,83,72,33,196,B경찰서
49 | 2023년 12월,28,124,83,61,27,209,A경찰서
50 | 2024년 01월,10,140,90,65,31,184,B경찰서
51 | 2024년 02월,16,137,72,80,30,180,C경찰서
52 | 2024년 03월,17,144,84,76,34,197,C경찰서
53 | 2024년 04월,14,150,85,75,31,181,C경찰서
54 | 2024년 05월,22,119,73,74,34,200,B경찰서
55 | 2024년 06월,30,122,70,70,24,161,B경찰서
56 | 2024년 07월,21,115,76,62,35,151,C경찰서
57 | 2024년 08월,17,104,98,85,24,180,E경찰서
58 | 2024년 09월,21,128,84,69,31,181,B경찰서
59 | 2024년 10월,20,105,80,77,33,188,E경찰서
60 | 2024년 11월,25,148,90,67,33,210,B경찰서
61 | 2024년 12월,21,142,77,62,35,183,E경찰서
62 |
--------------------------------------------------------------------------------
/part4/ch6/data6-3-2.csv:
--------------------------------------------------------------------------------
1 | solar,wind,o3,temperature
2 | 89.14,6.28,33.52,23.0
3 | 109.97,1.04,27.01,20.7
4 | 102.83,6.42,41.0,20.5
5 | 84.94,10.2,33.44,22.2
6 | 94.21,4.95,29.97,21.4
7 | 116.51,5.07,28.97,24.7
8 | 75.73,5.36,29.57,19.9
9 | 95.71,1.28,25.42,23.5
10 | 112.66,5.85,29.52,22.6
11 | 91.33,1.79,31.39,21.7
12 | 93.21,4.14,32.9,24.1
13 | 99.05,7.49,32.9,22.9
14 | 114.91,3.53,28.63,20.2
15 | 93.61,6.0,22.92,22.8
16 | 95.56,7.03,26.65,24.2
17 | 95.66,5.56,38.06,21.5
18 | 122.06,2.26,34.48,22.1
19 | 121.87,4.34,31.85,21.0
20 | 110.04,8.92,26.19,21.5
21 | 103.86,0.95,30.02,23.7
22 | 107.37,4.45,23.72,20.3
23 | 114.91,3.9,27.24,19.5
24 | 90.64,5.24,28.77,20.8
25 | 111.76,6.5,28.19,19.1
26 | 87.46,8.22,34.78,22.3
27 | 93.62,4.46,22.91,20.8
28 | 109.07,6.62,25.67,18.3
29 | 85.71,6.0,23.13,20.6
30 | 98.6,5.95,23.81,21.2
31 | 91.38,3.87,30.62,22.6
32 | 97.44,3.01,22.0,21.6
33 | 72.01,2.8,33.77,25.2
34 | 82.28,3.49,28.77,20.5
35 | 93.0,5.64,30.34,21.5
36 | 109.27,6.52,31.61,21.6
37 | 98.26,5.65,27.83,21.1
38 | 100.03,3.9,35.16,22.2
39 | 106.88,8.61,29.03,22.6
40 | 91.2,8.04,32.97,24.0
41 | 102.84,4.29,29.0,18.8
42 | 91.95,3.35,31.45,20.8
43 | 82.72,5.26,31.4,20.5
44 | 96.09,7.53,31.25,23.4
45 | 105.74,5.67,25.13,21.1
46 | 103.39,6.11,32.18,18.0
47 | 99.88,4.58,28.41,22.0
48 | 123.92,5.91,33.15,22.2
49 | 104.13,8.09,19.24,21.8
50 | 109.79,4.52,22.67,19.9
51 | 122.38,5.29,31.82,25.7
52 | 87.06,5.51,39.31,20.9
53 | 89.61,5.57,34.18,21.2
54 | 117.44,2.18,26.59,20.6
55 | 92.02,1.25,21.54,20.1
56 | 100.3,2.96,33.71,19.6
57 | 110.69,5.34,29.6,21.2
58 | 108.91,6.11,32.95,20.0
59 | 117.55,3.94,30.58,26.4
60 | 114.96,7.75,30.15,18.0
61 | 110.69,4.71,44.79,19.9
62 | 92.27,5.04,29.97,20.8
63 | 107.95,4.61,29.2,23.3
64 | 103.14,5.27,29.39,21.0
65 | 86.74,6.41,27.08,22.7
66 | 114.17,6.33,34.95,20.0
67 | 108.07,3.2,28.23,22.2
68 | 100.45,8.05,33.18,25.0
69 | 97.67,2.81,31.42,20.1
70 | 88.02,5.16,36.09,22.4
71 | 102.0,4.45,32.1,22.3
72 | 104.68,2.9,23.93,17.5
73 | 91.69,4.85,23.37,20.4
74 | 111.62,3.52,37.04,24.5
75 | 89.03,5.15,26.96,21.8
76 | 78.77,5.81,23.4,18.8
77 | 110.4,7.94,26.65,23.1
78 | 95.97,5.61,36.32,20.8
79 | 98.74,3.78,22.9,22.5
80 | 91.62,4.22,25.67,22.4
81 | 83.94,5.28,26.67,19.2
82 | 112.55,5.19,23.74,23.5
83 | 93.11,7.92,24.08,19.3
84 | 116.61,7.79,22.41,19.2
85 | 108.07,4.28,27.69,20.2
86 | 96.85,3.9,28.23,21.1
87 | 89.14,-0.11,26.59,22.3
88 | 92.68,3.9,21.73,20.1
89 | 87.87,3.04,36.27,23.3
90 | 120.87,4.29,23.35,22.7
91 | 101.64,5.78,31.39,20.4
92 | 111.5,5.35,24.63,23.7
93 | 87.33,4.94,33.34,23.9
94 | 101.81,5.4,34.78,21.6
95 | 111.78,4.75,25.61,21.6
96 | 96.65,5.39,20.38,18.0
97 | 110.31,-1.46,33.48,21.9
98 | 89.15,4.46,39.38,21.1
99 | 86.37,4.78,32.08,22.1
100 | 103.79,4.32,30.8,21.6
101 | 96.21,4.56,34.1,22.3
102 |
--------------------------------------------------------------------------------
/part4/ch6/energy_test.csv:
--------------------------------------------------------------------------------
1 | Compac,Surf_Area,Wall_Area,Roof,Height,Orient,Glaze_Area,Glaze_Distr,Cool_Load
2 | 0.64,784.0,343.0,220.5,Short,South,0.4,4,22.25
3 | 0.82,612.5,318.5,Large,Tall,North,0.4,3,32.43
4 | 0.76,661.5,416.5,Medium,Tall,South,0.1,5,33.64
5 | 0.74,686.0,245.0,220.5,Short,East,0.4,1,17.25
6 | 0.64,784.0,343.0,220.5,Short,North,0.25,5,20.13
7 | 0.69,735.0,294.0,220.5,Short,South,0.1,2,13.32
8 | 0.74,686.0,245.0,220.5,Short,East,0.1,2,13.72
9 | 0.71,710.5,269.5,220.5,Short,South,0.4,5,15.33
10 | 0.76,661.5,416.5,Medium,Tall,South,0.1,4,33.89
11 | 0.66,759.5,318.5,220.5,Short,East,0.4,4,17.82
12 | 0.79,637.0,343.0,Large,Tall,North,0.1,5,34.99
13 | 0.64,784.0,343.0,220.5,Short,North,0.4,2,21.72
14 | 0.74,686.0,245.0,220.5,Short,West,0.25,3,14.76
15 | 0.82,612.5,318.5,Large,Tall,North,0.25,1,30.17
16 | 0.76,661.5,416.5,Medium,Tall,East,0.4,4,39.67
17 | 0.64,784.0,343.0,220.5,Short,West,0.4,5,21.4
18 | 0.74,686.0,245.0,220.5,Short,East,0.25,2,15.1
19 | 0.69,735.0,294.0,220.5,Short,West,0.4,1,17.2
20 | 0.71,710.5,269.5,220.5,Short,North,0.1,4,13.67
21 | 0.62,808.5,367.5,220.5,Short,East,0.1,3,14.23
22 | 0.64,784.0,343.0,220.5,Short,South,0.0,0,16.75
23 | 0.79,637.0,343.0,Large,Tall,West,0.4,4,39.56
24 | 0.76,661.5,416.5,Medium,Tall,North,0.1,1,33.87
25 | 0.66,759.5,318.5,220.5,Short,North,0.1,4,14.86
26 | 0.82,612.5,318.5,Large,Tall,South,0.4,4,31.14
27 | 0.64,784.0,343.0,220.5,Short,West,0.25,2,20.43
28 | 0.9,563.5,318.5,Medium,Tall,South,0.1,3,30.08
29 | 0.98,514.5,294.0,Small,Tall,West,0.4,5,34.01
30 | 0.64,784.0,343.0,220.5,Short,East,0.4,5,20.21
31 | 0.66,759.5,318.5,220.5,Short,North,0.4,3,17.04
32 | 0.86,588.0,294.0,Large,Tall,East,0.25,3,32.93
33 | 0.71,710.5,269.5,220.5,Short,West,0.25,3,14.94
34 | 0.64,784.0,343.0,220.5,Short,North,0.1,5,19.24
35 | 0.82,612.5,318.5,Large,Tall,West,0.25,4,27.34
36 | 0.79,637.0,343.0,Large,Tall,South,0.4,3,38.81
37 | 0.86,588.0,294.0,Large,Tall,South,0.4,4,36.21
38 | 0.79,637.0,343.0,Large,Tall,East,0.1,5,34.18
39 | 0.79,637.0,343.0,Large,Tall,North,0.25,2,43.86
40 | 0.62,808.5,367.5,220.5,Short,West,0.4,3,16.0
41 | 0.79,637.0,343.0,Large,Tall,East,0.25,4,45.48
42 | 0.74,686.0,245.0,220.5,Short,West,0.25,2,15.44
43 | 0.86,588.0,294.0,Large,Tall,North,0.1,3,31.73
44 | 0.74,686.0,245.0,220.5,Short,South,0.0,0,10.94
45 | 0.76,661.5,416.5,Medium,Tall,South,0.25,1,37.45
46 | 0.98,514.5,294.0,Small,Tall,West,0.4,4,33.88
47 | 0.74,686.0,245.0,220.5,Short,West,0.4,3,16.6
48 | 0.71,710.5,269.5,220.5,Short,North,0.1,1,13.8
49 | 0.62,808.5,367.5,220.5,Short,West,0.25,1,15.76
50 | 0.71,710.5,269.5,220.5,Short,North,0.4,1,17.1
51 | 0.9,563.5,318.5,Medium,Tall,West,0.25,3,32.46
52 | 0.86,588.0,294.0,Large,Tall,North,0.25,5,28.02
53 | 0.98,514.5,294.0,Small,Tall,East,0.1,4,25.72
54 | 0.74,686.0,245.0,220.5,Short,North,0.1,5,13.65
55 | 0.74,686.0,245.0,220.5,Short,East,0.4,5,16.62
56 | 0.64,784.0,343.0,220.5,Short,North,0.0,0,16.78
57 | 0.86,588.0,294.0,Large,Tall,East,0.25,1,28.61
58 | 0.64,784.0,343.0,220.5,Short,East,0.0,0,16.8
59 | 0.9,563.5,318.5,Medium,Tall,East,0.1,3,32.85
60 | 0.76,661.5,416.5,Medium,Tall,North,0.4,5,39.37
61 | 0.9,563.5,318.5,Medium,Tall,North,0.0,0,28.28
62 | 0.74,686.0,245.0,220.5,Short,North,0.25,4,14.92
63 | 0.62,808.5,367.5,220.5,Short,South,0.25,4,15.07
64 | 0.71,710.5,269.5,220.5,Short,North,0.25,1,15.42
65 | 0.98,514.5,294.0,Small,Tall,South,0.25,3,30.1
66 | 0.69,735.0,294.0,220.5,Short,North,0.25,3,14.92
67 | 0.64,784.0,343.0,220.5,Short,North,0.4,4,21.68
68 | 0.9,563.5,318.5,Medium,Tall,West,0.4,1,40.99
69 | 0.66,759.5,318.5,220.5,Short,North,0.4,2,17.85
70 | 0.62,808.5,367.5,220.5,Short,South,0.1,3,14.14
71 | 0.79,637.0,343.0,Large,Tall,North,0.1,3,43.12
72 | 0.76,661.5,416.5,Medium,Tall,South,0.1,1,34.14
73 | 0.71,710.5,269.5,220.5,Short,East,0.4,2,17.37
74 | 0.64,784.0,343.0,220.5,Short,West,0.4,3,21.53
75 | 0.98,514.5,294.0,Small,Tall,West,0.1,5,26.18
76 | 0.74,686.0,245.0,220.5,Short,West,0.1,2,13.79
77 | 0.86,588.0,294.0,Large,Tall,North,0.4,4,31.53
78 | 0.98,514.5,294.0,Small,Tall,East,0.4,5,33.23
79 | 0.76,661.5,416.5,Medium,Tall,East,0.1,1,34.07
80 | 0.71,710.5,269.5,220.5,Short,South,0.1,5,14.26
81 | 0.62,808.5,367.5,220.5,Short,South,0.1,5,13.99
82 | 0.98,514.5,294.0,Small,Tall,North,0.25,4,29.61
83 | 0.62,808.5,367.5,220.5,Short,West,0.25,2,15.3
84 | 0.82,612.5,318.5,Large,Tall,East,0.1,1,24.91
85 | 0.71,710.5,269.5,220.5,Short,South,0.4,3,16.7
86 | 0.64,784.0,343.0,220.5,Short,North,0.1,2,19.23
87 | 0.66,759.5,318.5,220.5,Short,East,0.1,3,13.7
88 | 0.66,759.5,318.5,220.5,Short,South,0.4,2,18.36
89 | 0.64,784.0,343.0,220.5,Short,South,0.25,5,20.19
90 | 0.98,514.5,294.0,Small,Tall,East,0.4,2,33.13
91 | 0.69,735.0,294.0,220.5,Short,East,0.0,0,12.05
92 | 0.71,710.5,269.5,220.5,Short,North,0.25,4,14.67
93 | 0.79,637.0,343.0,Large,Tall,North,0.25,3,45.13
94 | 0.76,661.5,416.5,Medium,Tall,South,0.1,2,34.17
95 | 0.66,759.5,318.5,220.5,Short,West,0.1,4,14.83
96 | 0.9,563.5,318.5,Medium,Tall,South,0.1,2,29.36
97 | 0.9,563.5,318.5,Medium,Tall,East,0.25,2,32.64
98 | 0.98,514.5,294.0,Small,Tall,North,0.25,1,29.79
99 | 0.66,759.5,318.5,220.5,Short,North,0.25,5,15.83
100 | 0.82,612.5,318.5,Large,Tall,South,0.1,3,25.35
101 | 0.64,784.0,343.0,220.5,Short,West,0.4,2,21.93
102 | 0.98,514.5,294.0,Small,Tall,East,0.25,1,29.79
103 | 0.64,784.0,343.0,220.5,Short,West,0.25,5,20.29
104 | 0.76,661.5,416.5,Medium,Tall,North,0.0,0,29.79
105 | 0.82,612.5,318.5,Large,Tall,East,0.4,1,29.13
106 | 0.76,661.5,416.5,Medium,Tall,North,0.25,3,36.07
107 | 0.86,588.0,294.0,Large,Tall,East,0.1,4,31.76
108 | 0.82,612.5,318.5,Large,Tall,North,0.25,5,26.53
109 | 0.86,588.0,294.0,Large,Tall,West,0.4,5,35.71
110 | 0.86,588.0,294.0,Large,Tall,West,0.0,0,27.87
111 | 0.74,686.0,245.0,220.5,Short,West,0.25,5,14.03
112 | 0.71,710.5,269.5,220.5,Short,East,0.25,1,15.85
113 | 0.62,808.5,367.5,220.5,Short,North,0.1,1,14.34
114 | 0.82,612.5,318.5,Large,Tall,North,0.1,5,25.11
115 | 0.79,637.0,343.0,Large,Tall,West,0.25,1,45.52
116 | 0.66,759.5,318.5,220.5,Short,North,0.25,4,15.95
117 | 0.76,661.5,416.5,Medium,Tall,West,0.4,5,38.18
118 | 0.76,661.5,416.5,Medium,Tall,North,0.1,5,34.25
119 | 0.86,588.0,294.0,Large,Tall,South,0.25,3,28.38
120 | 0.79,637.0,343.0,Large,Tall,East,0.25,2,37.41
121 | 0.69,735.0,294.0,220.5,Short,South,0.25,5,15.22
122 | 0.71,710.5,269.5,220.5,Short,West,0.1,4,14.2
123 | 0.74,686.0,245.0,220.5,Short,South,0.4,3,16.57
124 | 0.74,686.0,245.0,220.5,Short,North,0.1,3,13.6
125 | 0.74,686.0,245.0,220.5,Short,East,0.25,5,14.58
126 | 0.9,563.5,318.5,Medium,Tall,West,0.0,0,29.6
127 | 0.62,808.5,367.5,220.5,Short,South,0.4,1,17.15
128 | 0.62,808.5,367.5,220.5,Short,East,0.25,5,14.61
129 | 0.86,588.0,294.0,Large,Tall,East,0.0,0,21.97
130 | 0.62,808.5,367.5,220.5,Short,North,0.4,2,17.36
131 | 0.86,588.0,294.0,Large,Tall,East,0.4,5,31.2
132 | 0.76,661.5,416.5,Medium,Tall,North,0.4,4,40.36
133 | 0.71,710.5,269.5,220.5,Short,East,0.1,5,13.75
134 | 0.74,686.0,245.0,220.5,Short,South,0.1,1,13.48
135 | 0.9,563.5,318.5,Medium,Tall,West,0.1,1,34.33
136 | 0.64,784.0,343.0,220.5,Short,South,0.25,3,20.46
137 | 0.79,637.0,343.0,Large,Tall,North,0.25,1,43.8
138 | 0.79,637.0,343.0,Large,Tall,North,0.25,4,36.26
139 | 0.74,686.0,245.0,220.5,Short,West,0.1,5,13.5
140 | 0.62,808.5,367.5,220.5,Short,East,0.25,3,14.96
141 | 0.74,686.0,245.0,220.5,Short,East,0.25,4,15.24
142 | 0.69,735.0,294.0,220.5,Short,East,0.25,2,15.85
143 | 0.76,661.5,416.5,Medium,Tall,West,0.25,2,36.81
144 | 0.82,612.5,318.5,Large,Tall,East,0.0,0,21.46
145 | 0.62,808.5,367.5,220.5,Short,East,0.1,2,14.57
146 | 0.71,710.5,269.5,220.5,Short,East,0.4,3,15.47
147 | 0.64,784.0,343.0,220.5,Short,East,0.1,4,19.25
148 | 0.9,563.5,318.5,Medium,Tall,South,0.4,4,39.22
149 | 0.74,686.0,245.0,220.5,Short,North,0.4,5,16.69
150 | 0.69,735.0,294.0,220.5,Short,North,0.25,5,15.14
151 | 0.9,563.5,318.5,Medium,Tall,North,0.1,3,34.14
152 | 0.69,735.0,294.0,220.5,Short,East,0.1,2,13.54
153 | 0.69,735.0,294.0,220.5,Short,North,0.4,3,16.35
154 | 0.79,637.0,343.0,Large,Tall,East,0.4,4,47.59
155 | 0.76,661.5,416.5,Medium,Tall,East,0.1,5,34.35
156 | 0.71,710.5,269.5,220.5,Short,West,0.1,2,14.21
157 | 0.62,808.5,367.5,220.5,Short,North,0.4,4,17.1
158 | 0.9,563.5,318.5,Medium,Tall,South,0.1,4,32.83
159 | 0.98,514.5,294.0,Small,Tall,South,0.1,3,25.84
160 | 0.71,710.5,269.5,220.5,Short,West,0.4,4,17.74
161 | 0.64,784.0,343.0,220.5,Short,West,0.4,1,22.53
162 | 0.66,759.5,318.5,220.5,Short,West,0.25,4,16.14
163 | 0.64,784.0,343.0,220.5,Short,North,0.4,3,20.78
164 | 0.74,686.0,245.0,220.5,Short,West,0.4,1,17.25
165 | 0.71,710.5,269.5,220.5,Short,West,0.4,5,15.31
166 | 0.82,612.5,318.5,Large,Tall,West,0.25,1,31.39
167 | 0.98,514.5,294.0,Small,Tall,East,0.1,1,26.37
168 | 0.74,686.0,245.0,220.5,Short,South,0.4,4,16.9
169 | 0.9,563.5,318.5,Medium,Tall,North,0.25,2,35.56
170 | 0.74,686.0,245.0,220.5,Short,West,0.1,1,13.7
171 | 0.9,563.5,318.5,Medium,Tall,North,0.25,4,32.12
172 | 0.9,563.5,318.5,Medium,Tall,North,0.1,4,29.34
173 | 0.62,808.5,367.5,220.5,Short,West,0.25,5,14.75
174 | 0.79,637.0,343.0,Large,Tall,East,0.1,3,41.22
175 | 0.64,784.0,343.0,220.5,Short,South,0.25,4,20.37
176 | 0.64,784.0,343.0,220.5,Short,West,0.25,1,21.08
177 | 0.76,661.5,416.5,Medium,Tall,North,0.1,2,33.91
178 | 0.76,661.5,416.5,Medium,Tall,East,0.1,2,34.07
179 | 0.82,612.5,318.5,Large,Tall,West,0.1,5,27.69
180 | 0.62,808.5,367.5,220.5,Short,East,0.4,2,17.38
181 | 0.98,514.5,294.0,Small,Tall,North,0.1,5,25.64
182 | 0.82,612.5,318.5,Large,Tall,East,0.4,2,27.93
183 | 0.82,612.5,318.5,Large,Tall,North,0.1,1,27.31
184 | 0.74,686.0,245.0,220.5,Short,East,0.1,1,13.71
185 | 0.62,808.5,367.5,220.5,Short,North,0.25,3,14.89
186 | 0.82,612.5,318.5,Large,Tall,East,0.1,2,25.02
187 | 0.66,759.5,318.5,220.5,Short,North,0.1,2,14.86
188 | 0.62,808.5,367.5,220.5,Short,South,0.4,3,16.56
189 | 0.64,784.0,343.0,220.5,Short,North,0.4,5,20.82
190 | 0.69,735.0,294.0,220.5,Short,East,0.25,1,15.63
191 | 0.66,759.5,318.5,220.5,Short,South,0.1,2,15.0
192 | 0.98,514.5,294.0,Small,Tall,North,0.4,1,33.37
193 | 0.64,784.0,343.0,220.5,Short,South,0.4,1,22.72
194 | 0.66,759.5,318.5,220.5,Short,North,0.1,5,14.54
195 | 0.62,808.5,367.5,220.5,Short,South,0.1,4,14.28
196 | 0.62,808.5,367.5,220.5,Short,East,0.25,2,15.64
197 | 0.79,637.0,343.0,Large,Tall,South,0.4,1,38.35
198 | 0.74,686.0,245.0,220.5,Short,South,0.1,3,13.65
199 | 0.76,661.5,416.5,Medium,Tall,South,0.4,2,40.47
200 | 0.79,637.0,343.0,Large,Tall,East,0.4,2,39.41
201 | 0.66,759.5,318.5,220.5,Short,East,0.4,2,17.89
202 | 0.82,612.5,318.5,Large,Tall,North,0.0,0,23.77
203 | 0.76,661.5,416.5,Medium,Tall,North,0.4,2,39.48
204 | 0.74,686.0,245.0,220.5,Short,North,0.0,0,10.9
205 | 0.98,514.5,294.0,Small,Tall,North,0.4,5,32.88
206 | 0.69,735.0,294.0,220.5,Short,North,0.1,4,14.29
207 | 0.69,735.0,294.0,220.5,Short,North,0.4,2,16.44
208 | 0.86,588.0,294.0,Large,Tall,North,0.0,0,27.3
209 | 0.69,735.0,294.0,220.5,Short,East,0.4,4,16.88
210 | 0.98,514.5,294.0,Small,Tall,West,0.1,4,25.87
211 | 0.62,808.5,367.5,220.5,Short,East,0.1,1,14.5
212 | 0.82,612.5,318.5,Large,Tall,North,0.1,3,28.68
213 | 0.69,735.0,294.0,220.5,Short,West,0.4,2,17.22
214 | 0.71,710.5,269.5,220.5,Short,East,0.4,4,17.2
215 | 0.62,808.5,367.5,220.5,Short,North,0.1,4,14.37
216 | 0.66,759.5,318.5,220.5,Short,North,0.25,1,16.39
217 | 0.82,612.5,318.5,Large,Tall,North,0.1,4,24.61
218 | 0.66,759.5,318.5,220.5,Short,South,0.0,0,12.4
219 | 0.74,686.0,245.0,220.5,Short,East,0.1,4,13.36
220 | 0.64,784.0,343.0,220.5,Short,South,0.25,2,20.48
221 | 0.86,588.0,294.0,Large,Tall,South,0.4,1,31.7
222 | 0.79,637.0,343.0,Large,Tall,West,0.0,0,39.44
223 | 0.64,784.0,343.0,220.5,Short,East,0.25,2,20.56
224 | 0.64,784.0,343.0,220.5,Short,East,0.25,3,20.03
225 | 0.74,686.0,245.0,220.5,Short,East,0.0,0,11.19
226 | 0.9,563.5,318.5,Medium,Tall,East,0.25,1,33.17
227 | 0.98,514.5,294.0,Small,Tall,West,0.25,4,30.12
228 | 0.66,759.5,318.5,220.5,Short,East,0.25,5,16.03
229 | 0.82,612.5,318.5,Large,Tall,South,0.1,1,24.61
230 | 0.74,686.0,245.0,220.5,Short,South,0.25,4,15.03
231 | 0.64,784.0,343.0,220.5,Short,North,0.1,3,19.14
232 | 0.79,637.0,343.0,Large,Tall,North,0.1,4,34.25
233 |
--------------------------------------------------------------------------------
/part4/ch6/p6_type1.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPPdI+WdJzLJVLF3SZNKApC"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch6/p6_type1.ipynb)"],"metadata":{"id":"MY9rC_Qhnqkc"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"uO07g2QeKprH"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"6f9i0g9dKvQQ"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"ShDssNLn6Dw8","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1713101751899,"user_tz":-540,"elapsed":1575,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"0621c31e-a180-4caa-e978-d9acb4f4358c"},"outputs":[{"output_type":"stream","name":"stdout","text":["81\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"data6-1-1.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-1-1.csv\")\n","\n","# 1) datetime형태로 변경\n","df['도착시간'] = pd.to_datetime(df['도착시간'])\n","df['출동시간'] = pd.to_datetime(df['출동시간'])\n","\n","# 2) 출동시간과 도착시간의 차이를 분으로 계산\n","df['시간차이(분)'] = (df['도착시간'] - df['출동시간']).dt.total_seconds() / 60\n","\n","# 3) 소방서별 평균 시간차이 계산\n","avg_diff = df.groupby('소방서')['시간차이(분)'].mean()\n","\n","# 4) 평균 차이가 가장 큰 소방서의 시간을 찾고 출력\n","idx = avg_diff.idxmax()\n","result = avg_diff[idx]\n","print(round(result))"]},{"cell_type":"code","source":["# 참고 (시간 반올림)\n","min = 5.5\n","print(int(min),\"분\")\n","print((min-int(min))*60,\"초\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"J4T5TS-KaMOa","executionInfo":{"status":"ok","timestamp":1692598354240,"user_tz":-540,"elapsed":279,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"1ecb0908-06c2-43a2-f435-f06acacdf287"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["5 분\n","30.0 초\n"]}]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"ccvjEw-aKy39"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data6-1-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-1-2.csv\")\n","\n","# 1) 총 학생 수 계산\n","df['총학생수'] = df.iloc[:, 2:].sum(axis=1)\n","\n","# 2) 교사 한 명당 맡은 학생 수\n","df['학생/교사'] = df['총학생수'] / df['교사수']\n","\n","# 3) 학생/교사 최대값의 인덱스명\n","idx = df['학생/교사'].idxmax()\n","\n","# 4) 학생/교사 최대값의 학교 교사 수 출력\n","print(df.loc[idx, '교사수'])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-3VwHCDKKjRl","executionInfo":{"status":"ok","timestamp":1713101761101,"user_tz":-540,"elapsed":534,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"937a7711-535c-4609-bc82-1db54a46f8cb"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["19\n"]}]},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data6-1-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-1-2.csv\")\n","\n","# 1) 교사 한 명당 맡은 학생 수\n","df['학생/교사'] = (df['1학년'] + df['2학년'] + df['3학년'] + df['4학년'] + df['5학년'] + df['6학년']) / df['교사수']\n","\n","# 2) 학생/교사 컬럼을 내림차순으로 정렬\n","df = df.sort_values('학생/교사', ascending=False)\n","\n","# 3) 최상단 행의 교사수 값 출력\n","print(df.iloc[0, 1])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qk3l6Y6WjwPU","executionInfo":{"status":"ok","timestamp":1713101789183,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"cfb10b97-90c3-41df-9f93-d148337ea056"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["19\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"U2rmG2jaK_v3"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data6-1-3.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-1-3.csv\")\n","\n","# 1) 총 범죄 건수 계산\n","df['총 범죄 건수'] = df.iloc[:, 1:7].sum(axis=1)\n","\n","# 2) 연도 슬라이싱\n","df['연도'] = df[\"날짜\"].str[:4]\n","\n","# 3) 연도별 총 범죄 건수 합 계산\n","result = df['총 범죄 건수'].groupby(df[\"연도\"]).sum()\n","\n","# 4) 가장 큰 값의 월 평균 계산\n","print(round(result.max()/12))"],"metadata":{"id":"WgzH-3yB6I26","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1719908294893,"user_tz":-540,"elapsed":311,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"aeee2772-0a12-4d56-d8e9-10987054ba81"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["533\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"61CtkR_Dg1t9"},"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------
/part4/ch6/p6_type3.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPkyAXeTKMY80JLb1CoEssb"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch6/p6_type3.ipynb)"],"metadata":{"id":"6OZW46EDn6tl"}},{"cell_type":"markdown","source":["## 작업형3"],"metadata":{"id":"aH-zAFBloETg"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"xcmkuNKkYZyN"}},{"cell_type":"code","source":["import pandas as pd\n","df = pd.DataFrame({\n"," \"항암약\":[4,4,3,4,1,4,1,4,1,4,4,2,1,4,2,3,2,4,4,4]\n"," })\n","print(df.head(3))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2aa_3YEGOnoq","executionInfo":{"status":"ok","timestamp":1722874971322,"user_tz":-540,"elapsed":339,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"2c4f0767-55f3-4080-ebb6-be5109d52479"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":[" 항암약\n","0 4\n","1 4\n","2 3\n"]}]},{"cell_type":"code","source":["# 1) 이상 없음(4)의 빈도 계산\n","cnt = sum(df['항암약']==4)\n","\n","# 2) 항암약을 투여 받은 환자 중 '이상 없음' 비율 계산\n","ratio = cnt / len(df)\n","print(ratio)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"oxcQ8AZndVzC","executionInfo":{"status":"ok","timestamp":1722874971322,"user_tz":-540,"elapsed":3,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"bcc76e1f-74e2-4fbd-f1de-3a3c80a6e9cf"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["0.55\n"]}]},{"cell_type":"code","source":["# 1) 카테고리별 비율 계산\n","print(df['항암약'].value_counts(normalize=True))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"i_iSXeDgkYy7","executionInfo":{"status":"ok","timestamp":1722874971322,"user_tz":-540,"elapsed":2,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"ea7cccec-c4df-448a-8575-e7b5473a25ab"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["항암약\n","4 0.55\n","1 0.20\n","2 0.15\n","3 0.10\n","Name: proportion, dtype: float64\n"]}]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cpGaGRyGAZAF","executionInfo":{"status":"ok","timestamp":1722874973077,"user_tz":-540,"elapsed":1757,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"1d6cdeb8-fd90-4a5f-d5e9-3144d8d29856"},"outputs":[{"output_type":"stream","name":"stdout","text":["데이터 수: 20\n","[2.0, 1.0, 3.0, 14.0]\n","[2.0, 1.0, 3.0, 14.0]\n"]}],"source":["from scipy.stats import chisquare\n","\n","# 1) 각 카테고리의 비율을 리스트로 만들기\n","prob = [0.1, 0.05, 0.15, 0.7]\n","\n","# 2-1) 기대 빈도수 계산\n","print(\"데이터 수: \", len(df))\n","expected_counts = [0.1*20, 0.05*20, 0.15*20, 0.7*20]\n","print(expected_counts)\n","\n","# 2-2)기대 빈도수 계산 (다른 방법)\n","expected_counts = [x*len(df) for x in prob]\n","print(expected_counts)"]},{"cell_type":"code","source":["# 3) 관찰 빈도수 계산\n","observed_counts = df['항암약'].value_counts().sort_index().to_list()\n","print(observed_counts)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"K0KzbM8ijM4a","executionInfo":{"status":"ok","timestamp":1722874973077,"user_tz":-540,"elapsed":7,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"de2bc798-af8b-4f06-94da-211a9e2e9c6a"},"execution_count":5,"outputs":[{"output_type":"stream","name":"stdout","text":["[4, 3, 2, 11]\n"]}]},{"cell_type":"code","source":["# 4) 카이제곱 검정 수행\n","print(chisquare(f_obs=observed_counts, f_exp=expected_counts))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"naDXhxrCjPdL","executionInfo":{"status":"ok","timestamp":1722874973077,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon 
Kim","userId":"07653788752262629837"}},"outputId":"a982c52f-a3d7-49d3-eebe-e5885f42fd60"},"execution_count":6,"outputs":[{"output_type":"stream","name":"stdout","text":["Power_divergenceResult(statistic=6.976190476190476, pvalue=0.07266054733847573)\n"]}]},{"cell_type":"code","source":["# 참고 - f_obs, f_exp 생략 가능\n","print(chisquare(observed_counts, expected_counts))"],"metadata":{"id":"NmCzsf-UfZz4","executionInfo":{"status":"ok","timestamp":1722874973077,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"e473187a-50c8-42a7-d3d7-ce6fc35d60b0","colab":{"base_uri":"https://localhost:8080/"}},"execution_count":7,"outputs":[{"output_type":"stream","name":"stdout","text":["Power_divergenceResult(statistic=6.976190476190476, pvalue=0.07266054733847573)\n"]}]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"t6IB5WUEq-SI"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"data6-3-2.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch6/data6-3-2.csv\")\n","\n","print(df.head(3))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Hy0NI7InTB5F","executionInfo":{"status":"ok","timestamp":1722874973585,"user_tz":-540,"elapsed":511,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"473d57d8-9067-4942-ffd8-aed054c4d8da"},"execution_count":8,"outputs":[{"output_type":"stream","name":"stdout","text":[" solar wind o3 temperature\n","0 89.14 6.28 33.52 23.0\n","1 109.97 1.04 27.01 20.7\n","2 102.83 6.42 41.00 20.5\n"]}]},{"cell_type":"code","source":["from statsmodels.formula.api import ols\n","\n","# 1) R스타일 formula\n","formula = 'temperature ~ solar + wind + o3'\n","\n","# 2) 회귀 모델 학습\n","model= ols(formula, data=df).fit()\n","\n","# 3) 회귀 모델 요약 정보\n","summary = 
model.summary()\n","print(summary)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"2bjnOPN6gCLw","executionInfo":{"status":"ok","timestamp":1722874974443,"user_tz":-540,"elapsed":860,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"d08030f1-9f49-44bd-969a-2df9134841f8"},"execution_count":9,"outputs":[{"output_type":"stream","name":"stdout","text":[" OLS Regression Results \n","==============================================================================\n","Dep. Variable: temperature R-squared: 0.044\n","Model: OLS Adj. R-squared: 0.014\n","Method: Least Squares F-statistic: 1.464\n","Date: Mon, 05 Aug 2024 Prob (F-statistic): 0.229\n","Time: 16:22:54 Log-Likelihood: -195.45\n","No. Observations: 100 AIC: 398.9\n","Df Residuals: 96 BIC: 409.3\n","Df Model: 3 \n","Covariance Type: nonrobust \n","==============================================================================\n"," coef std err t P>|t| [0.025 0.975]\n","------------------------------------------------------------------------------\n","Intercept 19.0507 1.994 9.555 0.000 15.093 23.008\n","solar 0.0039 0.015 0.251 0.802 -0.027 0.035\n","wind -0.0252 0.090 -0.280 0.780 -0.204 0.153\n","o3 0.0749 0.036 2.079 0.040 0.003 0.146\n","==============================================================================\n","Omnibus: 0.654 Durbin-Watson: 2.328\n","Prob(Omnibus): 0.721 Jarque-Bera (JB): 0.672\n","Skew: 0.187 Prob(JB): 0.715\n","Kurtosis: 2.855 Cond. No. 1.20e+03\n","==============================================================================\n","\n","Notes:\n","[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n","[2] The condition number is large, 1.2e+03. This might indicate that there are\n","strong multicollinearity or other numerical problems.\n"]}]},{"cell_type":"code","source":["# 2-1. o3에 대한 회귀계수\n","print(\"2-1. 
o3의 회귀계수:\", model.params['o3'])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"xELLjouUFXoq","executionInfo":{"status":"ok","timestamp":1722874974443,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"09b7896f-9d5a-4dfd-e198-76dcdfee296d"},"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["2-1. o3의 회귀계수: 0.0749385437813658\n"]}]},{"cell_type":"code","source":["# 2-2. wind에 대한 p-value\n","print(\"2-2. wind의 p-value:\", model.pvalues['wind'])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"9M316CMLTsjQ","executionInfo":{"status":"ok","timestamp":1722874974907,"user_tz":-540,"elapsed":466,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"7ca51dca-c1a0-447e-84b2-10aa6a4e7120"},"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["2-2. wind의 p-value: 0.7797177202071661\n"]}]},{"cell_type":"code","source":["# 2-3. 
예측값\n","# 1) 새 데이터를 데이터프레임으로 만들기\n","new_data = pd.DataFrame({\n"," 'solar': [100],\n"," 'wind': [5],\n"," 'o3': [30]\n","})\n","print(new_data)"],"metadata":{"id":"RqikMfLeTvYZ","executionInfo":{"status":"ok","timestamp":1722874974907,"user_tz":-540,"elapsed":5,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"colab":{"base_uri":"https://localhost:8080/"},"outputId":"6c788931-5572-4607-c7d0-0dda481bb748"},"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":[" solar wind o3\n","0 100 5 30\n"]}]},{"cell_type":"code","source":["# 2) 구축된 모델을 사용해 예측\n","pred = model.predict(new_data)\n","print(pred)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jGr4psT2ESrc","executionInfo":{"status":"ok","timestamp":1722874974907,"user_tz":-540,"elapsed":4,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"2be9cf97-3a21-4fc4-a75b-28e4e038caf4"},"execution_count":13,"outputs":[{"output_type":"stream","name":"stdout","text":["0 21.56163\n","dtype: float64\n"]}]}]}
--------------------------------------------------------------------------------
/part4/ch7/p7_type1.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyMewqKFn73wqBc13lAIZSRT"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch7/p7_type1.ipynb)"],"metadata":{"id":"zNG24pNSof-O"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"ImS-bayEojdA"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"JO-2zLFZsse1"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QfWBB-yjCuX8","executionInfo":{"status":"ok","timestamp":1713104075940,"user_tz":-540,"elapsed":308,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"fcef071e-5a6b-4ff3-d92c-00c98c0f4fa1"},"outputs":[{"output_type":"stream","name":"stdout","text":["2.183\n"]}],"source":["import pandas as pd\n","# df = pd.read_csv(\"student_assessment.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/student_assessment.csv\")\n","\n","# 1) 결측치 제거\n","# print(df.shape)\n","df = df.dropna()\n","# print(df.shape)\n","\n","# 2) 가장 많이 수강한 과목 필터링\n","id = df['id_assessment'].value_counts().idxmax()\n","cond = df['id_assessment'] == id\n","df = df[cond]\n","\n","# 3) 과목 점수 스탠다드 스케일\n","from sklearn.preprocessing import StandardScaler\n","scaler = StandardScaler()\n","df['score'] = scaler.fit_transform(df[['score']])\n","\n","# 4) 가장 큰 값\n","print(round(df['score'].max(), 3))"]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"sHC4blpRswAJ"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"stock_market.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/stock_market.csv\")\n","\n","# 1) close와의 상관관계(절대값)\n","df_corr = 
df.corr()['close'].abs()\n","\n","# 2) 상관관계가 높은 변수명\n","col = df_corr.loc['DE1':'DE77'].idxmax()\n","\n","# 3) '2)'에서 구한 변수명의 평균값\n","print(round(df[col].mean(), 4))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"usqyPcI4J0eL","executionInfo":{"status":"ok","timestamp":1713104076541,"user_tz":-540,"elapsed":316,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"f64aefee-35d8-40e0-8dc4-3b3e144cc73a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["-0.0004\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"E9QDz8l-sy4D"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"air_quality.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/air_quality.csv\")\n","\n","# 1) IQR 계산\n","Q1 = df['CO2'].quantile(0.25)\n","Q3 = df['CO2'].quantile(0.75)\n","IQR = Q3 - Q1\n","\n","# 2) 상한 및 하한 계산\n","upper = Q3 + 1.5 * IQR\n","lower = Q1 - 1.5 * IQR\n","\n","# 3) 이상치 식별\n","outliers = df[(df['CO2'] < lower) | (df['CO2'] > upper)]\n","\n","# 4) 이상치 수\n","print(len(outliers))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"LipHWwikUBL-","executionInfo":{"status":"ok","timestamp":1713104076542,"user_tz":-540,"elapsed":3,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"7887d4ce-375f-46de-8a51-978d5ff6af97"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["304\n"]}]}]}
--------------------------------------------------------------------------------
/part4/ch7/p7_type3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "provenance": []
7 | },
8 | "kernelspec": {
9 | "name": "python3",
10 | "display_name": "Python 3"
11 | },
12 | "language_info": {
13 | "name": "python"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "markdown",
19 | "source": [
20 | "[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch7/p7_type3.ipynb)"
21 | ],
22 | "metadata": {
23 | "id": "znZZZKmko0kX"
24 | }
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "source": [
29 | "## 작업형3"
30 | ],
31 | "metadata": {
32 | "id": "H3N9Ou_Oo2mV"
33 | }
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "source": [
38 | "### 문제1-1"
39 | ],
40 | "metadata": {
41 | "id": "paHiKoCrtKnv"
42 | }
43 | },
44 | {
45 | "cell_type": "code",
46 | "source": [
47 | "import pandas as pd\n",
48 | "# df = pd.read_csv(\"clam.csv\")\n",
49 | "df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/clam.csv\")\n",
50 | "\n",
51 | "# 데이터셋 분할\n",
52 | "print(df.shape)\n",
53 | "train = df.iloc[:210]\n",
54 | "test = df.iloc[210:]\n",
55 | "print(train.shape)\n",
56 | "\n",
57 | "print(train.head())"
58 | ],
59 | "metadata": {
60 | "colab": {
61 | "base_uri": "https://localhost:8080/"
62 | },
63 | "id": "H38dKQCTrB_5",
64 | "outputId": "733154f4-f39c-4261-d606-77a8023ae92d"
65 | },
66 | "execution_count": 6,
67 | "outputs": [
68 | {
69 | "output_type": "stream",
70 | "name": "stdout",
71 | "text": [
72 | "(300, 6)\n",
73 | "(210, 6)\n",
74 | " age length diameter height weight gender\n",
75 | "0 6 0.474627 0.211352 0.178189 78.971766 1\n",
76 | "1 1 0.465847 0.339388 0.170522 98.781960 1\n",
77 | "2 4 0.122807 0.238691 0.106924 88.792625 0\n",
78 | "3 4 0.204579 0.360543 0.034261 1.028847 0\n",
79 | "4 8 0.243458 0.358037 0.128080 6.503367 0\n"
80 | ]
81 | }
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "source": [
87 | "from statsmodels.formula.api import logit\n",
88 | "import numpy as np\n",
89 | "\n",
90 | "# 1) 로지스틱 회귀 모델 생성 및 학습\n",
91 | "model = logit(\"gender ~ weight\", data=train).fit()\n",
92 | "\n",
93 | "# 2) 오즈비 계산\n",
94 | "odds_ratio = np.exp(model.params['weight'])\n",
95 | "print(round(odds_ratio, 4))"
96 | ],
97 | "metadata": {
98 | "colab": {
99 | "base_uri": "https://localhost:8080/"
100 | },
101 | "id": "m9Y_2uBYFnot",
102 | "outputId": "13a98535-a066-4be7-a676-4fc793b836ba"
103 | },
104 | "execution_count": 7,
105 | "outputs": [
106 | {
107 | "output_type": "stream",
108 | "name": "stdout",
109 | "text": [
110 | "Optimization terminated successfully.\n",
111 | " Current function value: 0.690045\n",
112 | " Iterations 4\n",
113 | "1.0047\n"
114 | ]
115 | }
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "source": [
121 | "### 문제1-2"
122 | ],
123 | "metadata": {
124 | "id": "yeKVhetBtS8N"
125 | }
126 | },
127 | {
128 | "cell_type": "code",
129 | "source": [
130 | "# 1) 로지스틱 회귀 모델 생성 및 학습\n",
131 | "formula = \"gender ~ age + length + diameter + height + weight\"\n",
132 | "model = logit(formula, data=train).fit()\n",
133 | "\n",
134 | "# 2) 잔차이탈도\n",
135 | "print(round(-2 * model.llf,2))"
136 | ],
137 | "metadata": {
138 | "colab": {
139 | "base_uri": "https://localhost:8080/"
140 | },
141 | "id": "4rCVKX-t_L61",
142 | "outputId": "2b6b377e-7d1a-4952-8021-75e4c1c12fea"
143 | },
144 | "execution_count": 8,
145 | "outputs": [
146 | {
147 | "output_type": "stream",
148 | "name": "stdout",
149 | "text": [
150 | "Optimization terminated successfully.\n",
151 | " Current function value: 0.683173\n",
152 | " Iterations 4\n",
153 | "286.93\n"
154 | ]
155 | }
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "source": [
161 | "### 문제1-3"
162 | ],
163 | "metadata": {
164 | "id": "x4u7dfFTtUs9"
165 | }
166 | },
167 | {
168 | "cell_type": "code",
169 | "source": [
170 | "from sklearn.metrics import accuracy_score\n",
171 | "\n",
172 | "# 1) test데이터를 사용해 예측 (0.5 미만: 0, 0.5 이상: 1)\n",
173 | "model = logit(\"gender ~ weight\", data=train).fit()\n",
174 | "target = test.pop('gender')\n",
175 | "pred = model.predict(test)\n",
176 | "pred = (pred > 0.5).astype(int)\n",
177 | "\n",
178 | "# 2) 실제 값과 예측값을 사용하여 정확도 계산\n",
179 | "accuracy = accuracy_score(target, pred)\n",
180 | "\n",
181 | "# 3) 오류율 계산\n",
182 | "error_rate = 1 - accuracy\n",
183 | "print(round(error_rate, 3))\n",
184 | "# 0.478"
185 | ],
186 | "metadata": {
187 | "colab": {
188 | "base_uri": "https://localhost:8080/"
189 | },
190 | "id": "7x8XLDu4HksT",
191 | "outputId": "5a5e5d32-f0ea-4e62-9cbc-7765ae478b6e"
192 | },
193 | "execution_count": 9,
194 | "outputs": [
195 | {
196 | "output_type": "stream",
197 | "name": "stdout",
198 | "text": [
199 | "Optimization terminated successfully.\n",
200 | " Current function value: 0.690045\n",
201 | " Iterations 4\n",
202 | "0.478\n"
203 | ]
204 | }
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "source": [
210 | "### 문제2-1"
211 | ],
212 | "metadata": {
213 | "id": "4MSQ0eB1tWsh"
214 | }
215 | },
216 | {
217 | "cell_type": "code",
218 | "source": [
219 | "import pandas as pd\n",
220 | "\n",
221 | "# df = pd.read_csv(\"system_cpu.csv\")\n",
222 | "df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/system_cpu.csv\")\n",
223 | "\n",
224 | "print(df.head())"
225 | ],
226 | "metadata": {
227 | "colab": {
228 | "base_uri": "https://localhost:8080/"
229 | },
230 | "id": "OlvzgzRDcNt8",
231 | "outputId": "424000fc-c180-4157-8975-81a9f87ce6cd"
232 | },
233 | "execution_count": null,
234 | "outputs": [
235 | {
236 | "output_type": "stream",
237 | "name": "stdout",
238 | "text": [
239 | " ERP Feature1 Feature2 Feature3 CPU\n",
240 | "0 30.6 235.1 44.5 44.0 112.3\n",
241 | "1 40.3 36.6 46.4 36.1 58.6\n",
242 | "2 57.7 52.2 66.5 2.0 55.3\n",
243 | "3 128.3 196.2 59.8 57.4 103.2\n",
244 | "4 80.3 75.2 59.6 58.2 104.1\n"
245 | ]
246 | }
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "source": [
252 | "import pandas as pd\n",
253 | "# df = pd.read_csv(\"system_cpu.csv\")\n",
254 | "df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch7/system_cpu.csv\")\n",
255 | "\n",
256 | "# 1) ERP와 각 변수 사이의 상관계수 계산\n",
257 | "corr_matrix = df.corr()\n",
258 | "\n",
259 | "# 2) ERP와 다른 변수들과의 상관계수 출력\n",
260 | "erp_corr = corr_matrix['ERP'].sort_values(ascending=False)\n",
261 | "print(erp_corr)"
262 | ],
263 | "metadata": {
264 | "colab": {
265 | "base_uri": "https://localhost:8080/"
266 | },
267 | "id": "6POPUMi3Axb6",
268 | "outputId": "f33741aa-f50a-4364-80a6-e544491fc239"
269 | },
270 | "execution_count": null,
271 | "outputs": [
272 | {
273 | "output_type": "stream",
274 | "name": "stdout",
275 | "text": [
276 | "ERP 1.000000\n",
277 | "Feature3 0.882194\n",
278 | "CPU 0.092455\n",
279 | "Feature2 0.092432\n",
280 | "Feature1 -0.053848\n",
281 | "Name: ERP, dtype: float64\n"
282 | ]
283 | }
284 | ]
285 | },
286 | {
287 | "cell_type": "markdown",
288 | "source": [
289 | "### 문제2-2, 2-3"
290 | ],
291 | "metadata": {
292 | "id": "EVAJRDHi6gAG"
293 | }
294 | },
295 | {
296 | "cell_type": "code",
297 | "source": [
298 | "from statsmodels.formula.api import ols\n",
299 | "\n",
300 | "# 1) CPU가 100 미만인 데이터 필터링\n",
301 | "filtered_df = df[df['CPU'] < 100]\n",
302 | "\n",
303 | "# 2) 선형회귀 모델 생성: ERP를 종속 변수로, 나머지 변수들을 독립 변수로 설정\n",
304 | "model = ols('ERP ~ Feature1 + Feature2 + Feature3 + CPU', data=filtered_df).fit()\n",
305 | "\n",
306 | "# 3) 모델 요약 정보 출력\n",
307 | "print(model.summary())"
308 | ],
309 | "metadata": {
310 | "colab": {
311 | "base_uri": "https://localhost:8080/"
312 | },
313 | "id": "QW0SyhImA1l0",
314 | "outputId": "e654d8d5-ada0-4f4d-f30e-a8e555466034"
315 | },
316 | "execution_count": null,
317 | "outputs": [
318 | {
319 | "output_type": "stream",
320 | "name": "stdout",
321 | "text": [
322 | " OLS Regression Results \n",
323 | "==============================================================================\n",
324 | "Dep. Variable: ERP R-squared: 0.755\n",
325 | "Model: OLS Adj. R-squared: 0.736\n",
326 | "Method: Least Squares F-statistic: 39.30\n",
327 | "Date: Mon, 05 Aug 2024 Prob (F-statistic): 5.36e-15\n",
328 | "Time: 16:26:10 Log-Likelihood: -260.40\n",
329 | "No. Observations: 56 AIC: 530.8\n",
330 | "Df Residuals: 51 BIC: 540.9\n",
331 | "Df Model: 4 \n",
332 | "Covariance Type: nonrobust \n",
333 | "==============================================================================\n",
334 | " coef std err t P>|t| [0.025 0.975]\n",
335 | "------------------------------------------------------------------------------\n",
336 | "Intercept 51.4133 19.112 2.690 0.010 13.045 89.782\n",
337 | "Feature1 -0.0242 0.059 -0.409 0.684 -0.143 0.094\n",
338 | "Feature2 0.0602 0.106 0.569 0.572 -0.152 0.273\n",
339 | "Feature3 1.4126 0.113 12.458 0.000 1.185 1.640\n",
340 | "CPU -0.4651 0.234 -1.985 0.053 -0.936 0.005\n",
341 | "==============================================================================\n",
342 | "Omnibus: 3.758 Durbin-Watson: 1.762\n",
343 | "Prob(Omnibus): 0.153 Jarque-Bera (JB): 2.757\n",
344 | "Skew: 0.436 Prob(JB): 0.252\n",
345 | "Kurtosis: 3.648 Cond. No. 780.\n",
346 | "==============================================================================\n",
347 | "\n",
348 | "Notes:\n",
349 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
350 | ]
351 | }
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "source": [],
357 | "metadata": {
358 | "id": "vf9FgVSOZKhd"
359 | },
360 | "execution_count": null,
361 | "outputs": []
362 | }
363 | ]
364 | }
--------------------------------------------------------------------------------
/part4/ch7/system_cpu.csv:
--------------------------------------------------------------------------------
1 | ERP,Feature1,Feature2,Feature3,CPU
2 | 30.6,235.1,44.5,44.0,112.3
3 | 40.3,36.6,46.4,36.1,58.6
4 | 57.7,52.2,66.5,2.0,55.3
5 | 128.3,196.2,59.8,57.4,103.2
6 | 80.3,75.2,59.6,58.2,104.1
7 | 49.8,183.5,25.7,7.0,113.8
8 | -14.6,97.1,49.6,4.3,122.6
9 | 113.3,57.4,38.9,66.7,147.6
10 | -27.1,199.9,54.2,-24.0,101.7
11 | 56.5,186.7,48.5,28.1,82.4
12 | 107.5,201.9,63.7,22.4,129.5
13 | 90.8,177.1,54.8,72.0,77.2
14 | -0.6,69.6,61.8,-4.9,94.0
15 | 50.7,203.7,43.0,37.4,57.9
16 | -7.2,94.7,35.8,5.6,52.6
17 | 10.1,149.2,43.8,25.4,146.3
18 | 76.0,180.1,49.7,31.5,133.6
19 | 12.2,87.7,55.7,9.2,119.6
20 | 34.7,138.8,83.9,25.7,91.0
21 | -4.5,131.8,49.4,10.8,67.4
22 | 100.2,68.9,35.7,67.4,65.7
23 | 67.7,54.8,44.8,26.7,75.1
24 | 74.2,133.1,43.0,27.1,105.0
25 | 130.2,155.5,57.2,85.5,121.5
26 | -24.3,108.6,26.9,-42.7,116.1
27 | 82.2,63.0,50.9,64.7,78.1
28 | 98.1,77.9,52.3,44.5,145.5
29 | 38.5,194.3,53.5,19.2,123.8
30 | 119.3,166.3,41.0,49.4,105.5
31 | 7.2,151.0,46.4,2.9,111.2
32 | 99.2,62.3,28.6,68.7,92.0
33 | 72.9,136.1,42.6,48.0,74.8
34 | 34.8,46.5,41.9,24.8,85.7
35 | 50.5,62.1,56.2,26.6,125.8
36 | 100.9,59.3,32.7,28.8,51.5
37 | 16.6,139.0,61.7,-4.4,61.7
38 | 60.0,155.2,72.4,24.9,54.7
39 | 48.8,113.0,19.0,31.3,54.2
40 | 87.9,113.2,56.4,45.0,135.6
41 | 16.8,36.0,60.2,23.2,120.4
42 | 13.1,-0.0,40.4,13.7,97.5
43 | 86.5,94.9,44.0,44.1,59.9
44 | 84.2,74.7,48.0,76.2,99.2
45 | 82.6,172.7,45.5,47.4,97.4
46 | 87.4,216.1,45.4,37.2,67.4
47 | -3.5,141.3,24.9,26.1,93.4
48 | 133.5,26.6,67.3,84.8,89.9
49 | 127.8,174.8,66.2,91.1,111.6
50 | 43.9,-2.0,37.8,20.3,113.5
51 | 129.9,71.3,28.0,79.6,54.6
52 | 73.2,78.5,57.8,14.8,87.5
53 | 127.2,198.5,41.4,75.4,112.6
54 | 137.5,50.3,52.1,66.4,100.4
55 | 111.3,32.6,45.2,35.3,135.7
56 | 95.3,94.2,60.4,63.1,115.9
57 | -13.5,73.4,60.4,-33.9,66.4
58 | 4.7,161.0,39.1,-26.3,57.1
59 | 45.1,48.8,29.2,28.0,114.3
60 | 121.1,26.5,26.3,69.1,52.7
61 | 41.5,69.5,59.2,35.3,108.6
62 | 35.7,60.2,32.2,29.0,144.0
63 | 106.6,195.1,42.4,75.4,107.6
64 | 82.6,134.1,41.1,51.8,88.9
65 | 103.7,70.1,49.2,49.9,114.4
66 | 73.6,38.6,21.0,35.7,95.9
67 | 91.8,136.7,52.8,41.7,104.6
68 | 35.7,14.4,57.9,22.3,144.2
69 | -12.3,27.4,51.3,-22.6,88.7
70 | 93.2,144.8,45.3,51.7,146.1
71 | 25.2,117.8,51.5,11.3,140.5
72 | 19.1,163.4,56.0,13.7,69.7
73 | 16.7,119.5,8.4,12.5,57.0
74 | 5.9,168.2,79.3,-33.8,60.2
75 | 92.0,40.8,55.9,32.5,51.9
76 | 58.5,53.2,40.2,27.6,59.5
77 | 98.8,122.4,44.1,48.6,118.3
78 | 97.9,40.0,57.4,52.8,57.2
79 | 67.8,51.9,48.3,38.4,82.0
80 | 11.6,69.6,19.5,28.5,134.5
81 | 123.2,102.0,81.0,66.3,52.4
82 | 51.1,57.9,48.3,20.9,131.5
83 | 38.8,45.1,65.3,-4.6,78.3
84 | 48.4,75.2,39.6,3.7,61.9
85 | 118.8,-48.6,73.0,67.4,119.7
86 | -1.9,162.9,54.3,15.0,112.9
87 | 46.5,53.0,59.1,46.4,137.8
88 | 92.8,65.0,34.3,34.0,123.5
89 | 29.2,99.2,68.2,10.0,130.4
90 | 93.9,39.4,60.3,50.6,78.3
91 | 29.8,202.9,69.5,45.6,67.8
92 | 15.3,23.4,40.6,9.4,125.1
93 | 38.9,138.6,42.8,42.7,130.7
94 | 72.1,102.1,84.6,38.6,149.1
95 | 54.5,57.6,34.1,44.5,91.3
96 | 131.9,134.9,48.0,81.3,87.3
97 | 51.9,105.0,67.1,28.4,127.7
98 | 139.1,141.1,51.5,54.2,84.1
99 | 35.0,179.4,58.7,13.5,143.1
100 | 24.7,217.2,44.0,34.7,135.9
101 | 34.9,178.9,55.6,30.6,93.0
102 | 211.7,171.5,57.3,99.3,125.1
103 | 128.7,206.7,-47.8,59.2,125.5
104 | 206.2,196.7,-36.0,77.1,60.4
105 | 225.6,223.5,223.7,134.5,140.3
106 | 158.6,19.0,43.7,92.6,100.6
107 | 197.6,21.5,-10.5,96.7,132.7
108 | 159.7,-18.8,118.4,82.9,82.1
109 | 241.0,-37.2,-34.9,87.9,139.6
110 | 137.2,97.0,238.0,61.3,89.0
111 | 120.3,-6.4,72.3,80.2,51.2
112 | 207.9,178.6,32.4,118.7,140.5
113 | 150.7,215.0,8.3,80.3,59.2
114 | 186.3,68.0,-7.2,92.9,82.0
115 | 141.0,73.0,158.5,82.4,145.0
116 | 155.8,35.9,249.6,73.7,145.1
117 |
--------------------------------------------------------------------------------
/part4/ch8/chem.csv:
--------------------------------------------------------------------------------
1 | sample,co,nmhc,etc
2 | 샘플1,79,54,31
3 | 샘플2,84,57,58
4 | 샘플3,109,74,113
5 | 샘플4,15,77,21
6 | 샘플5,65,77,115
7 | 샘플6,51,19,64
8 | 샘플7,49,93,108
9 | 샘플8,63,31,44
10 | 샘플9,108,46,87
11 | 샘플10,18,97,102
12 | 샘플11,57,80,23
13 | 샘플12,92,98,96
14 | 샘플13,36,98,55
15 | 샘플14,88,22,106
16 | 샘플15,15,68,81
17 | 샘플16,25,75,89
18 | 샘플17,58,49,107
19 | 샘플18,73,97,63
20 | 샘플19,38,56,52
21 | 샘플20,74,98,31
22 | 샘플21,17,91,104
23 | 샘플22,77,47,30
24 | 샘플23,50,35,74
25 | 샘플24,109,87,57
26 | 샘플25,82,82,48
27 | 샘플26,97,19,22
28 | 샘플27,61,30,47
29 | 샘플28,35,90,103
30 | 샘플29,96,79,109
31 | 샘플30,65,89,43
32 | 샘플31,42,57,73
33 | 샘플32,29,74,71
34 | 샘플33,56,92,66
35 | 샘플34,73,98,40
36 | 샘플35,80,59,73
37 | 샘플36,51,39,49
38 | 샘플37,25,29,87
39 | 샘플38,101,29,55
40 | 샘플39,58,24,59
41 | 샘플40,26,49,29
42 | 샘플41,17,42,93
43 | 샘플42,66,75,61
44 | 샘플43,95,19,43
45 | 샘플44,47,67,23
46 | 샘플45,69,42,66
47 | 샘플46,15,41,110
48 | 샘플47,53,84,70
49 | 샘플48,34,33,23
50 | 샘플49,61,45,51
51 | 샘플50,57,85,29
52 | 샘플51,71,65,30
53 | 샘플52,75,38,47
54 | 샘플53,92,44,65
55 | 샘플54,45,10,91
56 | 샘플55,39,10,59
57 | 샘플56,17,46,81
58 | 샘플57,18,63,105
59 | 샘플58,109,15,117
60 | 샘플59,28,48,64
61 | 샘플60,55,27,54
62 | 샘플61,87,89,54
63 | 샘플62,34,14,108
64 | 샘플63,87,52,53
65 | 샘플64,41,68,25
66 | 샘플65,81,41,56
67 | 샘플66,67,11,20
68 | 샘플67,82,75,95
69 | 샘플68,76,51,54
70 | 샘플69,29,67,89
71 | 샘플70,19,45,73
72 | 샘플71,82,21,100
73 | 샘플72,26,56,82
74 | 샘플73,101,92,28
75 | 샘플74,92,10,81
76 | 샘플75,90,24,21
77 | 샘플76,71,63,101
78 | 샘플77,31,22,55
79 | 샘플78,39,52,111
80 | 샘플79,44,94,60
81 | 샘플80,36,85,56
82 | 샘플81,40,78,68
83 | 샘플82,95,16,45
84 | 샘플83,75,78,87
85 | 샘플84,76,57,55
86 | 샘플85,98,13,50
87 | 샘플86,48,86,49
88 | 샘플87,47,62,53
89 | 샘플88,85,88,38
90 | 샘플89,100,25,37
91 | 샘플90,46,30,113
92 | 샘플91,28,68,104
93 | 샘플92,86,33,22
94 | 샘플93,71,89,89
95 | 샘플94,39,23,32
96 | 샘플95,94,95,64
97 | 샘플96,56,58,86
98 | 샘플97,33,59,111
99 | 샘플98,55,79,105
100 | 샘플99,69,51,59
101 | 샘플100,94,45,59
--------------------------------------------------------------------------------
/part4/ch8/drinks.csv:
--------------------------------------------------------------------------------
1 | country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
2 | Afghanistan,0,0,0,0.0,Asia
3 | Albania,89,132,54,4.9,Europe
4 | Algeria,25,0,14,0.7,Africa
5 | Andorra,245,138,312,12.4,Europe
6 | Angola,217,57,45,5.9,Africa
7 | Antigua & Barbuda,102,128,45,4.9,North America
8 | Argentina,193,25,221,8.3,South America
9 | Armenia,21,179,11,3.8,Europe
10 | Australia,261,72,212,10.4,Oceania
11 | Austria,279,75,191,9.7,Europe
12 | Azerbaijan,21,46,5,1.3,Europe
13 | Bahamas,122,176,51,6.3,North America
14 | Bahrain,42,63,7,2.0,Asia
15 | Bangladesh,0,0,0,0.0,Asia
16 | Barbados,143,173,36,6.3,North America
17 | Belarus,142,373,42,14.4,Europe
18 | Belgium,295,84,212,10.5,Europe
19 | Belize,263,114,8,6.8,North America
20 | Benin,34,4,13,1.1,Africa
21 | Bhutan,23,0,0,0.4,Asia
22 | Bolivia,167,41,8,3.8,South America
23 | Bosnia-Herzegovina,76,173,8,4.6,Europe
24 | Botswana,173,35,35,5.4,Africa
25 | Brazil,245,145,16,7.2,South America
26 | Brunei,31,2,1,0.6,Asia
27 | Bulgaria,231,252,94,10.3,Europe
28 | Burkina Faso,25,7,7,4.3,Africa
29 | Burundi,88,0,0,6.3,Africa
30 | Cote d'Ivoire,37,1,7,4.0,Africa
31 | Cabo Verde,144,56,16,4.0,Africa
32 | Cambodia,57,65,1,2.2,Asia
33 | Cameroon,147,1,4,5.8,Africa
34 | Canada,240,122,100,8.2,North America
35 | Central African Republic,17,2,1,1.8,Africa
36 | Chad,15,1,1,0.4,Africa
37 | Chile,130,124,172,7.6,South America
38 | China,79,192,8,5.0,Asia
39 | Colombia,159,76,3,4.2,South America
40 | Comoros,1,3,1,0.1,Africa
41 | Congo,76,1,9,1.7,Africa
42 | Cook Islands,0,254,74,5.9,Oceania
43 | Costa Rica,149,87,11,4.4,North America
44 | Croatia,230,87,254,10.2,Europe
45 | Cuba,93,137,5,4.2,North America
46 | Cyprus,192,154,113,8.2,Europe
47 | Czech Republic,361,170,134,11.8,Europe
48 | North Korea,0,0,0,0.0,Asia
49 | DR Congo,32,3,1,2.3,Africa
50 | Denmark,224,81,278,10.4,Europe
51 | Djibouti,15,44,3,1.1,Africa
52 | Dominica,52,286,26,6.6,North America
53 | Dominican Republic,193,147,9,6.2,North America
54 | Ecuador,162,74,3,4.2,South America
55 | Egypt,6,4,1,0.2,Africa
56 | El Salvador,52,69,2,2.2,North America
57 | Equatorial Guinea,92,0,233,5.8,Africa
58 | Eritrea,18,0,0,0.5,Africa
59 | Estonia,224,194,59,9.5,Europe
60 | Ethiopia,20,3,0,0.7,Africa
61 | Fiji,77,35,1,2.0,Oceania
62 | Finland,263,133,97,10.0,Europe
63 | France,127,151,370,11.8,Europe
64 | Gabon,347,98,59,8.9,Africa
65 | Gambia,8,0,1,2.4,Africa
66 | Georgia,52,100,149,5.4,Europe
67 | Germany,346,117,175,11.3,Europe
68 | Ghana,31,3,10,1.8,Africa
69 | Greece,133,112,218,8.3,Europe
70 | Grenada,199,438,28,11.9,North America
71 | Guatemala,53,69,2,2.2,North America
72 | Guinea,9,0,2,0.2,Africa
73 | Guinea-Bissau,28,31,21,2.5,Africa
74 | Guyana,93,302,1,7.1,South America
75 | Haiti,1,326,1,5.9,North America
76 | Honduras,69,98,2,3.0,North America
77 | Hungary,234,215,185,11.3,Europe
78 | Iceland,233,61,78,6.6,Europe
79 | India,9,114,0,2.2,Asia
80 | Indonesia,5,1,0,0.1,Asia
81 | Iran,0,0,0,0.0,Asia
82 | Iraq,9,3,0,0.2,Asia
83 | Ireland,313,118,165,11.4,Europe
84 | Israel,63,69,9,2.5,Asia
85 | Italy,85,42,237,6.5,Europe
86 | Jamaica,82,97,9,3.4,North America
87 | Japan,77,202,16,7.0,Asia
88 | Jordan,6,21,1,0.5,Asia
89 | Kazakhstan,124,246,12,6.8,Asia
90 | Kenya,58,22,2,1.8,Africa
91 | Kiribati,21,34,1,1.0,Oceania
92 | Kuwait,0,0,0,0.0,Asia
93 | Kyrgyzstan,31,97,6,2.4,Asia
94 | Laos,62,0,123,6.2,Asia
95 | Latvia,281,216,62,10.5,Europe
96 | Lebanon,20,55,31,1.9,Asia
97 | Lesotho,82,29,0,2.8,Africa
98 | Liberia,19,152,2,3.1,Africa
99 | Libya,0,0,0,0.0,Africa
100 | Lithuania,343,244,56,12.9,Europe
101 | Luxembourg,236,133,271,11.4,Europe
102 | Madagascar,26,15,4,0.8,Africa
103 | Malawi,8,11,1,1.5,Africa
104 | Malaysia,13,4,0,0.3,Asia
105 | Maldives,0,0,0,0.0,Asia
106 | Mali,5,1,1,0.6,Africa
107 | Malta,149,100,120,6.6,Europe
108 | Marshall Islands,0,0,0,0.0,Oceania
109 | Mauritania,0,0,0,0.0,Africa
110 | Mauritius,98,31,18,2.6,Africa
111 | Mexico,238,68,5,5.5,North America
112 | Micronesia,62,50,18,2.3,Oceania
113 | Monaco,0,0,0,0.0,Europe
114 | Mongolia,77,189,8,4.9,Asia
115 | Montenegro,31,114,128,4.9,Europe
116 | Morocco,12,6,10,0.5,Africa
117 | Mozambique,47,18,5,1.3,Africa
118 | Myanmar,5,1,0,0.1,Asia
119 | Namibia,376,3,1,6.8,Africa
120 | Nauru,49,0,8,1.0,Oceania
121 | Nepal,5,6,0,0.2,Asia
122 | Netherlands,251,88,190,9.4,Europe
123 | New Zealand,203,79,175,9.3,Oceania
124 | Nicaragua,78,118,1,3.5,North America
125 | Niger,3,2,1,0.1,Africa
126 | Nigeria,42,5,2,9.1,Africa
127 | Niue,188,200,7,7.0,Oceania
128 | Norway,169,71,129,6.7,Europe
129 | Oman,22,16,1,0.7,Asia
130 | Pakistan,0,0,0,0.0,Asia
131 | Palau,306,63,23,6.9,Oceania
132 | Panama,285,104,18,7.2,North America
133 | Papua New Guinea,44,39,1,1.5,Oceania
134 | Paraguay,213,117,74,7.3,South America
135 | Peru,163,160,21,6.1,South America
136 | Philippines,71,186,1,4.6,Asia
137 | Poland,343,215,56,10.9,Europe
138 | Portugal,194,67,339,11.0,Europe
139 | Qatar,1,42,7,0.9,Asia
140 | South Korea,140,16,9,9.8,Asia
141 | Moldova,109,226,18,6.3,Europe
142 | Romania,297,122,167,10.4,Europe
143 | Russian Federation,247,326,73,11.5,Asia
144 | Rwanda,43,2,0,6.8,Africa
145 | St. Kitts & Nevis,194,205,32,7.7,North America
146 | St. Lucia,171,315,71,10.1,North America
147 | St. Vincent & the Grenadines,120,221,11,6.3,North America
148 | Samoa,105,18,24,2.6,Oceania
149 | San Marino,0,0,0,0.0,Europe
150 | Sao Tome & Principe,56,38,140,4.2,Africa
151 | Saudi Arabia,0,5,0,0.1,Asia
152 | Senegal,9,1,7,0.3,Africa
153 | Serbia,283,131,127,9.6,Europe
154 | Seychelles,157,25,51,4.1,Africa
155 | Sierra Leone,25,3,2,6.7,Africa
156 | Singapore,60,12,11,1.5,Asia
157 | Slovakia,196,293,116,11.4,Europe
158 | Slovenia,270,51,276,10.6,Europe
159 | Solomon Islands,56,11,1,1.2,Oceania
160 | Somalia,0,0,0,0.0,Africa
161 | South Africa,225,76,81,8.2,Africa
162 | Spain,284,157,112,10.0,Europe
163 | Sri Lanka,16,104,0,2.2,Asia
164 | Sudan,8,13,0,1.7,Africa
165 | Suriname,128,178,7,5.6,South America
166 | Swaziland,90,2,2,4.7,Africa
167 | Sweden,152,60,186,7.2,Europe
168 | Switzerland,185,100,280,10.2,Europe
169 | Syria,5,35,16,1.0,Asia
170 | Tajikistan,2,15,0,0.3,Asia
171 | Thailand,99,258,1,6.4,Asia
172 | Macedonia,106,27,86,3.9,Europe
173 | Timor-Leste,1,1,4,0.1,Asia
174 | Togo,36,2,19,1.3,Africa
175 | Tonga,36,21,5,1.1,Oceania
176 | Trinidad & Tobago,197,156,7,6.4,North America
177 | Tunisia,51,3,20,1.3,Africa
178 | Turkey,51,22,7,1.4,Asia
179 | Turkmenistan,19,71,32,2.2,Asia
180 | Tuvalu,6,41,9,1.0,Oceania
181 | Uganda,45,9,0,8.3,Africa
182 | Ukraine,206,237,45,8.9,Europe
183 | United Arab Emirates,16,135,5,2.8,Asia
184 | United Kingdom,219,126,195,10.4,Europe
185 | Tanzania,36,6,1,5.7,Africa
186 | USA,249,158,84,8.7,North America
187 | Uruguay,115,35,220,6.6,South America
188 | Uzbekistan,25,101,8,2.4,Asia
189 | Vanuatu,21,18,11,0.9,Oceania
190 | Venezuela,333,100,3,7.7,South America
191 | Vietnam,111,2,1,2.0,Asia
192 | Yemen,6,0,0,0.1,Asia
193 | Zambia,32,19,4,2.5,Africa
194 | Zimbabwe,64,18,4,4.7,Africa
195 |
--------------------------------------------------------------------------------
/part4/ch8/p8_type1.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"mount_file_id":"1iHXiVOq_xtN9hptWv34tb6d6jd_TcLNT","authorship_tag":"ABX9TyN1U0z3LW6NfwEWG86wbE4e"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch8/p8_type1.ipynb)"],"metadata":{"id":"i49zMTluEvHo"}},{"cell_type":"markdown","source":["## 작업형1"],"metadata":{"id":"j9jYUsDdNwKF"}},{"cell_type":"markdown","source":["### 문제1"],"metadata":{"id":"YAyh1xITWurF"}},{"cell_type":"code","source":["import pandas as pd\n","# df = pd.read_csv(\"drinks.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/drinks.csv\")\n","\n","# 1) 대륙별 맥주 소비량의 평균\n","continent = df.groupby(\"continent\")['beer_servings'].mean() # Europe\n","top_continent = continent.idxmax()\n","\n","# 2) 국가별 맥주 소비량\n","cond = df['continent'] == top_continent\n","df = df[cond]\n","df = df.sort_values('beer_servings', ascending=False)\n","df.iloc[4, 1] # 또는 df.iloc[4]['beer_servings']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cKnyz8hjYNp0","executionInfo":{"status":"ok","timestamp":1722875838970,"user_tz":-540,"elapsed":1697,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"f9445be8-e018-4656-9c7a-8c1573420b7c"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["313"]},"metadata":{},"execution_count":1}]},{"cell_type":"markdown","source":["### 문제2"],"metadata":{"id":"HPyFlnMZWwFs"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","# df = pd.read_csv(\"tourist.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/tourist.csv\")\n","\n","# 1) 방문객 합계 및 관광객 비율 계산\n","df['방문객합계'] = df['관광'] + df['공무'] + df['사업'] + 
df['기타']\n","df['관광객비율'] = df['관광'] / df['방문객합계']\n","\n","# 2) 조건에 맞는 값 찾기\n","a = df.sort_values('관광객비율', ascending=False).iloc[1, 3] # 또는 iloc[1]['사업']\n","b = df.sort_values('관광', ascending=False).iloc[1, 2] # 또는 iloc[1]['공무']\n","\n","print(a+b)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EnVLX3BqKyIZ","executionInfo":{"status":"ok","timestamp":1720249582333,"user_tz":-540,"elapsed":372,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"17395b35-cf31-4980-bfc0-53a9d02a04a7"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["441\n"]}]},{"cell_type":"code","source":["# 방법2\n","import pandas as pd\n","# df = pd.read_csv(\"tourist.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/tourist.csv\")\n","\n","# 1) 방문객 합계 및 관광객 비율 계산\n","df['방문객합계'] = df['관광'] + df['공무'] + df['사업'] + df['기타']\n","df['관광객비율'] = df['관광'] / df['방문객합계']\n","\n","# 2) 조건에 맞는 값 찾기\n","a = df.nlargest(2, '관광객비율').iloc[1]['사업']\n","b = df.nlargest(2, '관광').iloc[1]['공무']\n","\n","print(a+b)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8Y0L2dJXYllX","executionInfo":{"status":"ok","timestamp":1720249590584,"user_tz":-540,"elapsed":429,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"440cc59b-59de-44a1-ad0c-37aae19814ee"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["441\n"]}]},{"cell_type":"markdown","source":["### 문제3"],"metadata":{"id":"GE7Ggj0KZMOx"}},{"cell_type":"code","source":["# 방법1\n","import pandas as pd\n","from sklearn.preprocessing import MinMaxScaler\n","\n","# df = pd.read_csv(\"chem.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/chem.csv\")\n","\n","# 1) Min-Max 스케일링 (데이터프레임으로 만들기)\n","scaler = MinMaxScaler()\n","df['co_scaled'] = scaler.fit_transform(df[['co']])\n","df['nmhc_scaled'] = 
scaler.fit_transform(df[['nmhc']])\n","\n","# 2) 표준편차 계산 (판다스 표준편차)\n","co_std = df['co_scaled'].std()\n","nmhc_std = df['nmhc_scaled'].std()\n","print(co_std, nmhc_std)\n","\n","# 3) 표준편차 차이 계산 및 반올림\n","std_diff = round(co_std - nmhc_std, 3)\n","print(std_diff)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"YazPGRcxgrxp","executionInfo":{"status":"ok","timestamp":1720249909533,"user_tz":-540,"elapsed":3,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"496dab78-83ab-4ce6-b393-00af8aa8bb57"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["0.2856516497116944 0.3030617020578397\n","-0.017\n"]}]},{"cell_type":"code","source":["# 방법2\n","import pandas as pd\n","from sklearn.preprocessing import MinMaxScaler\n","\n","# df = pd.read_csv(\"chem.csv\")\n","df = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/chem.csv\")\n","\n","# 1) Min-Max 스케일링 (transform의 결과는 넘파이)\n","scaler = MinMaxScaler()\n","co_scaled = scaler.fit_transform(df[['co']])\n","nmhc_scaled = scaler.fit_transform(df[['nmhc']])\n","\n","# 2) 표준편차 계산 (넘파이 표준편차)\n","co_std = co_scaled.std()\n","nmhc_std = nmhc_scaled.std()\n","print(co_std, nmhc_std)\n","\n","# 3) 표준편차 차이 계산 및 반올림\n","std_diff = round(co_std - nmhc_std, 3)\n","print(std_diff)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ctWqLY_1aKn7","executionInfo":{"status":"ok","timestamp":1720249982457,"user_tz":-540,"elapsed":437,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"4091cdb2-30b9-4272-bf06-c80b0a3de6f3"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["0.2842198028519168 0.3015425862157\n","-0.017\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"GqQuCqEBDz1g"},"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------
/part4/ch8/p8_type2.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyP+iM3+CLs5xtO4gKriPWUA"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["[구글 코랩(Colab)에서 실행하기](https://colab.research.google.com/github/lovedlim/bigdata_analyst_cert/blob/main/part4/ch8/p8_type2.ipynb)"],"metadata":{"id":"QYnF_RFLE3X5"}},{"cell_type":"code","source":["# 문제정의\n","# 평가: MAE\n","# target: TotalCharges\n","# 최종파일: result.csv(컬럼 1개 pred)\n","\n","# 라이브러리 및 데이터 불러오기\n","import pandas as pd\n","# train = pd.read_csv(\"churn_train.csv\")\n","# test = pd.read_csv(\"churn_test.csv\")\n","train = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/churn_train.csv\")\n","test = pd.read_csv(\"https://raw.githubusercontent.com/lovedlim/bigdata_analyst_cert/main/part4/ch8/churn_test.csv\")\n","\n","# 탐색적 데이터 분석(EDA)\n","print(\"===== 데이터 크기 =====\")\n","print(\"Train Shape:\", train.shape)\n","print(\"Test Shape:\", test.shape)\n","\n","print(\"\\n ===== 데이터 정보(자료형) =====\")\n","print(train.info())\n","\n","print(\"\\n ===== train 결측치 수 =====\")\n","print(train.isnull().sum().sum())\n","\n","print(\"\\n ===== test 결측치 수 =====\")\n","print(test.isnull().sum().sum())\n","\n","print(\"\\n ===== customerID unique 수 =====\")\n","print(train['customerID'].nunique())\n","\n","print(\"\\n ===== target 기술 통계 =====\")\n","print(train['TotalCharges'].describe())"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"U_YZY2gvjyvc","executionInfo":{"status":"ok","timestamp":1722876159202,"user_tz":-540,"elapsed":1254,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"cb66c07b-267e-450d-e0cb-a1a29e0eda4d"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["===== 데이터 크기 =====\n","Train Shape: (4116, 19)\n","Test Shape: (1764, 18)\n","\n"," ===== 데이터 정보(자료형) 
=====\n","\n","RangeIndex: 4116 entries, 0 to 4115\n","Data columns (total 19 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 customerID 4116 non-null object \n"," 1 gender 4116 non-null object \n"," 2 SeniorCitizen 4116 non-null int64 \n"," 3 Partner 4116 non-null object \n"," 4 Dependents 4116 non-null object \n"," 5 tenure 4116 non-null int64 \n"," 6 PhoneService 4116 non-null object \n"," 7 MultipleLines 4116 non-null object \n"," 8 InternetService 4116 non-null object \n"," 9 OnlineSecurity 4116 non-null object \n"," 10 OnlineBackup 4116 non-null object \n"," 11 DeviceProtection 4116 non-null object \n"," 12 TechSupport 4116 non-null object \n"," 13 StreamingTV 4116 non-null object \n"," 14 StreamingMovies 4116 non-null object \n"," 15 Contract 4116 non-null object \n"," 16 PaperlessBilling 4116 non-null object \n"," 17 PaymentMethod 4116 non-null object \n"," 18 TotalCharges 4116 non-null float64\n","dtypes: float64(1), int64(2), object(16)\n","memory usage: 611.1+ KB\n","None\n","\n"," ===== train 결측치 수 =====\n","0\n","\n"," ===== test 결측치 수 =====\n","0\n","\n"," ===== customerID unique 수 =====\n","4116\n","\n"," ===== target 기술 통계 =====\n","count 4116.000000\n","mean 2566.580782\n","std 1911.356766\n","min 20.030000\n","25% 1020.922500\n","50% 2144.285000\n","75% 3765.670000\n","max 8589.600000\n","Name: TotalCharges, dtype: float64\n"]}]},{"cell_type":"code","source":["# 데이터 전처리\n","train = train.drop('customerID', axis=1)\n","test = test.drop(['customerID'], axis=1)\n","target = train.pop('TotalCharges')\n","\n","# 레이블 인코딩\n","from sklearn.preprocessing import LabelEncoder\n","cols = train.select_dtypes(include='O').columns\n","\n","for col in cols:\n"," le = LabelEncoder()\n"," train[col] = le.fit_transform(train[col])\n"," test[col] = le.transform(test[col])\n","\n","# 검증데이터 분리\n","from sklearn.model_selection import train_test_split\n","X_tr, X_val, y_tr, y_val = train_test_split(train, target, test_size=0.2, 
random_state=0)\n","\n","# 랜덤포레스트\n","from sklearn.ensemble import RandomForestRegressor\n","rf = RandomForestRegressor(random_state=0)\n","rf.fit(X_tr, y_tr)\n","pred = rf.predict(X_val)\n","\n","# MAE\n","from sklearn.metrics import mean_absolute_error\n","print(mean_absolute_error(y_val, pred))\n","\n","# LightGBM\n","import lightgbm as lgb\n","lg = lgb.LGBMRegressor(random_state=0, verbose=-1)\n","lg.fit(X_tr, y_tr)\n","pred = lg.predict(X_val)\n","print(mean_absolute_error(y_val, pred))\n","\n","# 최종 제출 파일\n","pred = rf.predict(test)\n","result = pd.DataFrame({\n"," 'pred':pred\n","})\n","result.to_csv(\"result.csv\", index=False)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-VHVlEn7eZ6j","executionInfo":{"status":"ok","timestamp":1722876170198,"user_tz":-540,"elapsed":9640,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"003fa2d7-b489-4a10-9556-90e68a018446"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["951.0960435538027\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/dask/dataframe/__init__.py:42: FutureWarning: \n","Dask dataframe query planning is disabled because dask-expr is not installed.\n","\n","You can install it with `pip install dask[dataframe]` or `conda install dask`.\n","This will raise in a future version.\n","\n"," warnings.warn(msg, FutureWarning)\n"]},{"output_type":"stream","name":"stdout","text":["952.7925407798712\n"]}]},{"cell_type":"code","source":["# 1. pred 행의 수\n","print(pred.shape)\n","\n","# 2. 
생성한 csv 확인\n","print(pd.read_csv(\"result.csv\").head())"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"gR0wVy3tkIRU","executionInfo":{"status":"ok","timestamp":1722876170199,"user_tz":-540,"elapsed":6,"user":{"displayName":"Tae Heon Kim","userId":"07653788752262629837"}},"outputId":"65d774a0-7227-4f22-eca9-d1f7f0a6bb77"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["(1764,)\n"," pred\n","0 3707.6212\n","1 923.7132\n","2 4057.4078\n","3 952.6143\n","4 1322.1638\n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"-JyPFkwlFHzv"},"execution_count":null,"outputs":[]}]}
--------------------------------------------------------------------------------
/part4/ch8/piq.csv:
--------------------------------------------------------------------------------
1 | PIQ,Brain,Height,Weight
2 | 132,85.78,62.5,127
3 | 96,86.54,68.0,135
4 | 84,90.49,66.3,134
5 | 134,79.06,62.0,122
6 | 86,88.91,70.0,180
7 | 102,83.18,63.0,114
8 | 128,107.95,70.0,151
9 | 128,107.95,70.0,151
10 | 131,93.55,72.0,171
11 | 94,89.4,64.5,139
12 | 131,93.55,72.0,171
13 | 128,96.54,68.8,172
14 | 128,95.5,68.0,132
15 | 150,103.84,73.3,143
16 | 131,93.55,72.0,171
17 | 137,94.96,67.0,191
18 | 89,93.59,75.5,179
19 | 150,103.84,73.3,143
20 | 134,79.06,62.0,122
21 | 81,83.43,66.5,143
22 | 124,92.41,69.0,155
23 | 128,95.5,68.0,132
24 | 84,79.86,68.0,140
25 | 72,79.35,63.0,106
26 | 124,86.67,66.5,159
27 | 120,85.22,68.5,127
28 | 96,86.54,68.0,135
29 | 128,96.54,68.8,172
30 | 74,93.0,74.0,148
31 | 98,85.43,66.0,175
32 | 134,79.06,62.0,122
33 | 147,95.55,68.8,172
34 | 84,80.8,66.3,136
35 | 134,95.15,65.0,147
36 | 84,79.86,68.0,140
37 | 90,87.89,66.0,146
38 | 147,95.55,68.8,172
39 | 110,106.25,77.0,187
40 | 150,103.84,73.3,143
41 | 84,90.59,76.5,186
42 | 124,86.67,66.5,159
43 | 98,85.43,66.0,175
44 | 84,90.49,66.3,134
45 | 124,94.94,70.5,144
46 | 90,87.89,66.0,146
47 | 102,94.51,73.5,178
48 | 94,89.4,64.5,139
49 | 134,95.15,65.0,147
50 | 150,103.84,73.3,143
51 | 131,99.13,64.5,138
52 |
--------------------------------------------------------------------------------
/part4/ch8/tourist.csv:
--------------------------------------------------------------------------------
1 | 나라,관광,공무,사업,기타
2 | 국가1,1184,270,380,55
3 | 국가2,1059,184,267,86
4 | 국가3,1129,168,261,50
5 | 국가4,692,106,214,125
6 | 국가5,1335,296,296,84
7 | 국가6,1263,147,204,119
8 | 국가7,1207,227,395,103
9 | 국가8,859,231,339,130
10 | 국가9,509,200,286,112
11 | 국가10,1223,280,321,58
12 | 국가11,777,178,309,111
13 | 국가12,1254,243,275,51
14 | 국가13,1304,248,384,131
15 | 국가14,1099,286,216,85
16 | 국가15,570,123,352,141
17 | 국가16,972,241,357,90
18 | 국가17,1100,217,349,86
19 | 국가18,896,185,310,98
20 | 국가19,814,148,225,75
21 | 국가20,1205,149,388,117
22 | 국가21,986,169,321,85
23 | 국가22,1051,269,318,80
24 | 국가23,587,263,317,79
25 | 국가24,674,292,389,83
26 | 국가25,1100,195,283,68
27 | 국가26,1349,297,361,67
28 | 국가27,1177,194,304,143
29 | 국가28,1037,100,360,134
30 | 국가29,1345,213,321,52
31 | 국가30,572,278,270,119
32 | 국가31,1277,136,231,62
33 | 국가32,1416,262,213,94
34 | 국가33,615,148,271,116
35 | 국가34,1476,193,384,141
36 | 국가35,1255,231,352,135
37 | 국가36,1209,198,279,89
38 | 국가37,1347,142,241,89
39 | 국가38,931,212,218,125
40 | 국가39,948,249,240,72
41 | 국가40,1350,227,382,80
42 | 국가41,599,100,211,67
43 | 국가42,1484,238,366,120
44 | 국가43,677,214,311,121
45 | 국가44,1255,143,293,68
46 | 국가45,1297,286,329,142
47 | 국가46,1159,227,318,93
48 | 국가47,647,123,244,133
49 | 국가48,1410,287,325,99
50 | 국가49,923,230,224,91
51 | 국가50,788,221,267,143
52 | 국가51,1461,198,203,96
53 | 국가52,765,162,235,71
54 | 국가53,1197,263,389,123
55 | 국가54,1139,223,397,139
56 | 국가55,1044,295,243,146
57 | 국가56,1043,182,232,141
58 | 국가57,1214,274,211,123
59 | 국가58,744,248,304,78
60 | 국가59,651,150,338,131
61 | 국가60,1175,255,382,108
62 | 국가61,1010,114,365,50
63 | 국가62,959,141,325,136
64 | 국가63,1382,158,356,113
65 | 국가64,683,293,311,66
66 | 국가65,528,136,202,86
67 | 국가66,1302,110,227,144
68 | 국가67,628,186,351,74
69 | 국가68,628,143,253,113
70 | 국가69,1432,204,251,117
71 | 국가70,553,111,374,101
72 | 국가71,1401,102,348,58
73 | 국가72,1050,151,381,106
74 | 국가73,988,180,229,141
75 | 국가74,1256,132,267,143
76 | 국가75,773,282,235,137
77 | 국가76,835,228,239,82
78 | 국가77,888,138,337,69
79 | 국가78,1117,119,273,122
80 | 국가79,542,274,241,121
81 | 국가80,942,142,351,137
82 | 국가81,1043,215,331,63
83 | 국가82,1388,284,246,108
84 | 국가83,757,288,378,131
85 | 국가84,821,177,308,105
86 | 국가85,1499,130,203,114
87 | 국가86,1437,124,231,125
88 | 국가87,557,225,209,142
89 | 국가88,791,102,338,86
90 | 국가89,1370,103,227,75
91 | 국가90,619,194,373,82
92 | 국가91,1279,207,399,92
93 | 국가92,930,113,367,64
94 | 국가93,582,212,261,136
95 | 국가94,591,140,285,78
96 | 국가95,1396,172,297,70
97 | 국가96,898,119,244,132
98 | 국가97,1111,195,234,118
99 | 국가98,1065,172,362,72
100 | 국가99,1408,254,288,149
101 | 국가100,1133,294,233,133
102 |
--------------------------------------------------------------------------------