├── README.md
├── .gitignore
├── template_WI24.ipynb
├── ProjectProposal_Group110_WI24.ipynb
├── template3.ipynb
├── template2.ipynb
└── DataCheckpoint_Group110_WI24.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | This is your group repo for your final project for COGS108.
2 |
3 | This repository is private, and is only visible to the course instructors and your group mates; it is not visible to anyone else.
4 |
5 | Template notebooks for each component are provided. Only work on the notebook prior to its due date. After each submission is due, move onto the next notebook (For example, after the proposal is due, start working in the Data Checkpoint notebook).
6 |
7 | This repository will be frozen on the final project due date. No further changes can be made after that time.
8 |
9 | Your project proposal and final project will be graded based solely on the corresponding project notebooks in this repository.
10 |
11 | Template Jupyter notebooks have been included, with your group number replacing the XXX in the following file names. By each due date, make sure a notebook with the corresponding name below is present in this repository (where XXX is replaced by your group number):
12 |
13 | - `ProjectProposal_groupXXX.ipynb`
14 | - `DataCheckpoint_groupXXX.ipynb`
15 | - `EDACheckpoint_groupXXX.ipynb`
16 | - `FinalProject_groupXXX.ipynb`
17 |
18 | This is *your* repo. You are free to manage the repo as you see fit, edit this README, add data files, add scripts, etc. So long as there are the four files above on due dates with the required information, the rest is up to you all.
19 |
20 | Also, you are free and encouraged to share this project after the course and to add it to your portfolio. Just be sure to fork it to your GitHub at the end of the quarter!
21 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 |
--------------------------------------------------------------------------------
/template_WI24.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# COGS 108 - Project Proposal"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Names\n",
15 | "\n",
16 | "- Ant Man\n",
17 | "- Hulk\n",
18 | "- Iron Man\n",
19 | "- Thor\n",
20 | "- Wasp"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "# Research Question"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "- Include a specific, clear data science question.\n",
35 | "- Make sure what you're measuring (variables) to answer the question is clear\n",
36 | "\n",
37 | "What is your research question? Include the specific question you're setting out to answer. This question should be specific, answerable with data, and clear. A general question with specific subquestions is permitted. (1-2 sentences)\n",
38 | "\n"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "## Background and Prior Work"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "\n",
53 | "- Include a general introduction to your topic\n",
54 | "- Include explanation of what work has been done previously\n",
55 | "- Include citations or links to previous work\n",
56 | "\n",
57 | "This section will present the background and context of your topic and question in a few paragraphs. Include a general introduction to your topic and then describe what information you currently know about the topic after doing your initial research. Include references to other projects who have asked similar questions or approached similar problems. Explain what others have learned in their projects.\n",
58 | "\n",
59 | "Find some relevant prior work, and reference those sources, summarizing what each did and what they learned. Even if you think you have a totally novel question, find the most similar prior work that you can and discuss how it relates to your project.\n",
60 | "\n",
61 | "References can be research publications, but they need not be. Blogs, GitHub repositories, company websites, etc., are all viable references if they are relevant to your project. It must be clear which information comes from which references. (2-3 paragraphs, including at least 2 references)\n",
62 | "\n",
63 | " **Use inline citation through HTML footnotes to specify which references support which statements** \n",
64 | "\n",
65 | "For example: After government genocide in the 20th century, real birds were replaced with surveillance drones designed to look just like birds.[1](#cite_note-1) Use a minimum of 2 or 3 citations, but we prefer more.[2](#cite_note-2) You need enough to fully explain and back up important facts. \n",
66 | "\n",
67 | "Note that if you click a footnote number in the paragraph above it will transport you to the proper entry in the footnotes list below. And if you click the ^ in the footnote entry, it will return you to the place in the main text where the footnote is made.\n",
68 | "\n",
69 | "To understand the HTML here, `<a name=\"...\"></a>` is a tag that allows you to produce a named reference for a given location. Markdown has the construction `[text with hyperlink](#named reference)` that will produce a clickable link that transports you to the named reference.\n",
70 | "\n",
71 | "1. [^](#cite_ref-1) Lorenz, T. (9 Dec 2021) Birds Aren’t Real, or Are They? Inside a Gen Z Conspiracy Theory. *The New York Times*. https://www.nytimes.com/2021/12/09/technology/birds-arent-real-gen-z-misinformation.html \n",
72 | "2. [^](#cite_ref-2) Also refs should be important to the background, not some randomly chosen vaguely related stuff. Include a web link if possible in refs as above.\n"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "# Hypothesis\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "\n",
87 | "- Include your team's hypothesis\n",
88 | "- Ensure that this hypothesis is clear to readers\n",
89 | "- Explain why you think this will be the outcome (what was your thinking?)\n",
90 | "\n",
91 | "What is your main hypothesis/predictions about what the answer to your question is? Briefly explain your thinking. (2-3 sentences)"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "# Data"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "1. Explain what the **ideal** dataset you would want to answer this question is. (This should include: What variables? How many observations? Who/what/how would these data be collected? How would these data be stored/organized?)\n",
106 | "1. Search for potential **real** datasets that could provide you with something useful for this project. You do not have to find every piece of data you will use, but you do need to have demonstrated some idea that (a) this data is gettable and (b) that this data may be different from what your ideal is."
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "# Ethics & Privacy"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "- Thoughtful discussion of ethical concerns included\n",
121 | "- Ethical concerns consider the whole data science process (question asked, data collected, data being used, the bias in data, analysis, post-analysis, etc.)\n",
122 | "- How your group handled bias/ethical concerns clearly described\n",
123 | "\n",
124 | "Acknowledge and address any ethics & privacy related issues of your question(s), proposed dataset(s), and/or analyses. Use the information provided in lecture to guide your group discussion and thinking. If you need further guidance, check out [Deon's Ethics Checklist](http://deon.drivendata.org/#data-science-ethics-checklist). In particular:\n",
125 | "\n",
126 | "- Are there any biases/privacy/terms of use issues with the data you proposed?\n",
127 | "- Are there potential biases in your dataset(s), in terms of who it composes, and how it was collected, that may be problematic in terms of it allowing for equitable analysis? (For example, does your data exclude particular populations, or is it likely to reflect particular human biases in a way that could be a problem?)\n",
128 | "- How will you set out to detect these specific biases before, during, and after/when communicating your analysis?\n",
129 | "- Are there any other issues related to your topic area, data, and/or analyses that are potentially problematic in terms of data privacy and equitable impact?\n",
130 | "- How will you handle issues you identified?"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {},
136 | "source": [
137 | "# Team Expectations "
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "\n",
145 | "Read over the [COGS108 Team Policies](https://github.com/COGS108/Projects/blob/master/COGS108_TeamPolicies.md) individually. Then, include your group’s expectations of one another for successful completion of your COGS108 project below. Discuss and agree on what all of your expectations are. Discuss how your team will communicate throughout the quarter and consider how you will communicate respectfully should conflicts arise. By including each member’s name above and by adding their name to the submission, you are indicating that you have read the COGS108 Team Policies, accept your team’s expectations below, and have every intention to fulfill them. These expectations are for your team’s use and benefit — they won’t be graded for their details.\n",
146 | "\n",
147 | "* *Team Expectation 1*\n",
148 | "* *Team Expectation 2*\n",
149 | "* *Team Expectation 3*\n",
150 | "* ..."
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {},
156 | "source": [
157 | "# Project Timeline Proposal"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "Specify your team's specific project timeline. An example timeline has been provided. Change the dates, times, names, and details to fit your group's plan.\n",
165 | "\n",
166 | "If you think you will need any special resources or training outside what we have covered in COGS 108 to solve your problem, then your proposal should state these clearly. For example, if you have selected a problem that involves implementing multiple neural networks, please state this so we can make sure you know what you’re doing and so we can point you to resources you will need to implement your project. Note that you are not required to use outside methods.\n",
167 | "\n",
168 | "\n",
169 | "\n",
170 | "| Meeting Date | Meeting Time| Completed Before Meeting | Discuss at Meeting |\n",
171 | "|---|---|---|---|\n",
172 | "| 1/20 | 1 PM | Read & Think about COGS 108 expectations; brainstorm topics/questions | Determine best form of communication; Discuss and decide on final project topic; discuss hypothesis; begin background research | \n",
173 | "| 1/26 | 10 AM | Do background research on topic | Discuss ideal dataset(s) and ethics; draft project proposal | \n",
174 | "| 2/1 | 10 AM | Edit, finalize, and submit proposal; Search for datasets | Discuss Wrangling and possible analytical approaches; Assign group members to lead each specific part |\n",
175 | "| 2/14 | 6 PM | Import & Wrangle Data (Ant Man); EDA (Hulk) | Review/Edit wrangling/EDA; Discuss Analysis Plan |\n",
176 | "| 2/23 | 12 PM | Finalize wrangling/EDA; Begin Analysis (Iron Man; Thor) | Discuss/edit Analysis; Complete project check-in |\n",
177 | "| 3/13 | 12 PM | Complete analysis; Draft results/conclusion/discussion (Wasp)| Discuss/edit full project |\n",
178 | "| 3/20 | Before 11:59 PM | NA | Turn in Final Project & Group Project Surveys |"
179 | ]
180 | }
181 | ],
182 | "metadata": {
183 | "kernelspec": {
184 | "display_name": "Python 3 (ipykernel)",
185 | "language": "python",
186 | "name": "python3"
187 | },
188 | "language_info": {
189 | "codemirror_mode": {
190 | "name": "ipython",
191 | "version": 3
192 | },
193 | "file_extension": ".py",
194 | "mimetype": "text/x-python",
195 | "name": "python",
196 | "nbconvert_exporter": "python",
197 | "pygments_lexer": "ipython3",
198 | "version": "3.9.7"
199 | }
200 | },
201 | "nbformat": 4,
202 | "nbformat_minor": 2
203 | }
204 |
--------------------------------------------------------------------------------
/ProjectProposal_Group110_WI24.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# COGS 108 - Project Proposal:"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Names\n",
15 | "\n",
16 | "Sophia Ashraf
\n",
17 | "Dylan Oquendo
\n",
18 | "Karun Mokha
\n",
19 | "Jake Kondo
\n",
20 | "Ekrem Ersoz"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "# Research Question"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "**How did the interplay between mental health and specific Covid-19-related events influence pregnancy outcomes, and what novel patterns emerge when comparing pre-pandemic, pandemic, and post-vaccine introduction phases?**\n",
35 | "\n",
36 | "**Sub Questions:**
\n",
37 | "\n",
38 | "**Normal time data (overall population):**
\n",
39 | "Analyze pregnancy outcomes data from periods before the COVID-19 pandemic as a baseline. This will help establish what \"normal\" outcomes look like, against which pandemic-era outcomes can be compared.
\n",
40 | "\n",
41 | "**Was it related to mental health:**
\n",
42 | "Investigate whether changes in pregnancy outcomes during the COVID-19 pandemic correlate with reported changes in mental health statistics. This involves collecting data on mental health issues among pregnant individuals during the pandemic and comparing these with pregnancy outcomes.
\n",
43 | "\n",
44 | "**How did these relate to large events (e.g., vaccines)? Did that change anything…:**
\n",
45 | "Examine the timeline of major Covid-19-related events, such as lockdowns, infection waves, and the introduction of vaccines, and analyze their impact on mental health and pregnancy outcomes. This could involve comparing pregnancy outcomes and mental health data before and after such events to identify any significant changes or trends."
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "## Background and Prior Work"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "The impact that COVID-19 had on mental health around the globe has been acknowledged and studied by scientific researchers to examine its effects, particularly on specific “at risk” populations such as pregnant and postpartum women. In fact, the World Health Organization (WHO) estimates anxiety and depression prevalence increased 25% globally during the height of the pandemic. A systematic review and meta-analysis conducted by Gayathri Delanerolle, department of Health Care Science at Oxford, titled “The prevalence of mental ill-health in women during pregnancy and after childbirth during the COVID-19 pandemic” found a significant increase in negative mental health outcomes specifically in women who were pregnant and postpartum. They quantified depression, anxiety, and stress, and suggested that the increase of these conditions highlights a need for mental health resources amongst maternal healthcare services, especially during a pandemic as impactful as COVID-19. This research provides a strong empirical context for our group, illustrating the vulnerability of pregnant women to negative mental health outcomes during the pandemic. \n",
60 | "\n",
61 | "For our project, we also want to contextualize maternal stress during pregnancy outside of a pandemic so that we have a baseline for comparison. In an article titled “Prenatal developmental origins of behavior and mental health: The influence of maternal stress in pregnancy,” Van den Bergh et al. reviewed current research and found that stress during pregnancy can have long-lasting behavioral and mental health effects later in life. This includes affecting fetal development in a variety of ways, including hormonal changes and altered brain development. Overall, the article highlights how critical the pregnancy period is for mothers, and how during extra stressful times there could be profound, long-lasting effects on child development. \n",
62 | "\n",
63 | "These two studies serve as a foundation for our project, providing context for stress and mental health in pregnant and postpartum women: a meta-analysis on mental health during the pandemic, and a systematic review of current research on maternal stress and child development. Delanerolle et al.’s article found an increase in mental health issues amongst pregnant and postpartum women during the pandemic, while Van den Bergh et al.’s comprehensive review gave us insight into the origins of prenatal mental health by highlighting cognitive and neurodevelopmental dysfunction. These findings highlight the importance of our research question. Our goal is to examine the relationship between mental health during COVID-19 and pregnancy outcomes; our project aims to contribute to prior research and understanding of these interactions and to support maternal well-being during pandemics, now and in the future. \n",
64 | "\n",
65 | "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9883834/\n",
66 | "\n",
67 | "https://pubmed.ncbi.nlm.nih.gov/28757456/\n"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "# Hypothesis\n"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "Increased stress and mental health challenges faced by pregnant individuals during the Covid-19 pandemic negatively impacted pregnancy outcomes. We arrived at our prediction by considering the radical effect of the Covid-19 pandemic on an individual’s emotional and mental well being. We also paired this with the already existing stress that a woman faces throughout her pregnancy, and came up with our hypothesis. "
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "# Data"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "**Variables:** Mental health indicators (e.g., levels of stress, anxiety, depression), pregnancy outcomes (e.g., gestational age at birth, birth weight, any complications), Covid-19 impact metrics (e.g., infection status, lockdown impact), major pandemic events timelines (e.g., start of lockdowns, vaccine rollouts).\n",
96 | "\n",
97 | "**Population:** Pregnant individuals during the COVID-19 pandemic, with a comparison group from before the pandemic as a baseline.\n",
98 | "\n",
99 | "**Time Period:** Data should span from before the COVID-19 pandemic (as a baseline) and continue through the pandemic, ideally with timestamps to align with major pandemic events for dynamic analysis.\n",
100 | "\n",
101 | "This dataset from the Pregnancy during the COVID-19 Pandemic (PdP) project includes variables such as maternal age, household income, maternal education levels, Edinburgh Postnatal Depression Scale (EPDS) scores, PROMIS Anxiety scores, gestational age at birth, delivery date, birth length and weight, delivery mode, NICU stay, survey language, and perceived threat levels to life and unborn baby due to COVID-19. These variables offer comprehensive insights into the socio-economic, psychological, and health-related aspects of pregnant individuals' experiences during the pandemic, allowing for a multifaceted analysis of the impact of COVID-19 on pregnancy outcomes.\n",
102 | "\n",
103 | "**https://www.kaggle.com/datasets/yeganehbavafa/mental-health-in-the-pregnancy-during-the-covid-19/data**"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "# Ethics & Privacy"
111 | ]
112 | },
113 | {
114 | "cell_type": "markdown",
115 | "metadata": {},
116 | "source": [
117 | "The dataset from the Pregnancy during the COVID-19 Pandemic (PdP) project might exhibit biases such as geographic concentration (limited to Canada, possibly not reflecting experiences in other healthcare systems or cultures), socio-economic and educational disparities (respondents may skew towards certain income or education levels based on survey reach and accessibility), and potential language barriers (despite survey language options, nuances in understanding or expression may affect responses). For the variables which we used such as Edinburgh Postnatal Depression Scale and the PROMIS Anxiety score those are both established statistical measurements in research which have already gotten consent from participants to not only partake in the research but to give permission to publish the findings, so though we are looking at their numerical measurements we’ve ensured the ethics and privacy of those numbers. To address these, analyzing demographic data against wider population statistics and considering the socio-cultural context in interpretations will be important. The only issues about privacy that could come up would be mental health information of the individuals. "
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "# Team Expectations "
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "* It's agreed that there should be equal effort from all members \n",
132 | "* The team plans to meet once a week in person for discussions and updates on the project's progress. \n",
133 | "* The team will efficiently communicate regularly and reply to the group messages in a timely manner.\n",
134 | "* The team will respect one another's opinions and ideas and allow for a safe environment to share ideas.\n"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "metadata": {},
140 | "source": [
141 | "# Project Timeline Proposal"
142 | ]
143 | },
144 | {
145 | "cell_type": "markdown",
146 | "metadata": {},
147 | "source": [
148 | "**February 22nd** - Met in person to discuss Checkpoint #1 and Project Proposal
\n",
149 | "**February 23rd** - attend OH to get approval for new project idea
\n",
150 | "**February 25th** - Complete Checkpoint #1, Rewriting the project proposal, discord call as a group
\n",
151 | "**February 28th** - Had team meeting online to work on the project
\n",
152 | "**March 8th** - Held team meeting in-person and collaborated on Checkpoint #2
\n",
153 | "**March 10th** - Complete Checkpoint #2
\n",
154 | "**March 14th** - Have a team meeting in person to finalize the project, attend OH if needed
\n",
155 | "**March 15th** - Complete the Final Project Report, Have a meeting to review the report
\n",
156 | "**March 20th** - Submit all project work
"
157 | ]
158 | }
159 | ],
160 | "metadata": {
161 | "kernelspec": {
162 | "display_name": "Python 3 (ipykernel)",
163 | "language": "python",
164 | "name": "python3"
165 | },
166 | "language_info": {
167 | "codemirror_mode": {
168 | "name": "ipython",
169 | "version": 3
170 | },
171 | "file_extension": ".py",
172 | "mimetype": "text/x-python",
173 | "name": "python",
174 | "nbconvert_exporter": "python",
175 | "pygments_lexer": "ipython3",
176 | "version": "3.9.5"
177 | }
178 | },
179 | "nbformat": 4,
180 | "nbformat_minor": 2
181 | }
182 |
--------------------------------------------------------------------------------
/template3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# COGS 108 - Final Project (change this to your project's title)\n",
8 | "\n",
9 | "# Permissions\n",
10 | "\n",
11 | "Place an `X` in the appropriate bracket below to specify if you would like your group's project to be made available to the public. (Note that student names will be included, but PIDs will be scraped from any groups who include their PIDs.)\n",
12 | "\n",
13 | "* [ ] YES - make available\n",
14 | "* [ ] NO - keep private\n",
15 | "\n",
16 | "# Names\n",
17 | "\n",
18 | "- Ant Man\n",
19 | "- Hulk\n",
20 | "- Iron Man\n",
21 | "- Thor\n",
22 | "- Wasp\n",
23 | "\n",
24 | "# Abstract\n",
25 | "\n",
26 | "Please write one to four paragraphs that describe a very brief overview of why you did this, how you did it, and the major findings and conclusions."
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "# Research Question"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "- Include a specific, clear data science question.\n",
41 | "- Make sure what you're measuring (variables) to answer the question is clear\n",
42 | "\n",
43 | "What is your research question? Include the specific question you're setting out to answer. This question should be specific, answerable with data, and clear. A general question with specific subquestions is permitted. (1-2 sentences)\n",
44 | "\n"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "## Background and Prior Work"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "\n",
59 | "- Include a general introduction to your topic\n",
60 | "- Include explanation of what work has been done previously\n",
61 | "- Include citations or links to previous work\n",
62 | "\n",
63 | "This section will present the background and context of your topic and question in a few paragraphs. Include a general introduction to your topic and then describe what information you currently know about the topic after doing your initial research. Include references to other projects who have asked similar questions or approached similar problems. Explain what others have learned in their projects.\n",
64 | "\n",
65 | "Find some relevant prior work, and reference those sources, summarizing what each did and what they learned. Even if you think you have a totally novel question, find the most similar prior work that you can and discuss how it relates to your project.\n",
66 | "\n",
67 | "References can be research publications, but they need not be. Blogs, GitHub repositories, company websites, etc., are all viable references if they are relevant to your project. It must be clear which information comes from which references. (2-3 paragraphs, including at least 2 references)\n",
68 | "\n",
69 | " **Use inline citation through HTML footnotes to specify which references support which statements** \n",
70 | "\n",
71 | "For example: After government genocide in the 20th century, real birds were replaced with surveillance drones designed to look just like birds.[1](#cite_note-1) Use a minimum of 2 or 3 citations, but we prefer more.[2](#cite_note-2) You need enough to fully explain and back up important facts. \n",
72 | "\n",
73 | "Note that if you click a footnote number in the paragraph above it will transport you to the proper entry in the footnotes list below. And if you click the ^ in the footnote entry, it will return you to the place in the main text where the footnote is made.\n",
74 | "\n",
75 | "To understand the HTML here, `<a name=\"...\"></a>` is a tag that allows you to produce a named reference for a given location. Markdown has the construction `[text with hyperlink](#named reference)` that will produce a clickable link that transports you to the named reference.\n",
76 | "\n",
77 | "1. [^](#cite_ref-1) Lorenz, T. (9 Dec 2021) Birds Aren’t Real, or Are They? Inside a Gen Z Conspiracy Theory. *The New York Times*. https://www.nytimes.com/2021/12/09/technology/birds-arent-real-gen-z-misinformation.html \n",
78 | "2. [^](#cite_ref-2) Also refs should be important to the background, not some randomly chosen vaguely related stuff. Include a web link if possible in refs as above.\n"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "# Hypothesis\n"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "\n",
93 | "- Include your team's hypothesis\n",
94 | "- Ensure that this hypothesis is clear to readers\n",
95 | "- Explain why you think this will be the outcome (what was your thinking?)\n",
96 | "\n",
97 | "What is your main hypothesis/predictions about what the answer to your question is? Briefly explain your thinking. (2-3 sentences)"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "# Data"
105 | ]
106 | },
107 | {
108 | "cell_type": "markdown",
109 | "metadata": {},
110 | "source": [
111 | "## Data overview\n",
112 | "\n",
113 | "For each dataset include the following information\n",
114 | "- Dataset #1\n",
115 | " - Dataset Name:\n",
116 | " - Link to the dataset:\n",
117 | " - Number of observations:\n",
118 | " - Number of variables:\n",
119 | "- Dataset #2 (if you have more than one!)\n",
120 | " - Dataset Name:\n",
121 | " - Link to the dataset:\n",
122 | " - Number of observations:\n",
123 | " - Number of variables:\n",
124 | "- etc\n",
125 | "\n",
126 | "Now write 2 - 5 sentences describing each dataset here. Include a short description of the important variables in the dataset; what the metrics and datatypes are, what concepts they may be proxies for. Include information about how you would need to wrangle/clean/preprocess the dataset\n",
127 | "\n",
128 | "If you plan to use multiple datasets, add a few sentences about how you plan to combine these datasets."
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {},
134 | "source": [
135 | "## Dataset #1 (use name instead of number here)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": null,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "## YOUR CODE TO LOAD/CLEAN/TIDY/WRANGLE THE DATA GOES HERE\n",
145 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION "
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "## Dataset #2 (if you have more than one, use name instead of number here)"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "## YOUR CODE TO LOAD/CLEAN/TIDY/WRANGLE THE DATA GOES HERE\n",
162 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION "
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 | "# Results\n",
170 | "\n",
171 | "## Exploratory Data Analysis\n",
172 | "\n",
173 | "Carry out whatever EDA you need to for your project. Because every project will be different we can't really give you much of a template at this point. But please make sure you describe the what and why in text here as well as providing interpretation of results and context."
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {},
179 | "source": [
180 | "## First Analysis You Did - Give it a better title\n",
181 | "\n",
182 | "Some more words and stuff. Remember notebooks work best if you interleave the code that generates a result with properly annotated figures and text that puts these results into context."
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "## YOUR CODE HERE\n",
192 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "## Second Analysis You Did - Give it a better title\n",
200 | "\n",
201 | "Some more words and stuff. Remember notebooks work best if you interleave the code that generates a result with properly annotated figures and text that puts these results into context."
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": null,
207 | "metadata": {},
208 | "outputs": [],
209 | "source": [
210 | "## YOUR CODE HERE\n",
211 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "## ETC AD NAUSEAM\n",
219 | "\n",
220 | "Some more words and stuff. Remember notebooks work best if you interleave the code that generates a result with properly annotated figures and text that puts these results into context."
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": null,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "## YOUR CODE HERE\n",
230 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "# Ethics & Privacy"
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {},
243 | "source": [
244 | "- Thoughtful discussion of ethical concerns included\n",
245 | "- Ethical concerns consider the whole data science process (question asked, data collected, data being used, the bias in data, analysis, post-analysis, etc.)\n",
246 | "- How your group handled bias/ethical concerns clearly described\n",
247 | "\n",
248 | "Acknowledge and address any ethics & privacy related issues of your question(s), proposed dataset(s), and/or analyses. Use the information provided in lecture to guide your group discussion and thinking. If you need further guidance, check out [Deon's Ethics Checklist](http://deon.drivendata.org/#data-science-ethics-checklist). In particular:\n",
249 | "\n",
250 | "- Are there any biases/privacy/terms of use issues with the data you proposed?\n",
251 | "- Are there potential biases in your dataset(s), in terms of who it comprises and how it was collected, that may be problematic in terms of it allowing for equitable analysis? (For example, does your data exclude particular populations, or is it likely to reflect particular human biases in a way that could be a problem?)\n",
252 | "- How will you set out to detect these specific biases before, during, and after/when communicating your analysis?\n",
253 | "- Are there any other issues related to your topic area, data, and/or analyses that are potentially problematic in terms of data privacy and equitable impact?\n",
254 | "- How will you handle issues you identified?"
255 | ]
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {},
260 | "source": [
261 | "# Discussion and Conclusion\n",
262 | "\n",
263 | "Wrap it all up here. Somewhere between 3 and 10 paragraphs roughly. A good time to refer back to your Background section and review how this work extended the previous stuff. \n",
264 | "\n",
265 | "\n",
266 | "# Team Contributions\n",
267 | "\n",
268 | "Specify who did what. This should be pretty granular, perhaps bullet points, no more than a few sentences per person."
269 | ]
270 | }
271 | ],
272 | "metadata": {
273 | "kernelspec": {
274 | "display_name": "Python 3 (ipykernel)",
275 | "language": "python",
276 | "name": "python3"
277 | },
278 | "language_info": {
279 | "codemirror_mode": {
280 | "name": "ipython",
281 | "version": 3
282 | },
283 | "file_extension": ".py",
284 | "mimetype": "text/x-python",
285 | "name": "python",
286 | "nbconvert_exporter": "python",
287 | "pygments_lexer": "ipython3",
288 | "version": "3.9.7"
289 | }
290 | },
291 | "nbformat": 4,
292 | "nbformat_minor": 2
293 | }
294 |
--------------------------------------------------------------------------------
/template2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "**If you lost points on the last checkpoint you can get them back by responding to TA/IA feedback** \n",
8 | "\n",
9 | "Update/change the relevant sections where you lost those points, make sure you respond on GitHub Issues to your TA/IA to call their attention to the changes you made here.\n",
10 | "\n",
11 | "Please update your Timeline... no battle plan survives contact with the enemy, so make sure we understand how your plans have changed."
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# COGS 108 - EDA Checkpoint"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "# Names\n",
26 | "\n",
27 | "- Ant Man\n",
28 | "- Hulk\n",
29 | "- Iron Man\n",
30 | "- Thor\n",
31 | "- Wasp"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "# Research Question"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "- Include a specific, clear data science question.\n",
46 | "- Make sure what you're measuring (variables) to answer the question is clear\n",
47 | "\n",
48 | "What is your research question? Include the specific question you're setting out to answer. This question should be specific, answerable with data, and clear. A general question with specific subquestions is permitted. (1-2 sentences)\n",
49 | "\n"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "## Background and Prior Work"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "\n",
64 | "- Include a general introduction to your topic\n",
65 | "- Include explanation of what work has been done previously\n",
66 | "- Include citations or links to previous work\n",
67 | "\n",
68 | "This section will present the background and context of your topic and question in a few paragraphs. Include a general introduction to your topic and then describe what information you currently know about the topic after doing your initial research. Include references to other projects that have asked similar questions or approached similar problems. Explain what others have learned in their projects.\n",
69 | "\n",
70 | "Find some relevant prior work, and reference those sources, summarizing what each did and what they learned. Even if you think you have a totally novel question, find the most similar prior work that you can and discuss how it relates to your project.\n",
71 | "\n",
72 | "References can be research publications, but they need not be. Blogs, GitHub repositories, company websites, etc., are all viable references if they are relevant to your project. It must be clear which information comes from which references. (2-3 paragraphs, including at least 2 references)\n",
73 | "\n",
74 | " **Use inline citation through HTML footnotes to specify which references support which statements** \n",
75 | "\n",
76 | "For example: After government genocide in the 20th century, real birds were replaced with surveillance drones designed to look just like birds.[1](#cite_note-1) Use a minimum of 2 or 3 citations, but we prefer more.[2](#cite_note-2) You need enough to fully explain and back up important facts. \n",
77 | "\n",
78 | "Note that if you click a footnote number in the paragraph above it will transport you to the proper entry in the footnotes list below. And if you click the ^ in the footnote entry, it will return you to the place in the main text where the footnote is made.\n",
79 | "\n",
80 | "To understand the HTML here, `<a name=\"#...\"> </a>` is a tag that allows you to produce a named reference for a given location. Markdown has the construction `[text with hyperlink](#named reference)` that will produce a clickable link that transports you to the named reference.\n",
81 | "\n",
82 | "1. [^](#cite_ref-1) Lorenz, T. (9 Dec 2021) Birds Aren’t Real, or Are They? Inside a Gen Z Conspiracy Theory. *The New York Times*. https://www.nytimes.com/2021/12/09/technology/birds-arent-real-gen-z-misinformation.html \n",
83 | "2. [^](#cite_ref-2) Also refs should be important to the background, not some randomly chosen vaguely related stuff. Include a web link if possible in refs as above.\n"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "# Hypothesis\n"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "\n",
98 | "- Include your team's hypothesis\n",
99 | "- Ensure that this hypothesis is clear to readers\n",
100 | "- Explain why you think this will be the outcome (what was your thinking?)\n",
101 | "\n",
102 | "What is your main hypothesis/predictions about what the answer to your question is? Briefly explain your thinking. (2-3 sentences)"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "# Data"
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {},
115 | "source": [
116 | "## Data overview\n",
117 | "\n",
118 | "For each dataset include the following information\n",
119 | "- Dataset #1\n",
120 | " - Dataset Name:\n",
121 | " - Link to the dataset:\n",
122 | " - Number of observations:\n",
123 | " - Number of variables:\n",
124 | "- Dataset #2 (if you have more than one!)\n",
125 | " - Dataset Name:\n",
126 | " - Link to the dataset:\n",
127 | " - Number of observations:\n",
128 | " - Number of variables:\n",
129 | "- etc\n",
130 | "\n",
131 | "Now write 2 - 5 sentences describing each dataset here. Include a short description of the important variables in the dataset; what the metrics and datatypes are, what concepts they may be proxies for. Include information about how you would need to wrangle/clean/preprocess the dataset\n",
132 | "\n",
133 | "If you plan to use multiple datasets, add a few sentences about how you plan to combine these datasets."
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "## Dataset #1 (use name instead of number here)"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {},
147 | "outputs": [],
148 | "source": [
149 | "## YOUR CODE TO LOAD/CLEAN/TIDY/WRANGLE THE DATA GOES HERE\n",
150 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION "
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {},
156 | "source": [
157 | "## Dataset #2 (if you have more than one, use name instead of number here)"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "metadata": {},
164 | "outputs": [],
165 | "source": [
166 | "## YOUR CODE TO LOAD/CLEAN/TIDY/WRANGLE THE DATA GOES HERE\n",
167 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION "
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "# Results\n",
175 | "\n",
176 | "## Exploratory Data Analysis\n",
177 | "\n",
178 | "Carry out whatever EDA you need to for your project. Because every project will be different we can't really give you much of a template at this point. But please make sure you describe the what and why in text here as well as providing interpretation of results and context."
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "### Section 1 of EDA - please give it a better title than this\n",
186 | "\n",
187 | "Some more words and stuff. Remember notebooks work best if you interleave the code that generates a result with properly annotated figures and text that puts these results into context."
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "## YOUR CODE HERE\n",
197 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "### Section 2 of EDA if you need it - please give it a better title than this\n",
205 | "\n",
206 | "Some more words and stuff. Remember notebooks work best if you interleave the code that generates a result with properly annotated figures and text that puts these results into context."
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "## YOUR CODE HERE\n",
216 | "## FEEL FREE TO ADD MULTIPLE CELLS PER SECTION"
217 | ]
218 | },
219 | {
220 | "cell_type": "markdown",
221 | "metadata": {},
222 | "source": [
223 | "# Ethics & Privacy"
224 | ]
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {},
229 | "source": [
230 | "- Thoughtful discussion of ethical concerns included\n",
231 | "- Ethical concerns consider the whole data science process (question asked, data collected, data being used, the bias in data, analysis, post-analysis, etc.)\n",
232 | "- How your group handled bias/ethical concerns clearly described\n",
233 | "\n",
234 | "Acknowledge and address any ethics & privacy related issues of your question(s), proposed dataset(s), and/or analyses. Use the information provided in lecture to guide your group discussion and thinking. If you need further guidance, check out [Deon's Ethics Checklist](http://deon.drivendata.org/#data-science-ethics-checklist). In particular:\n",
235 | "\n",
236 | "- Are there any biases/privacy/terms of use issues with the data you proposed?\n",
237 | "- Are there potential biases in your dataset(s), in terms of who it comprises and how it was collected, that may be problematic in terms of it allowing for equitable analysis? (For example, does your data exclude particular populations, or is it likely to reflect particular human biases in a way that could be a problem?)\n",
238 | "- How will you set out to detect these specific biases before, during, and after/when communicating your analysis?\n",
239 | "- Are there any other issues related to your topic area, data, and/or analyses that are potentially problematic in terms of data privacy and equitable impact?\n",
240 | "- How will you handle issues you identified?"
241 | ]
242 | },
243 | {
244 | "cell_type": "markdown",
245 | "metadata": {},
246 | "source": [
247 | "# Team Expectations "
248 | ]
249 | },
250 | {
251 | "cell_type": "markdown",
252 | "metadata": {},
253 | "source": [
254 | "\n",
255 | "Read over the [COGS108 Team Policies](https://github.com/COGS108/Projects/blob/master/COGS108_TeamPolicies.md) individually. Then, include your group’s expectations of one another for successful completion of your COGS108 project below. Discuss and agree on what all of your expectations are. Discuss how your team will communicate throughout the quarter and consider how you will communicate respectfully should conflicts arise. By including each member’s name above and by adding their name to the submission, you are indicating that you have read the COGS108 Team Policies, accept your team’s expectations below, and have every intention to fulfill them. These expectations are for your team’s use and benefit — they won’t be graded for their details.\n",
256 | "\n",
257 | "* *Team Expectation 1*\n",
258 | "* *Team Expectation 2*\n",
259 | "* *Team Expectation 3*\n",
260 | "* ..."
261 | ]
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "metadata": {},
266 | "source": [
267 | "# Project Timeline Proposal"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {},
273 | "source": [
274 | "Specify your team's specific project timeline. An example timeline has been provided. Change the dates, times, names, and details to fit your group's plan.\n",
275 | "\n",
276 | "If you think you will need any special resources or training outside what we have covered in COGS 108 to solve your problem, then your proposal should state these clearly. For example, if you have selected a problem that involves implementing multiple neural networks, please state this so we can make sure you know what you’re doing and so we can point you to resources you will need to implement your project. Note that you are not required to use outside methods.\n",
277 | "\n",
278 | "\n",
279 | "\n",
280 | "| Meeting Date | Meeting Time| Completed Before Meeting | Discuss at Meeting |\n",
281 | "|---|---|---|---|\n",
282 | "| 1/20 | 1 PM | Read & Think about COGS 108 expectations; brainstorm topics/questions | Determine best form of communication; Discuss and decide on final project topic; discuss hypothesis; begin background research | \n",
283 | "| 1/26 | 10 AM | Do background research on topic | Discuss ideal dataset(s) and ethics; draft project proposal | \n",
284 | "| 2/1 | 10 AM | Edit, finalize, and submit proposal; Search for datasets | Discuss Wrangling and possible analytical approaches; Assign group members to lead each specific part |\n",
285 | "| 2/14 | 6 PM | Import & Wrangle Data (Ant Man); EDA (Hulk) | Review/Edit wrangling/EDA; Discuss Analysis Plan |\n",
286 | "| 2/23 | 12 PM | Finalize wrangling/EDA; Begin Analysis (Iron Man; Thor) | Discuss/edit Analysis; Complete project check-in |\n",
287 | "| 3/13 | 12 PM | Complete analysis; Draft results/conclusion/discussion (Wasp)| Discuss/edit full project |\n",
288 | "| 3/20 | Before 11:59 PM | NA | Turn in Final Project & Group Project Surveys |"
289 | ]
290 | }
291 | ],
292 | "metadata": {
293 | "kernelspec": {
294 | "display_name": "Python 3 (ipykernel)",
295 | "language": "python",
296 | "name": "python3"
297 | },
298 | "language_info": {
299 | "codemirror_mode": {
300 | "name": "ipython",
301 | "version": 3
302 | },
303 | "file_extension": ".py",
304 | "mimetype": "text/x-python",
305 | "name": "python",
306 | "nbconvert_exporter": "python",
307 | "pygments_lexer": "ipython3",
308 | "version": "3.9.7"
309 | }
310 | },
311 | "nbformat": 4,
312 | "nbformat_minor": 2
313 | }
314 |
--------------------------------------------------------------------------------
/DataCheckpoint_Group110_WI24.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "f69d488e",
6 | "metadata": {},
7 | "source": [
8 | "
Checkpoint #1: Data
\n",
9 | "\n",
10 | "\n",
11 | "# NAMES:
\n",
12 | "Sophia Ashraf
\n",
13 | "Dylan Oquendo
\n",
14 | "Karun Mokha
\n",
15 | "Jake Kondo
\n",
16 | "Ekrem Ersoz\n",
17 | "\n",
18 | "\n",
19 | "# RESEARCH QUESTION:\n",
20 | "\n",
21 | "\n",
22 | "**How did the interplay between mental health and specific Covid-19-related events influence pregnancy outcomes, and what novel patterns emerge when comparing pre-pandemic, pandemic, and post-vaccine introduction phases?**\n",
23 | "\n",
24 | "**Sub Questions:**\n",
25 | "\n",
26 | "**Normal time data (overall population):**
\n",
27 | "Analyze pregnancy outcomes data from periods before the COVID-19 pandemic as a baseline. This will help establish what \"normal\" outcomes look like, against which pandemic-era outcomes can be compared.
\n",
28 | "\n",
29 | "**Was it related to mental health:**
\n",
30 | "Investigate whether changes in pregnancy outcomes during the COVID-19 pandemic correlate with reported changes in mental health statistics. This involves collecting data on mental health issues among pregnant individuals during the pandemic and comparing these with pregnancy outcomes.
\n",
31 | "\n",
32 | "**How did these relate to large events (e.g., vaccines)?**
\n",
33 | "Examine the timeline of major Covid-19-related events, such as lockdowns, infection waves, and the introduction of vaccines, and analyze their impact on mental health and pregnancy outcomes. This could involve comparing pregnancy outcomes and mental health data before and after such events to identify any significant changes or trends.\n",
34 | "\n",
35 | "\n",
36 | "\n",
37 | "\n",
38 | "\n",
39 | "\n",
40 | "\n",
41 | "\n",
42 | "\n",
43 | "\n",
44 | "\n",
45 | "\n",
46 | "\n",
47 | "\n",
48 | "\n",
49 | "# DATASET(S):\n",
50 | "\n",
51 | "In our study, we have focused on a comprehensive selection of variables that represent three pivotal themes: mental health status, pregnancy outcomes, and the COVID-19 pandemic's impact, guided by data from a highly regarded source detailed in the dataset article at https://doi.org/10.1016/j.dib.2023.109366. The mental health indicators include stress, anxiety, and depression levels, measured by validated scales such as the Edinburgh Postnatal Depression Scale (EPDS) and the PROMIS Anxiety scores, to assess the psychological well-being of pregnant individuals during different phases of the pandemic. Additionally, we measure pregnancy outcomes through clinical metrics like gestational age at birth, birth weight, and NICU stay, reflecting the health of both mother and newborn. To specifically examine the pandemic's effects, we consider variables such as perceived threat levels to life and unborn baby, aligning with key events like lockdowns and vaccine rollouts. Our selection also includes socio-economic and demographic factors—maternal age, household income, and maternal education—to address the broader social and economic influences on child development and mental health, acknowledging the critical role these play in access to resources and opportunities.\n",
52 | "\n",
53 | "Building upon the findings of the research article accessible via https://doi.org/10.1016/j.jad.2021.12.057, which explored the influence of mental health on pregnancy outcomes using the same dataset, our analysis aims to reveal new patterns by segmenting data across different pandemic phases: pre-pandemic, during pandemic, and post-vaccine introduction. We employ advanced analytical methods, including temporal analysis and interaction effects, to delve into the dynamics of mental health fluctuations in response to specific COVID-19-related events and their effects on pregnancy outcomes. Our goal is to identify critical periods within the pandemic that significantly impacted mental health and pregnancy outcomes, and to understand the mediating role of socio-economic factors. This approach enables us to extend beyond the scope of the original study, offering novel insights into the complex interplay between mental health, socio-economic variables, and the progression of the COVID-19 pandemic.\n",
54 | "\n",
55 | "\n",
56 | "**SETUP:** \n",
57 | "\n",
58 | "- import pandas as pd\n",
59 | "- import numpy as np\n",
60 | "\n",
61 | "**DATA CLEANING:** \n",
62 | "\n",
63 | "The data was moderately clean; however, we did need to remove rows with null values in the columns we deemed important to answering our questions, as these observations would not help us in answering them.\n",
64 | "We also renamed columns and changed some column entries to be simpler and more uniform for readability and efficiency. Some entries were made into percentages since they were scores out of 100, and we changed birth date to be a datetime variable.\n",
65 | "Before making these changes, however, we had to check the datatype of the columns of the dataframe, and also check the unique entries of the categorical data in order to rename and clean them up, or convert them in some cases."
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 18,
71 | "id": "b99d98c8",
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "data": {
76 | "text/html": [
77 | "\n",
78 | "\n",
91 | "
\n",
92 | " \n",
93 | " \n",
94 | " | \n",
95 | " OSF_ID | \n",
96 | " Maternal_Age | \n",
97 | " Household_Income | \n",
98 | " Maternal_Education | \n",
99 | " Edinburgh_Postnatal_Depression_Scale | \n",
100 | " PROMIS_Anxiety | \n",
101 | " Gestational_Age_At_Birth | \n",
102 | " Delivery_Date(converted to month and year) | \n",
103 | " Birth_Length | \n",
104 | " Birth_Weight | \n",
105 | " Delivery_Mode | \n",
106 | " NICU_Stay | \n",
107 | " Language | \n",
108 | " Threaten_Life | \n",
109 | " Threaten_Baby_Danger | \n",
110 | " Threaten_Baby_Harm | \n",
111 | "
\n",
112 | " \n",
113 | " \n",
114 | " \n",
115 | " | 0 | \n",
116 | " 1 | \n",
117 | " 38.3 | \n",
118 | " $200,000+ | \n",
119 | " Masters degree | \n",
120 | " 9.0 | \n",
121 | " 13.0 | \n",
122 | " 39.71 | \n",
123 | " Dec2020 | \n",
124 | " 49.20 | \n",
125 | " 3431.0 | \n",
126 | " Vaginally | \n",
127 | " No | \n",
128 | " English | \n",
129 | " 2 | \n",
130 | " 3 | \n",
131 | " 27 | \n",
132 | "
\n",
133 | " \n",
134 | " | 1 | \n",
135 | " 2 | \n",
136 | " 34.6 | \n",
137 | " $200,000+ | \n",
138 | " Undergraduate degree | \n",
139 | " 4.0 | \n",
140 | " 17.0 | \n",
141 | " NaN | \n",
142 | " NaN | \n",
143 | " NaN | \n",
144 | " NaN | \n",
145 | " NaN | \n",
146 | " NaN | \n",
147 | " English | \n",
148 | " 2 | \n",
149 | " 33 | \n",
150 | " 92 | \n",
151 | "
\n",
152 | " \n",
153 | " | 2 | \n",
154 | " 3 | \n",
155 | " 34.3 | \n",
156 | " $100,000 -$124,999 | \n",
157 | " Undergraduate degree | \n",
158 | " NaN | \n",
159 | " NaN | \n",
160 | " NaN | \n",
161 | " NaN | \n",
162 | " NaN | \n",
163 | " NaN | \n",
164 | " NaN | \n",
165 | " NaN | \n",
166 | " French | \n",
167 | " | \n",
168 | " | \n",
169 | " | \n",
170 | "
\n",
171 | " \n",
172 | " | 3 | \n",
173 | " 4 | \n",
174 | " 28.8 | \n",
175 | " $100,000 -$124,999 | \n",
176 | " Masters degree | \n",
177 | " 9.0 | \n",
178 | " 20.0 | \n",
179 | " 38.57 | \n",
180 | " Dec2020 | \n",
181 | " 41.00 | \n",
182 | " 2534.0 | \n",
183 | " Vaginally | \n",
184 | " No | \n",
185 | " French | \n",
186 | " 53 | \n",
187 | " 67 | \n",
188 | " 54 | \n",
189 | "
\n",
190 | " \n",
191 | " | 4 | \n",
192 | " 5 | \n",
193 | " 36.5 | \n",
194 | " $40,000-$69,999 | \n",
195 | " Undergraduate degree | \n",
196 | " 14.0 | \n",
197 | " 20.0 | \n",
198 | " 39.86 | \n",
199 | " Oct2020 | \n",
200 | " 53.34 | \n",
201 | " 3714.0 | \n",
202 | " Caesarean-section (c-section) | \n",
203 | " No | \n",
204 | " English | \n",
205 | " 23 | \n",
206 | " 32 | \n",
207 | " 71 | \n",
208 | "
\n",
209 | " \n",
210 | "
\n",
211 | "
"
212 | ],
213 | "text/plain": [
214 | " OSF_ID Maternal_Age Household_Income Maternal_Education \\\n",
215 | "0 1 38.3 $200,000+ Masters degree \n",
216 | "1 2 34.6 $200,000+ Undergraduate degree \n",
217 | "2 3 34.3 $100,000 -$124,999 Undergraduate degree \n",
218 | "3 4 28.8 $100,000 -$124,999 Masters degree \n",
219 | "4 5 36.5 $40,000-$69,999 Undergraduate degree \n",
220 | "\n",
221 | " Edinburgh_Postnatal_Depression_Scale PROMIS_Anxiety \\\n",
222 | "0 9.0 13.0 \n",
223 | "1 4.0 17.0 \n",
224 | "2 NaN NaN \n",
225 | "3 9.0 20.0 \n",
226 | "4 14.0 20.0 \n",
227 | "\n",
228 | " Gestational_Age_At_Birth Delivery_Date(converted to month and year) \\\n",
229 | "0 39.71 Dec2020 \n",
230 | "1 NaN NaN \n",
231 | "2 NaN NaN \n",
232 | "3 38.57 Dec2020 \n",
233 | "4 39.86 Oct2020 \n",
234 | "\n",
235 | " Birth_Length Birth_Weight Delivery_Mode NICU_Stay \\\n",
236 | "0 49.20 3431.0 Vaginally No \n",
237 | "1 NaN NaN NaN NaN \n",
238 | "2 NaN NaN NaN NaN \n",
239 | "3 41.00 2534.0 Vaginally No \n",
240 | "4 53.34 3714.0 Caesarean-section (c-section) No \n",
241 | "\n",
242 | " Language Threaten_Life Threaten_Baby_Danger Threaten_Baby_Harm \n",
243 | "0 English 2 3 27 \n",
244 | "1 English 2 33 92 \n",
245 | "2 French \n",
246 | "3 French 53 67 54 \n",
247 | "4 English 23 32 71 "
248 | ]
249 | },
250 | "execution_count": 18,
251 | "metadata": {},
252 | "output_type": "execute_result"
253 | }
254 | ],
255 | "source": [
256 | "import pandas as pd\n",
257 | "import numpy as np\n",
258 | "\n",
259 | "# Load the dataset\n",
260 | "\n",
261 | "file_path = 'Pregnancy During the COVID-19 Pandemic.csv'\n",
262 | "df = pd.read_csv(file_path)\n",
263 | "\n",
264 | "# Display the first few rows to understand the structure of the dataset\n",
265 | "df.head()"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 19,
271 | "id": "53b787bb",
272 | "metadata": {},
273 | "outputs": [
274 | {
275 | "data": {
276 | "text/plain": [
277 | "(5176, 16)"
278 | ]
279 | },
280 | "execution_count": 19,
281 | "metadata": {},
282 | "output_type": "execute_result"
283 | }
284 | ],
285 | "source": [
286 | "# Drop rows that are missing any of the key outcome measures (the OSF_ID column itself is kept)\n",
287 | "\n",
288 | "\n",
289 | "\n",
290 | "df = df.dropna(subset=['PROMIS_Anxiety', 'Birth_Length', 'Birth_Weight', 'NICU_Stay', 'Edinburgh_Postnatal_Depression_Scale'])\n",
291 | "#Drop any row with null value for the values we are most interested in\n",
292 | "df = df.reset_index(drop=True)\n",
293 | "df.shape"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 20,
299 | "id": "135e72b1",
300 | "metadata": {},
301 | "outputs": [],
302 | "source": [
303 | "#drop language column\n",
304 | "\n",
305 | "df = df.drop('Language', axis=1)"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 21,
311 | "id": "15345416",
312 | "metadata": {},
313 | "outputs": [
314 | {
315 | "data": {
316 | "text/plain": [
317 | "OSF_ID 0\n",
318 | "Maternal_Age 3\n",
319 | "Household_Income 20\n",
320 | "Maternal_Education 14\n",
321 | "Edinburgh_Postnatal_Depression_Scale 0\n",
322 | "PROMIS_Anxiety 0\n",
323 | "Gestational_Age_At_Birth 0\n",
324 | "Delivery_Date(converted to month and year) 0\n",
325 | "Birth_Length 0\n",
326 | "Birth_Weight 0\n",
327 | "Delivery_Mode 0\n",
328 | "NICU_Stay 0\n",
329 | "Threaten_Life 0\n",
330 | "Threaten_Baby_Danger 0\n",
331 | "Threaten_Baby_Harm 0\n",
332 | "dtype: int64"
333 | ]
334 | },
335 | "execution_count": 21,
336 | "metadata": {},
337 | "output_type": "execute_result"
338 | }
339 | ],
340 | "source": [
341 | "# Checking for missing values\n",
342 | "missing_values = df.isnull().sum()\n",
343 | "\n",
344 | "missing_values"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 22,
350 | "id": "0fe325a7",
351 | "metadata": {},
352 | "outputs": [
353 | {
354 | "data": {
355 | "text/html": [
356 | "\n",
357 | "\n",
370 | "
\n",
371 | " \n",
372 | " \n",
373 | " | \n",
374 | " OSF_ID | \n",
375 | " mat_age | \n",
376 | " income | \n",
377 | " mat_edu | \n",
378 | " depression | \n",
379 | " anxiety | \n",
380 | " birth_age | \n",
381 | " birth_date | \n",
382 | " Birth_Length | \n",
383 | " Birth_Weight | \n",
384 | " Delivery_Mode | \n",
385 | " NICU_Stay | \n",
386 | " Threaten_Life | \n",
387 | " Threaten_Baby_Danger | \n",
388 | " Threaten_Baby_Harm | \n",
389 | "
\n",
390 | " \n",
391 | " \n",
392 | " \n",
393 | " | 0 | \n",
394 | " 1 | \n",
395 | " 38.3 | \n",
396 | " $200,000+ | \n",
397 | " Masters degree | \n",
398 | " 9.0 | \n",
399 | " 13.0 | \n",
400 | " 39.71 | \n",
401 | " Dec2020 | \n",
402 | " 49.20 | \n",
403 | " 3431.0 | \n",
404 | " Vaginally | \n",
405 | " No | \n",
406 | " 2 | \n",
407 | " 3 | \n",
408 | " 27 | \n",
409 | "
\n",
410 | " \n",
411 | " | 1 | \n",
412 | " 4 | \n",
413 | " 28.8 | \n",
414 | " $100,000 -$124,999 | \n",
415 | " Masters degree | \n",
416 | " 9.0 | \n",
417 | " 20.0 | \n",
418 | " 38.57 | \n",
419 | " Dec2020 | \n",
420 | " 41.00 | \n",
421 | " 2534.0 | \n",
422 | " Vaginally | \n",
423 | " No | \n",
424 | " 53 | \n",
425 | " 67 | \n",
426 | " 54 | \n",
427 | "
\n",
428 | " \n",
429 | " | 2 | \n",
430 | " 5 | \n",
431 | " 36.5 | \n",
432 | " $40,000-$69,999 | \n",
433 | " Undergraduate degree | \n",
434 | " 14.0 | \n",
435 | " 20.0 | \n",
436 | " 39.86 | \n",
437 | " Oct2020 | \n",
438 | " 53.34 | \n",
439 | " 3714.0 | \n",
440 | " Caesarean-section (c-section) | \n",
441 | " No | \n",
442 | " 23 | \n",
443 | " 32 | \n",
444 | " 71 | \n",
445 | "
\n",
446 | " \n",
447 | " | 3 | \n",
448 | " 9 | \n",
449 | " 33.1 | \n",
450 | " $100,000 -$124,999 | \n",
451 | " College/trade school | \n",
452 | " 1.0 | \n",
453 | " 7.0 | \n",
454 | " 40.86 | \n",
455 | " Nov2020 | \n",
456 | " 55.88 | \n",
457 | " 4480.0 | \n",
458 | " Vaginally | \n",
459 | " No | \n",
460 | " 27 | \n",
461 | " 76 | \n",
462 | " 72 | \n",
463 | "
\n",
464 | " \n",
465 | " | 4 | \n",
466 | " 14 | \n",
467 | " 29.2 | \n",
468 | " $70,000-$99,999 | \n",
469 | " Masters degree | \n",
470 | " 14.0 | \n",
471 | " 17.0 | \n",
472 | " 41.00 | \n",
473 | " Oct2020 | \n",
474 | " 47.00 | \n",
475 | " 3084.0 | \n",
476 | " Vaginally | \n",
477 | " No | \n",
478 | " 68 | \n",
479 | " 69 | \n",
480 | " 81 | \n",
481 | "
\n",
482 | " \n",
483 | "
\n",
484 | "
"
485 | ],
486 | "text/plain": [
487 | " OSF_ID mat_age income mat_edu depression \\\n",
488 | "0 1 38.3 $200,000+ Masters degree 9.0 \n",
489 | "1 4 28.8 $100,000 -$124,999 Masters degree 9.0 \n",
490 | "2 5 36.5 $40,000-$69,999 Undergraduate degree 14.0 \n",
491 | "3 9 33.1 $100,000 -$124,999 College/trade school 1.0 \n",
492 | "4 14 29.2 $70,000-$99,999 Masters degree 14.0 \n",
493 | "\n",
494 | " anxiety birth_age birth_date Birth_Length Birth_Weight \\\n",
495 | "0 13.0 39.71 Dec2020 49.20 3431.0 \n",
496 | "1 20.0 38.57 Dec2020 41.00 2534.0 \n",
497 | "2 20.0 39.86 Oct2020 53.34 3714.0 \n",
498 | "3 7.0 40.86 Nov2020 55.88 4480.0 \n",
499 | "4 17.0 41.00 Oct2020 47.00 3084.0 \n",
500 | "\n",
501 | " Delivery_Mode NICU_Stay Threaten_Life Threaten_Baby_Danger \\\n",
502 | "0 Vaginally No 2 3 \n",
503 | "1 Vaginally No 53 67 \n",
504 | "2 Caesarean-section (c-section) No 23 32 \n",
505 | "3 Vaginally No 27 76 \n",
506 | "4 Vaginally No 68 69 \n",
507 | "\n",
508 | " Threaten_Baby_Harm \n",
509 | "0 27 \n",
510 | "1 54 \n",
511 | "2 71 \n",
512 | "3 72 \n",
513 | "4 81 "
514 | ]
515 | },
516 | "execution_count": 22,
517 | "metadata": {},
518 | "output_type": "execute_result"
519 | }
520 | ],
521 | "source": [
522 | "# Map the verbose survey column names to short analysis names (columns not listed are unchanged)\n",
523 | "short_names = {'Maternal_Age': 'mat_age', 'Household_Income': 'income', 'Maternal_Education': 'mat_edu', 'Edinburgh_Postnatal_Depression_Scale': 'depression',\n",
524 | "               'PROMIS_Anxiety': 'anxiety', 'Gestational_Age_At_Birth': 'birth_age', 'Delivery_Date(converted to month and year)': 'birth_date'}\n",
525 | "df = df.rename(columns=short_names)\n",
526 | "df.head()"
527 | ]
528 | },
529 | {
530 | "cell_type": "code",
531 | "execution_count": 23,
532 | "id": "7bf0cddb",
533 | "metadata": {},
534 | "outputs": [
535 | {
536 | "data": {
537 | "text/plain": [
538 | "( OSF_ID mat_age depression anxiety birth_age \\\n",
539 | " count 5176.000000 5173.000000 5176.000000 5176.000000 5176.00000 \n",
540 | " mean 5300.733578 32.521322 9.738022 18.389104 39.33868 \n",
541 | " std 3114.246816 4.140823 5.307232 5.950169 1.62486 \n",
542 | " min 1.000000 18.500000 0.000000 7.000000 24.86000 \n",
543 | " 25% 2560.750000 29.700000 6.000000 14.000000 38.57000 \n",
544 | " 50% 5294.500000 32.400000 10.000000 19.000000 39.57000 \n",
545 | " 75% 8009.250000 35.300000 13.000000 23.000000 40.43000 \n",
546 | " max 10764.000000 49.000000 28.000000 35.000000 42.86000 \n",
547 | " \n",
548 | " Birth_Length Birth_Weight \n",
549 | " count 5176.000000 5176.000000 \n",
550 | " mean 50.499834 3412.676005 \n",
551 | " std 4.433899 534.564742 \n",
552 | " min 20.000000 314.000000 \n",
553 | " 25% 49.000000 3119.000000 \n",
554 | " 50% 50.800000 3431.000000 \n",
555 | " 75% 53.310000 3742.000000 \n",
556 | " max 70.000000 5968.000000 ,\n",
557 | " OSF_ID 0\n",
558 | " Maternal_Age 3\n",
559 | " Household_Income 20\n",
560 | " Maternal_Education 14\n",
561 | " Edinburgh_Postnatal_Depression_Scale 0\n",
562 | " PROMIS_Anxiety 0\n",
563 | " Gestational_Age_At_Birth 0\n",
564 | " Delivery_Date(converted to month and year) 0\n",
565 | " Birth_Length 0\n",
566 | " Birth_Weight 0\n",
567 | " Delivery_Mode 0\n",
568 | " NICU_Stay 0\n",
569 | " Threaten_Life 0\n",
570 | " Threaten_Baby_Danger 0\n",
571 | " Threaten_Baby_Harm 0\n",
572 | " dtype: int64)"
573 | ]
574 | },
575 | "execution_count": 23,
576 | "metadata": {},
577 | "output_type": "execute_result"
578 | }
579 | ],
580 | "source": [
581 | "# Summary statistics for numerical columns\n",
582 | "summary_statistics = df.describe()\n",
583 | "# Recount nulls here: the earlier missing_values was computed before the columns were renamed\n",
584 | "missing_values = df.isnull().sum()\n",
585 | "summary_statistics, missing_values"
586 | ]
587 | },
588 | {
589 | "cell_type": "code",
590 | "execution_count": 24,
591 | "id": "a0183010",
592 | "metadata": {},
593 | "outputs": [
594 | {
595 | "data": {
596 | "text/html": [
597 | "\n",
598 | "\n",
611 | "
\n",
612 | " \n",
613 | " \n",
614 | " | \n",
615 | " OSF_ID | \n",
616 | " mat_age | \n",
617 | " income | \n",
618 | " mat_edu | \n",
619 | " depression | \n",
620 | " anxiety | \n",
621 | " birth_age | \n",
622 | " birth_date | \n",
623 | " Birth_Length | \n",
624 | " Birth_Weight | \n",
625 | " Delivery_Mode | \n",
626 | " NICU_Stay | \n",
627 | " Threaten_Life | \n",
628 | " Threaten_Baby_Danger | \n",
629 | " Threaten_Baby_Harm | \n",
630 | "
\n",
631 | " \n",
632 | " \n",
633 | " \n",
634 | " | 0 | \n",
635 | " 1 | \n",
636 | " 38.3 | \n",
637 | " 250000.0 | \n",
638 | " Masters degree | \n",
639 | " 9.0 | \n",
640 | " 13.0 | \n",
641 | " 39.71 | \n",
642 | " Dec2020 | \n",
643 | " 49.20 | \n",
644 | " 3431.0 | \n",
645 | " Vaginally | \n",
646 | " No | \n",
647 | " 2 | \n",
648 | " 3 | \n",
649 | " 27 | \n",
650 | "
\n",
651 | " \n",
652 | " | 1 | \n",
653 | " 4 | \n",
654 | " 28.8 | \n",
655 | " 112499.5 | \n",
656 | " Masters degree | \n",
657 | " 9.0 | \n",
658 | " 20.0 | \n",
659 | " 38.57 | \n",
660 | " Dec2020 | \n",
661 | " 41.00 | \n",
662 | " 2534.0 | \n",
663 | " Vaginally | \n",
664 | " No | \n",
665 | " 53 | \n",
666 | " 67 | \n",
667 | " 54 | \n",
668 | "
\n",
669 | " \n",
670 | " | 2 | \n",
671 | " 5 | \n",
672 | " 36.5 | \n",
673 | " 54999.5 | \n",
674 | " Undergraduate degree | \n",
675 | " 14.0 | \n",
676 | " 20.0 | \n",
677 | " 39.86 | \n",
678 | " Oct2020 | \n",
679 | " 53.34 | \n",
680 | " 3714.0 | \n",
681 | " Caesarean-section (c-section) | \n",
682 | " No | \n",
683 | " 23 | \n",
684 | " 32 | \n",
685 | " 71 | \n",
686 | "
\n",
687 | " \n",
688 | " | 3 | \n",
689 | " 9 | \n",
690 | " 33.1 | \n",
691 | " 112499.5 | \n",
692 | " College/trade school | \n",
693 | " 1.0 | \n",
694 | " 7.0 | \n",
695 | " 40.86 | \n",
696 | " Nov2020 | \n",
697 | " 55.88 | \n",
698 | " 4480.0 | \n",
699 | " Vaginally | \n",
700 | " No | \n",
701 | " 27 | \n",
702 | " 76 | \n",
703 | " 72 | \n",
704 | "
\n",
705 | " \n",
706 | " | 4 | \n",
707 | " 14 | \n",
708 | " 29.2 | \n",
709 | " 84999.5 | \n",
710 | " Masters degree | \n",
711 | " 14.0 | \n",
712 | " 17.0 | \n",
713 | " 41.00 | \n",
714 | " Oct2020 | \n",
715 | " 47.00 | \n",
716 | " 3084.0 | \n",
717 | " Vaginally | \n",
718 | " No | \n",
719 | " 68 | \n",
720 | " 69 | \n",
721 | " 81 | \n",
722 | "
\n",
723 | " \n",
724 | "
\n",
725 | "
"
726 | ],
727 | "text/plain": [
728 | " OSF_ID mat_age income mat_edu depression anxiety \\\n",
729 | "0 1 38.3 250000.0 Masters degree 9.0 13.0 \n",
730 | "1 4 28.8 112499.5 Masters degree 9.0 20.0 \n",
731 | "2 5 36.5 54999.5 Undergraduate degree 14.0 20.0 \n",
732 | "3 9 33.1 112499.5 College/trade school 1.0 7.0 \n",
733 | "4 14 29.2 84999.5 Masters degree 14.0 17.0 \n",
734 | "\n",
735 | " birth_age birth_date Birth_Length Birth_Weight \\\n",
736 | "0 39.71 Dec2020 49.20 3431.0 \n",
737 | "1 38.57 Dec2020 41.00 2534.0 \n",
738 | "2 39.86 Oct2020 53.34 3714.0 \n",
739 | "3 40.86 Nov2020 55.88 4480.0 \n",
740 | "4 41.00 Oct2020 47.00 3084.0 \n",
741 | "\n",
742 | " Delivery_Mode NICU_Stay Threaten_Life Threaten_Baby_Danger \\\n",
743 | "0 Vaginally No 2 3 \n",
744 | "1 Vaginally No 53 67 \n",
745 | "2 Caesarean-section (c-section) No 23 32 \n",
746 | "3 Vaginally No 27 76 \n",
747 | "4 Vaginally No 68 69 \n",
748 | "\n",
749 | " Threaten_Baby_Harm \n",
750 | "0 27 \n",
751 | "1 54 \n",
752 | "2 71 \n",
753 | "3 72 \n",
754 | "4 81 "
755 | ]
756 | },
757 | "execution_count": 24,
758 | "metadata": {},
759 | "output_type": "execute_result"
760 | }
761 | ],
762 | "source": [
763 | "def standardize_income(income):\n",
764 | "    '''Convert one raw income-bracket label into a single numeric estimate.\n",
765 | "\n",
766 | "    Missing values come back as NaN and non-string inputs are returned untouched.\n",
767 | "    Closed ranges map to their midpoint; open-ended brackets map to 1.25x the stated bound.\n",
768 | "    '''\n",
769 | "    if pd.isna(income):\n",
770 | "        return np.nan\n",
771 | "    if not isinstance(income, str):\n",
772 | "        # Already numeric: nothing to parse\n",
773 | "        return income\n",
774 | "    if 'Less than' in income:\n",
775 | "        # Fixed stand-in value for the lowest bracket\n",
776 | "        return 20000\n",
777 | "    # Strip currency formatting once; the branches below share the result\n",
778 | "    digits = income.replace('$', '').replace(',', '')\n",
779 | "    if '-' in income:\n",
780 | "        bounds = digits.split('-')\n",
781 | "        if len(bounds) != 2 or not bounds[1]:\n",
782 | "            # No usable upper bound, e.g. '$150,000 -'\n",
783 | "            return int(bounds[0]) * 1.25\n",
784 | "        lower, upper = (int(b) for b in bounds)\n",
785 | "        return (lower + upper) / 2\n",
786 | "    if '+' in income:\n",
787 | "        # Open-ended top bracket, e.g. '$200,000+'\n",
788 | "        return int(digits.replace('+', '')) * 1.25\n",
789 | "    # Single plain amount\n",
790 | "    return int(digits.replace(' ', ''))\n",
791 | "\n",
792 | "# Replace the raw bracket labels with their numeric estimates\n",
793 | "df['income'] = df['income'].apply(standardize_income)\n",
794 | "\n",
795 | "df.head()"
796 | ]
797 | },
798 | {
799 | "cell_type": "code",
800 | "execution_count": 25,
801 | "id": "a44a78c6",
802 | "metadata": {},
803 | "outputs": [
804 | {
805 | "data": {
806 | "text/html": [
807 | "\n",
808 | "\n",
821 | "
\n",
822 | " \n",
823 | " \n",
824 | " | \n",
825 | " OSF_ID | \n",
826 | " mat_age | \n",
827 | " income | \n",
828 | " mat_edu | \n",
829 | " depression | \n",
830 | " anxiety | \n",
831 | " birth_age | \n",
832 | " birth_date | \n",
833 | " Birth_Length | \n",
834 | " Birth_Weight | \n",
835 | " Delivery_Mode | \n",
836 | " NICU_Stay | \n",
837 | " Threaten_Life | \n",
838 | " Threaten_Baby_Danger | \n",
839 | " Threaten_Baby_Harm | \n",
840 | "
\n",
841 | " \n",
842 | " \n",
843 | " \n",
844 | " | 0 | \n",
845 | " 1 | \n",
846 | " 38.3 | \n",
847 | " High | \n",
848 | " Masters degree | \n",
849 | " 9.0 | \n",
850 | " 13.0 | \n",
851 | " 39.71 | \n",
852 | " Dec2020 | \n",
853 | " 49.20 | \n",
854 | " 3431.0 | \n",
855 | " Vaginally | \n",
856 | " No | \n",
857 | " 2 | \n",
858 | " 3 | \n",
859 | " 27 | \n",
860 | "
\n",
861 | " \n",
862 | " | 1 | \n",
863 | " 4 | \n",
864 | " 28.8 | \n",
865 | " Middle | \n",
866 | " Masters degree | \n",
867 | " 9.0 | \n",
868 | " 20.0 | \n",
869 | " 38.57 | \n",
870 | " Dec2020 | \n",
871 | " 41.00 | \n",
872 | " 2534.0 | \n",
873 | " Vaginally | \n",
874 | " No | \n",
875 | " 53 | \n",
876 | " 67 | \n",
877 | " 54 | \n",
878 | "
\n",
879 | " \n",
880 | " | 2 | \n",
881 | " 5 | \n",
882 | " 36.5 | \n",
883 | " Low | \n",
884 | " Undergraduate degree | \n",
885 | " 14.0 | \n",
886 | " 20.0 | \n",
887 | " 39.86 | \n",
888 | " Oct2020 | \n",
889 | " 53.34 | \n",
890 | " 3714.0 | \n",
891 | " Caesarean-section (c-section) | \n",
892 | " No | \n",
893 | " 23 | \n",
894 | " 32 | \n",
895 | " 71 | \n",
896 | "
\n",
897 | " \n",
898 | " | 3 | \n",
899 | " 9 | \n",
900 | " 33.1 | \n",
901 | " Middle | \n",
902 | " College/trade school | \n",
903 | " 1.0 | \n",
904 | " 7.0 | \n",
905 | " 40.86 | \n",
906 | " Nov2020 | \n",
907 | " 55.88 | \n",
908 | " 4480.0 | \n",
909 | " Vaginally | \n",
910 | " No | \n",
911 | " 27 | \n",
912 | " 76 | \n",
913 | " 72 | \n",
914 | "
\n",
915 | " \n",
916 | " | 4 | \n",
917 | " 14 | \n",
918 | " 29.2 | \n",
919 | " Middle | \n",
920 | " Masters degree | \n",
921 | " 14.0 | \n",
922 | " 17.0 | \n",
923 | " 41.00 | \n",
924 | " Oct2020 | \n",
925 | " 47.00 | \n",
926 | " 3084.0 | \n",
927 | " Vaginally | \n",
928 | " No | \n",
929 | " 68 | \n",
930 | " 69 | \n",
931 | " 81 | \n",
932 | "
\n",
933 | " \n",
934 | "
\n",
935 | "
"
936 | ],
937 | "text/plain": [
938 | " OSF_ID mat_age income mat_edu depression anxiety \\\n",
939 | "0 1 38.3 High Masters degree 9.0 13.0 \n",
940 | "1 4 28.8 Middle Masters degree 9.0 20.0 \n",
941 | "2 5 36.5 Low Undergraduate degree 14.0 20.0 \n",
942 | "3 9 33.1 Middle College/trade school 1.0 7.0 \n",
943 | "4 14 29.2 Middle Masters degree 14.0 17.0 \n",
944 | "\n",
945 | " birth_age birth_date Birth_Length Birth_Weight \\\n",
946 | "0 39.71 Dec2020 49.20 3431.0 \n",
947 | "1 38.57 Dec2020 41.00 2534.0 \n",
948 | "2 39.86 Oct2020 53.34 3714.0 \n",
949 | "3 40.86 Nov2020 55.88 4480.0 \n",
950 | "4 41.00 Oct2020 47.00 3084.0 \n",
951 | "\n",
952 | " Delivery_Mode NICU_Stay Threaten_Life Threaten_Baby_Danger \\\n",
953 | "0 Vaginally No 2 3 \n",
954 | "1 Vaginally No 53 67 \n",
955 | "2 Caesarean-section (c-section) No 23 32 \n",
956 | "3 Vaginally No 27 76 \n",
957 | "4 Vaginally No 68 69 \n",
958 | "\n",
959 | " Threaten_Baby_Harm \n",
960 | "0 27 \n",
961 | "1 54 \n",
962 | "2 71 \n",
963 | "3 72 \n",
964 | "4 81 "
965 | ]
966 | },
967 | "execution_count": 25,
968 | "metadata": {},
969 | "output_type": "execute_result"
970 | }
971 | ],
972 | "source": [
973 | "def categorize_income(value, low_thresh, high_thresh):\n",
974 | "    '''Bucket a numeric income into 'Low', 'Middle' or 'High'; missing values become 'Unknown'.'''\n",
975 | "    if pd.isna(value):\n",
976 | "        return 'Unknown'\n",
977 | "    if value < low_thresh:\n",
978 | "        return 'Low'\n",
979 | "    # Anything that is not in [low_thresh, high_thresh) falls through to 'High'\n",
980 | "    return 'Middle' if low_thresh <= value < high_thresh else 'High'\n",
981 | "\n",
982 | "\n",
983 | "# Cut-offs are the 33rd and 66th percentiles of the numeric income column\n",
984 | "income_cutoffs = df['income'].quantile([0.33, 0.66])\n",
985 | "low_threshold, high_threshold = income_cutoffs\n",
986 | "\n",
987 | "# Overwrite the numeric incomes with their category labels\n",
988 | "df['income'] = df['income'].apply(categorize_income, args=(low_threshold, high_threshold))\n",
989 | "\n",
990 | "# Display the updated DataFrame\n",
991 | "df.head()"
992 | ]
993 | },
994 | {
995 | "cell_type": "code",
996 | "execution_count": 26,
997 | "id": "a6d22f05",
998 | "metadata": {},
999 | "outputs": [
1000 | {
1001 | "data": {
1002 | "text/html": [
1003 | "\n",
1004 | "\n",
1017 | "
\n",
1018 | " \n",
1019 | " \n",
1020 | " | \n",
1021 | " OSF_ID | \n",
1022 | " mat_age | \n",
1023 | " income | \n",
1024 | " mat_edu | \n",
1025 | " depression | \n",
1026 | " anxiety | \n",
1027 | " birth_age | \n",
1028 | " birth_date | \n",
1029 | " Birth_Length | \n",
1030 | " Birth_Weight | \n",
1031 | " Delivery_Mode | \n",
1032 | " NICU_Stay | \n",
1033 | " Threaten_Life | \n",
1034 | " Threaten_Baby_Danger | \n",
1035 | " Threaten_Baby_Harm | \n",
1036 | "
\n",
1037 | " \n",
1038 | " \n",
1039 | " \n",
1040 | " | 0 | \n",
1041 | " 1 | \n",
1042 | " 38.3 | \n",
1043 | " High | \n",
1044 | " Masters degree | \n",
1045 | " 9.0 | \n",
1046 | " 13.0 | \n",
1047 | " 39.71 | \n",
1048 | " Dec2020 | \n",
1049 | " 49.20 | \n",
1050 | " 3431.0 | \n",
1051 | " Vaginally | \n",
1052 | " 0 | \n",
1053 | " 2 | \n",
1054 | " 3 | \n",
1055 | " 27 | \n",
1056 | "
\n",
1057 | " \n",
1058 | " | 1 | \n",
1059 | " 4 | \n",
1060 | " 28.8 | \n",
1061 | " Middle | \n",
1062 | " Masters degree | \n",
1063 | " 9.0 | \n",
1064 | " 20.0 | \n",
1065 | " 38.57 | \n",
1066 | " Dec2020 | \n",
1067 | " 41.00 | \n",
1068 | " 2534.0 | \n",
1069 | " Vaginally | \n",
1070 | " 0 | \n",
1071 | " 53 | \n",
1072 | " 67 | \n",
1073 | " 54 | \n",
1074 | "
\n",
1075 | " \n",
1076 | " | 2 | \n",
1077 | " 5 | \n",
1078 | " 36.5 | \n",
1079 | " Low | \n",
1080 | " Undergraduate degree | \n",
1081 | " 14.0 | \n",
1082 | " 20.0 | \n",
1083 | " 39.86 | \n",
1084 | " Oct2020 | \n",
1085 | " 53.34 | \n",
1086 | " 3714.0 | \n",
1087 | " Caesarean-section (c-section) | \n",
1088 | " 0 | \n",
1089 | " 23 | \n",
1090 | " 32 | \n",
1091 | " 71 | \n",
1092 | "
\n",
1093 | " \n",
1094 | " | 3 | \n",
1095 | " 9 | \n",
1096 | " 33.1 | \n",
1097 | " Middle | \n",
1098 | " College/trade school | \n",
1099 | " 1.0 | \n",
1100 | " 7.0 | \n",
1101 | " 40.86 | \n",
1102 | " Nov2020 | \n",
1103 | " 55.88 | \n",
1104 | " 4480.0 | \n",
1105 | " Vaginally | \n",
1106 | " 0 | \n",
1107 | " 27 | \n",
1108 | " 76 | \n",
1109 | " 72 | \n",
1110 | "
\n",
1111 | " \n",
1112 | " | 4 | \n",
1113 | " 14 | \n",
1114 | " 29.2 | \n",
1115 | " Middle | \n",
1116 | " Masters degree | \n",
1117 | " 14.0 | \n",
1118 | " 17.0 | \n",
1119 | " 41.00 | \n",
1120 | " Oct2020 | \n",
1121 | " 47.00 | \n",
1122 | " 3084.0 | \n",
1123 | " Vaginally | \n",
1124 | " 0 | \n",
1125 | " 68 | \n",
1126 | " 69 | \n",
1127 | " 81 | \n",
1128 | "
\n",
1129 | " \n",
1130 | "
\n",
1131 | "
"
1132 | ],
1133 | "text/plain": [
1134 | " OSF_ID mat_age income mat_edu depression anxiety \\\n",
1135 | "0 1 38.3 High Masters degree 9.0 13.0 \n",
1136 | "1 4 28.8 Middle Masters degree 9.0 20.0 \n",
1137 | "2 5 36.5 Low Undergraduate degree 14.0 20.0 \n",
1138 | "3 9 33.1 Middle College/trade school 1.0 7.0 \n",
1139 | "4 14 29.2 Middle Masters degree 14.0 17.0 \n",
1140 | "\n",
1141 | " birth_age birth_date Birth_Length Birth_Weight \\\n",
1142 | "0 39.71 Dec2020 49.20 3431.0 \n",
1143 | "1 38.57 Dec2020 41.00 2534.0 \n",
1144 | "2 39.86 Oct2020 53.34 3714.0 \n",
1145 | "3 40.86 Nov2020 55.88 4480.0 \n",
1146 | "4 41.00 Oct2020 47.00 3084.0 \n",
1147 | "\n",
1148 | " Delivery_Mode NICU_Stay Threaten_Life \\\n",
1149 | "0 Vaginally 0 2 \n",
1150 | "1 Vaginally 0 53 \n",
1151 | "2 Caesarean-section (c-section) 0 23 \n",
1152 | "3 Vaginally 0 27 \n",
1153 | "4 Vaginally 0 68 \n",
1154 | "\n",
1155 | " Threaten_Baby_Danger Threaten_Baby_Harm \n",
1156 | "0 3 27 \n",
1157 | "1 67 54 \n",
1158 | "2 32 71 \n",
1159 | "3 76 72 \n",
1160 | "4 69 81 "
1161 | ]
1162 | },
1163 | "execution_count": 26,
1164 | "metadata": {},
1165 | "output_type": "execute_result"
1166 | }
1167 | ],
1168 | "source": [
1169 | "# Convert 'Yes'/'No' to 1/0 in NICU_Stay\n",
1170 | "df['NICU_Stay'] = df['NICU_Stay'].map({'Yes': 1, 'No': 0})\n",
1171 | "df.head()"
1172 | ]
1173 | },
1174 | {
1175 | "cell_type": "code",
1176 | "execution_count": 27,
1177 | "id": "a8d8d6fd",
1178 | "metadata": {},
1179 | "outputs": [
1180 | {
1181 | "data": {
1182 | "text/html": [
1183 | "\n",
1184 | "\n",
1197 | "
\n",
1198 | " \n",
1199 | " \n",
1200 | " | \n",
1201 | " OSF_ID | \n",
1202 | " mat_age | \n",
1203 | " income | \n",
1204 | " mat_edu | \n",
1205 | " depression | \n",
1206 | " anxiety | \n",
1207 | " birth_age | \n",
1208 | " birth_date | \n",
1209 | " Birth_Length | \n",
1210 | " Birth_Weight | \n",
1211 | " Delivery_Mode | \n",
1212 | " NICU_Stay | \n",
1213 | " Threaten_Life | \n",
1214 | " Threaten_Baby_Danger | \n",
1215 | " Threaten_Baby_Harm | \n",
1216 | "
\n",
1217 | " \n",
1218 | " \n",
1219 | " \n",
1220 | " | 0 | \n",
1221 | " 1 | \n",
1222 | " 38.3 | \n",
1223 | " High | \n",
1224 | " Masters degree | \n",
1225 | " 9.0 | \n",
1226 | " 13.0 | \n",
1227 | " 39.71 | \n",
1228 | " NaT | \n",
1229 | " 49.20 | \n",
1230 | " 3431.0 | \n",
1231 | " Vaginally | \n",
1232 | " 0 | \n",
1233 | " 2 | \n",
1234 | " 3 | \n",
1235 | " 27 | \n",
1236 | "
\n",
1237 | " \n",
1238 | " | 1 | \n",
1239 | " 4 | \n",
1240 | " 28.8 | \n",
1241 | " Middle | \n",
1242 | " Masters degree | \n",
1243 | " 9.0 | \n",
1244 | " 20.0 | \n",
1245 | " 38.57 | \n",
1246 | " NaT | \n",
1247 | " 41.00 | \n",
1248 | " 2534.0 | \n",
1249 | " Vaginally | \n",
1250 | " 0 | \n",
1251 | " 53 | \n",
1252 | " 67 | \n",
1253 | " 54 | \n",
1254 | "
\n",
1255 | " \n",
1256 | " | 2 | \n",
1257 | " 5 | \n",
1258 | " 36.5 | \n",
1259 | " Low | \n",
1260 | " Undergraduate degree | \n",
1261 | " 14.0 | \n",
1262 | " 20.0 | \n",
1263 | " 39.86 | \n",
1264 | " NaT | \n",
1265 | " 53.34 | \n",
1266 | " 3714.0 | \n",
1267 | " Caesarean-section (c-section) | \n",
1268 | " 0 | \n",
1269 | " 23 | \n",
1270 | " 32 | \n",
1271 | " 71 | \n",
1272 | "
\n",
1273 | " \n",
1274 | " | 3 | \n",
1275 | " 9 | \n",
1276 | " 33.1 | \n",
1277 | " Middle | \n",
1278 | " College/trade school | \n",
1279 | " 1.0 | \n",
1280 | " 7.0 | \n",
1281 | " 40.86 | \n",
1282 | " NaT | \n",
1283 | " 55.88 | \n",
1284 | " 4480.0 | \n",
1285 | " Vaginally | \n",
1286 | " 0 | \n",
1287 | " 27 | \n",
1288 | " 76 | \n",
1289 | " 72 | \n",
1290 | "
\n",
1291 | " \n",
1292 | " | 4 | \n",
1293 | " 14 | \n",
1294 | " 29.2 | \n",
1295 | " Middle | \n",
1296 | " Masters degree | \n",
1297 | " 14.0 | \n",
1298 | " 17.0 | \n",
1299 | " 41.00 | \n",
1300 | " NaT | \n",
1301 | " 47.00 | \n",
1302 | " 3084.0 | \n",
1303 | " Vaginally | \n",
1304 | " 0 | \n",
1305 | " 68 | \n",
1306 | " 69 | \n",
1307 | " 81 | \n",
1308 | "
\n",
1309 | " \n",
1310 | "
\n",
1311 | "
"
1312 | ],
1313 | "text/plain": [
1314 | " OSF_ID mat_age income mat_edu depression anxiety \\\n",
1315 | "0 1 38.3 High Masters degree 9.0 13.0 \n",
1316 | "1 4 28.8 Middle Masters degree 9.0 20.0 \n",
1317 | "2 5 36.5 Low Undergraduate degree 14.0 20.0 \n",
1318 | "3 9 33.1 Middle College/trade school 1.0 7.0 \n",
1319 | "4 14 29.2 Middle Masters degree 14.0 17.0 \n",
1320 | "\n",
1321 | " birth_age birth_date Birth_Length Birth_Weight \\\n",
1322 | "0 39.71 NaT 49.20 3431.0 \n",
1323 | "1 38.57 NaT 41.00 2534.0 \n",
1324 | "2 39.86 NaT 53.34 3714.0 \n",
1325 | "3 40.86 NaT 55.88 4480.0 \n",
1326 | "4 41.00 NaT 47.00 3084.0 \n",
1327 | "\n",
1328 | " Delivery_Mode NICU_Stay Threaten_Life \\\n",
1329 | "0 Vaginally 0 2 \n",
1330 | "1 Vaginally 0 53 \n",
1331 | "2 Caesarean-section (c-section) 0 23 \n",
1332 | "3 Vaginally 0 27 \n",
1333 | "4 Vaginally 0 68 \n",
1334 | "\n",
1335 | " Threaten_Baby_Danger Threaten_Baby_Harm \n",
1336 | "0 3 27 \n",
1337 | "1 67 54 \n",
1338 | "2 32 71 \n",
1339 | "3 76 72 \n",
1340 | "4 69 81 "
1341 | ]
1342 | },
1343 | "execution_count": 27,
1344 | "metadata": {},
1345 | "output_type": "execute_result"
1346 | }
1347 | ],
1348 | "source": [
1349 | "df['birth_date'] = pd.to_datetime(df['birth_date'], format='%b%Y', errors='coerce')  # labels look like 'Dec2020'; the old '%Y-%m-%d' format coerced every row to NaT\n",
1350 | "df.head()"
1351 | ]
1352 | },
1353 | {
1354 | "cell_type": "code",
1355 | "execution_count": 28,
1356 | "id": "039e94db",
1357 | "metadata": {},
1358 | "outputs": [
1359 | {
1360 | "data": {
1361 | "text/plain": [
1362 | "(5176, 15)"
1363 | ]
1364 | },
1365 | "execution_count": 28,
1366 | "metadata": {},
1367 | "output_type": "execute_result"
1368 | }
1369 | ],
1370 | "source": [
1371 | "df.shape"
1372 | ]
1373 | }
1374 | ],
1375 | "metadata": {
1376 | "kernelspec": {
1377 | "display_name": "Python 3 (ipykernel)",
1378 | "language": "python",
1379 | "name": "python3"
1380 | },
1381 | "language_info": {
1382 | "codemirror_mode": {
1383 | "name": "ipython",
1384 | "version": 3
1385 | },
1386 | "file_extension": ".py",
1387 | "mimetype": "text/x-python",
1388 | "name": "python",
1389 | "nbconvert_exporter": "python",
1390 | "pygments_lexer": "ipython3",
1391 | "version": "3.9.5"
1392 | }
1393 | },
1394 | "nbformat": 4,
1395 | "nbformat_minor": 5
1396 | }
1397 |
--------------------------------------------------------------------------------