├── .gitignore
├── CITATION.cff
├── LICENSE
├── README.md
├── assignments
│   ├── assignment_yourname_t81_558_class1.ipynb
│   ├── assignment_yourname_t81_558_class10.ipynb
│   ├── assignment_yourname_t81_558_class2.ipynb
│   ├── assignment_yourname_t81_558_class3.ipynb
│   ├── assignment_yourname_t81_558_class4.ipynb
│   ├── assignment_yourname_t81_558_class5.ipynb
│   ├── assignment_yourname_t81_558_class6.ipynb
│   ├── assignment_yourname_t81_558_class7.ipynb
│   ├── assignment_yourname_t81_558_class8.ipynb
│   └── assignment_yourname_t81_558_class9.ipynb
├── citations.bib
├── copyright.md
├── install
│   ├── pytorch-install-aug-2023.ipynb
│   ├── torch-conda.yml
│   ├── torch-cuda.yml
│   └── torch.yml
├── intro.md
├── jeffs_helpful.ipynb
├── mpg.pkl
├── person.json
├── t81_558_class_01_1_overview.ipynb
├── t81_558_class_01_2_intro_python.ipynb
├── t81_558_class_01_3_python_collections.ipynb
├── t81_558_class_01_4_python_files.ipynb
├── t81_558_class_01_5_python_functional.ipynb
├── t81_558_class_02_1_python_pandas.ipynb
├── t81_558_class_02_2_pandas_cat.ipynb
├── t81_558_class_02_3_pandas_grouping.ipynb
├── t81_558_class_02_4_pandas_functional.ipynb
├── t81_558_class_02_5_pandas_features.ipynb
├── t81_558_class_03_1_neural_net.ipynb
├── t81_558_class_03_2_pytorch.ipynb
├── t81_558_class_03_3_feature_encode.ipynb
├── t81_558_class_03_4_early_stop.ipynb
├── t81_558_class_03_5_pytorch_class_sequence.ipynb
├── t81_558_class_04_1_kfold.ipynb
├── t81_558_class_04_2_schedule.ipynb
├── t81_558_class_04_3_dropout.ipynb
├── t81_558_class_04_4_batch_norm.ipynb
├── t81_558_class_04_5_rapids.ipynb
├── t81_558_class_05_1_python_images.ipynb
├── t81_558_class_05_2_cnn.ipynb
├── t81_558_class_05_3_vision_transfer.ipynb
├── t81_558_class_05_4_generators.ipynb
├── t81_558_class_05_5_yolo.ipynb
├── t81_558_class_06_1_transformers.ipynb
├── t81_558_class_06_2_chat_gpt.ipynb
├── t81_558_class_06_3_llm_memory.ipynb
├── t81_558_class_06_4_embedding.ipynb
├── t81_558_class_06_5_prompt_engineering.ipynb
├── t81_558_class_07_1_img_generative.ipynb
├── t81_558_class_07_2_gan_intro.ipynb
├── t81_558_class_07_3_deoldify.ipynb
├── t81_558_class_07_4_stable_diff.ipynb
├── t81_558_class_07_5_dream_booth.ipynb
├── t81_558_class_08_1_kaggle_intro.ipynb
├── t81_558_class_08_2_pytorch_ensembles.ipynb
├── t81_558_class_08_3_pytorch_hyperparameters.ipynb
├── t81_558_class_08_4_bayesian_hyperparameter_opt.ipynb
├── t81_558_class_08_5_kaggle_project.ipynb
├── t81_558_class_09_1_faces.ipynb
├── t81_558_class_09_2_face_features.ipynb
├── t81_558_class_09_3_reality_augmentation.ipynb
├── t81_558_class_09_4_emotion.ipynb
├── t81_558_class_09_5_blink.ipynb
├── t81_558_class_10_1_timeseries.ipynb
├── t81_558_class_10_2_lstm.ipynb
├── t81_558_class_10_3_transformer_timeseries.ipynb
├── t81_558_class_10_4_seasonal.ipynb
├── t81_558_class_10_5_prophet.ipynb
├── t81_558_class_11_1_hf.ipynb
├── t81_558_class_11_2_py_huggingface.ipynb
├── t81_558_class_11_3_tokenizers.ipynb
├── t81_558_class_11_4_hf_datasets.ipynb
├── t81_558_class_11_5_hf_train.ipynb
├── t81_558_class_12_1_ai_gym.ipynb
├── t81_558_class_12_2_qlearningreinforcement.ipynb
├── t81_558_class_12_3_pytorch_reinforce.ipynb
├── t81_558_class_12_4_atari.ipynb
├── t81_558_class_12_5_rl_future.ipynb
├── t81_558_class_13_1_auto_encode.ipynb
├── t81_558_class_13_2_anomaly.ipynb
├── t81_558_class_13_3_retrain.ipynb
├── t81_558_class_13_4_tpu.ipynb
├── t81_558_class_13_5_new_tech.ipynb
└── t81_559_class_08_4_kaggle_llm.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | *.code-workspace
2 | submit.csv
3 | .vscode
4 | .DS_Store
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # poetry
102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103 | # This is especially recommended for binary packages to ensure reproducibility, and is more
104 | # commonly ignored for libraries.
105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106 | #poetry.lock
107 |
108 | # pdm
109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110 | #pdm.lock
111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112 | # in version control.
113 | # https://pdm.fming.dev/#use-with-ide
114 | .pdm.toml
115 |
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 |
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 |
123 | # SageMath parsed files
124 | *.sage.py
125 |
126 | # Environments
127 | .env
128 | .venv
129 | env/
130 | venv/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 |
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 |
139 | # Rope project settings
140 | .ropeproject
141 |
142 | # mkdocs documentation
143 | /site
144 |
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 |
150 | # Pyre type checker
151 | .pyre/
152 |
153 | # pytype static type analyzer
154 | .pytype/
155 |
156 | # Cython debug symbols
157 | cython_debug/
158 |
159 | # PyCharm
160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162 | # and can be added to the global gitignore or merged into this file. For a more nuclear
163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164 | #.idea/
165 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | authors:
4 | - family-names: "Heaton"
5 |   given-names: "Jeff"
6 |   orcid: "https://orcid.org/0000-0003-1496-4049"
7 | title: "Applications of Deep Neural Networks"
8 | version: 2021.08.01
9 | date-released: 2021-08-01
10 | url: "https://arxiv.org/abs/2009.05673"
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # T81-558: Applications of Deep Neural Networks
2 |
3 | [Washington University in St. Louis](http://www.wustl.edu)
4 |
5 | Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/)
6 |
7 | - Section 1. Spring 2025, Wednesday, 2:30 PM, Location: Mallinckrodt / 305
8 |
9 | # Course Description
10 |
11 | Deep learning is a group of exciting new technologies for neural networks. Through a combination of advanced training techniques and neural network architectural components, it is now possible to create neural networks that can handle tabular data, images, text, and audio as both input and output. Deep learning allows a neural network to learn hierarchies of information in a way that is similar to the function of the human brain. This course will introduce the student to classic neural network structures, Convolutional Neural Networks (CNN), Long Short-Term Memory (LSTM), Gated Recurrent Units (GRU), Generative Adversarial Networks (GAN), and reinforcement learning. Application of these architectures to computer vision, time series, security, natural language processing (NLP), and data generation will be covered. High Performance Computing (HPC) aspects will demonstrate how deep learning can be leveraged both on graphics processing units (GPUs) and grids. Focus is primarily upon the application of deep learning to problems, with some introduction to mathematical foundations. Students will use the Python programming language to implement deep learning using PyTorch. It is not necessary to know Python prior to this course; however, familiarity with at least one programming language is assumed. This course will be delivered in a hybrid format that includes both classroom and online instruction.
12 |
13 | # Objectives
14 |
15 | 1. Explain how neural networks (deep and otherwise) compare to other machine learning models.
16 | 2. Determine when a deep neural network would be a good choice for a particular problem.
17 | 3. Demonstrate your understanding of the material through a final project uploaded to GitHub.
18 |
19 | # Syllabus
20 |
21 | This syllabus presents the expected class schedule, due dates, and reading assignments. [Download current syllabus](https://s3.amazonaws.com/data.heatonresearch.com/wustl/syllabus/jheaton-t81-558-spring-2025-syllabus.pdf)
22 |
23 | | Module | Content |
24 | | ------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
25 | | [Module 1](t81_558_class_01_1_overview.ipynb)<br>Week of 06/09/2025 | **Module 1: Python Preliminaries**<br>- 1.1: Course Overview<br>- 1.2: Introduction to Python<br>- 1.3: Python Lists, Dictionaries, Sets & JSON<br>- 1.4: File Handling<br>- 1.5: Functions, Lambdas, and Map/Reduce |
26 | | [Module 2](t81_558_class_02_1_python_pandas.ipynb)<br>Week of 06/16/2025 | **Module 2: Python for Machine Learning**<br>- 2.1: Introduction to Pandas for Deep Learning<br>- 2.2: Encoding Categorical Values in Pandas<br>- 2.3: Grouping, Sorting, and Shuffling<br>- 2.4: Using Apply and Map in Pandas<br>- 2.5: Feature Engineering in Pandas<br>- [Module 1 Program](./assignments/assignment_yourname_t81_558_class1.ipynb) due: 06/17/2025<br>- Icebreaker due: 06/17/2025 |
27 | | [Module 3](t81_558_class_03_1_neural_net.ipynb)<br>Week of 06/16/2025 | **Module 3: PyTorch for Neural Networks**<br>- 3.1: Deep Learning and Neural Network Introduction<br>- 3.2: Introduction to PyTorch<br>- 3.3: Encoding a Feature Vector for PyTorch Deep Learning<br>- 3.4: Early Stopping and Network Persistence<br>- 3.5: Sequences vs Classes in PyTorch<br>- [Module 2 Program](./assignments/assignment_yourname_t81_558_class2.ipynb) due: 06/17/2025 |
28 | | [Module 4](t81_558_class_04_1_kfold.ipynb)<br>Week of 06/23/2025 | **Module 4: Training for Tabular Data**<br>- 4.1: Using K-Fold Cross-validation with PyTorch<br>- 4.2: Training Schedules for PyTorch<br>- 4.3: Dropout Regularization<br>- 4.4: Batch Normalization<br>- 4.5: RAPIDS for Tabular Data<br>- [Module 3 Program](./assignments/assignment_yourname_t81_558_class3.ipynb) due: 06/24/2025 |
29 | | [Module 5](t81_558_class_05_1_python_images.ipynb)<br>Week of 06/23/2025 | **Module 5: CNN and Computer Vision**<br>- 5.1: Image Processing in Python<br>- 5.2: Using Convolutional Neural Networks<br>- 5.3: Using Pretrained Neural Networks<br>- 5.4: Looking at Generators and Image Augmentation<br>- 5.5: Recognizing Multiple Images with YOLO<br>- [Module 4 Program](./assignments/assignment_yourname_t81_558_class4.ipynb) due: 06/24/2025 |
30 | | [Module 6](t81_558_class_06_1_transformers.ipynb)<br>Week of 06/30/2025 | **Module 6: ChatGPT and Large Language Models**<br>- 6.1: Introduction to Transformers<br>- 6.2: Accessing the ChatGPT API<br>- 6.3: LLM Memory<br>- 6.4: Introduction to Embeddings<br>- 6.5: Prompt Engineering<br>- [Module 5 Program](./assignments/assignment_yourname_t81_558_class5.ipynb) due: 07/01/2025 |
31 | | [Module 7](t81_558_class_07_1_img_generative.ipynb)<br>Week of 06/30/2025 | **Module 7: Image Generative Models**<br>- 7.1: Introduction to Generative AI<br>- 7.2: Generating Faces with StyleGAN3<br>- 7.3: GANs to Enhance Old Photographs with DeOldify<br>- 7.4: Text to Images with Stable Diffusion<br>- 7.5: Finetuning with DreamBooth<br>- [Module 6 Program](./assignments/assignment_yourname_t81_558_class6.ipynb) due: 07/01/2025 |
32 | | [Module 8](t81_558_class_08_1_kaggle_intro.ipynb)<br>Week of 07/07/2025 | **Module 8: Kaggle**<br>- 8.1: Introduction to Kaggle<br>- 8.2: Building Ensembles with Scikit-Learn and PyTorch<br>- 8.3: How Should You Architect Your PyTorch Neural Network: Hyperparameters<br>- 8.4: Bayesian Hyperparameter Optimization for PyTorch<br>- 8.5: Current Semester's Kaggle<br>- [Module 7 Program](./assignments/assignment_yourname_t81_558_class7.ipynb) due: 07/08/2025 |
33 | | [Module 9](t81_558_class_09_1_faces.ipynb)<br>Week of 07/14/2025 | **Module 9: Facial Recognition**<br>- 9.1: Detecting Faces in an Image<br>- 9.2: Detecting Facial Features<br>- 9.3: Image Augmentation<br>- 9.4: Application: Emotion Detection<br>- 9.5: Application: Blink Efficiency<br>- [Module 8 Assignment](./assignments/assignment_yourname_t81_558_class8.ipynb) due: 07/15/2025 |
34 | | [Module 10](t81_558_class_10_1_timeseries.ipynb)<br>Week of 07/14/2025 | **Module 10: Time Series in PyTorch**<br>- 10.1: Time Series Data Encoding for Deep Learning, PyTorch<br>- 10.2: LSTM-Based Time Series with PyTorch<br>- 10.3: Transformer-Based Time Series with PyTorch<br>- 10.4: Seasonality and Trend<br>- 10.5: Predicting with Meta Prophet<br>- [Module 9 Program](./assignments/assignment_yourname_t81_558_class9.ipynb) due: 07/15/2025 |
35 | | [Module 11](t81_558_class_11_1_hf.ipynb)<br>Week of 07/21/2025 | **Module 11: Natural Language Processing**<br>- 11.1: Introduction to Natural Language Processing<br>- 11.2: Hugging Face Introduction<br>- 11.3: Hugging Face Tokenizers<br>- 11.4: Hugging Face Data Sets<br>- 11.5: Training a Model in Hugging Face<br>- [Module 10 Program](./assignments/assignment_yourname_t81_558_class10.ipynb) due: 07/22/2025 |
36 | | [Module 12](t81_558_class_12_1_ai_gym.ipynb)<br>Week of 07/21/2025 | **Module 12: Reinforcement Learning**<br>- 12.1: Introduction to Gymnasium<br>- 12.2: Introduction to Q-Learning<br>- 12.3: Stable Baselines Q-Learning<br>- 12.4: Atari Games with Stable Baselines Neural Networks<br>- 12.5: Future of Reinforcement Learning |
37 | | [Module 13](t81_558_class_13_1_auto_encode.ipynb)<br>Week of 07/21/2025 | **Module 13: Deployment and Monitoring**<br>- 13.1: Using Denoising AutoEncoders<br>- 13.2: Anomaly Detection<br>- 13.3: Model Drift and Retraining<br>- 13.4: Tensor Processing Units (TPUs)<br>- 13.5: Future Directions in Artificial Intelligence<br>- Kaggle Assignment due: 07/22/2025 |
38 | | Week 14<br>**Meet on 07/28/2025** | **Week 14: Kaggle Presentations**<br>- Top Kaggle teams will present<br>- **We will meet on campus this week! (fourth meeting)**<br>- Final project due: 07/29/2025 |
39 |
40 | # Datasets
41 |
42 | - [Datasets can be downloaded here](https://data.heatonresearch.com/data/t81-558/index.html)
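
For example, most of the course notebooks read these datasets directly with Pandas. A minimal sketch (the `auto-mpg.csv` file name is one illustrative example; browse the index above for the full list):

```python
import pandas as pd

# Load one of the course datasets straight from the data site.
df = pd.read_csv("https://data.heatonresearch.com/data/t81-558/auto-mpg.csv")
print(df.head())
```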
43 |
--------------------------------------------------------------------------------
/assignments/assignment_yourname_t81_558_class10.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "VB1NIAB8FIF1"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "BI9Um_OWFIF3"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/index.html)\n",
20 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).\n",
21 | "\n",
22 | "**Module 10 Assignment: Time Series Neural Network**\n",
23 | "\n",
24 | "**Student Name: Your Name**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "ev33f_x8FIF3"
31 | },
32 | "source": [
33 | "# Assignment Instructions\n",
34 | "\n",
35 | "For this assignment, you will use an LSTM to predict a time series contained in the data file **[series-31-num.csv](https://data.heatonresearch.com/data/t81-558/datasets/series-31-num.csv)**. The code that you will use to complete this will be similar to the sunspots example from the course module. This data set contains two columns: *time* and *value*. Create an LSTM network and train it with a sequence size of 5 and a prediction window of 1. If you use a different sequence size, you will not have the correct number of submission rows. Train the neural network, the data set is relatively simple, and you should easily be able to get an RMSE below 1.0. FYI, I generate this dataset by fitting a cubic spline to a series of random points.\n",
36 | "\n",
37 | "This file contains a time series data set, do not randomize the order of the rows! For your training data, use all *time* values less than 3000, and for the test, use the remaining amounts greater than or equal to 3000. For the submit file, please send me the results of your test evaluation. You should have two columns: *time* and *value*. The column *time* should be the time at the beginning of each predicted sequence. The *value* should be the next value that your neural network predicted for each of the sequences.\n",
38 | "\n",
39 | "Your submission file will look similar to:\n",
40 | "\n",
41 | "|time|value|\n",
42 | "|-|-|\n",
43 | "|3000|37.022846|\n",
44 | "|3001|37.030582|\n",
45 | "|3002|37.03816|\n",
46 | "|3003|37.045563|\n",
47 | "|3004|37.0528|\n",
48 | "|...|...|"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "id": "UHyBC1btFIF4"
55 | },
56 | "source": [
57 | "# Google CoLab Instructions\n",
58 | "\n",
59 | "If you are using Google CoLab, it will be necessary to mount your GDrive so that you can send your notebook during the submit process. Running the following code will map your GDrive to ```/content/drive```."
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "metadata": {
66 | "colab": {
67 | "base_uri": "https://localhost:8080/"
68 | },
69 | "id": "g5X_AgKwFIF4",
70 | "outputId": "7fb273f5-5a36-4ee5-fe78-69c2a536c7be"
71 | },
72 | "outputs": [],
73 | "source": [
74 | "try:\n",
75 | " from google.colab import drive, userdata\n",
76 | " drive.mount('/content/drive', force_remount=True)\n",
77 | " COLAB = True\n",
78 | " print(\"Note: using Google CoLab\")\n",
79 | "except:\n",
80 | " print(\"Note: not using Google CoLab\")\n",
81 | " COLAB = False\n",
82 | "\n",
83 | "# Assignment Submission Key - Was sent you first week of class.\n",
84 | "# If you are in both classes, this is the same key.\n",
85 | "if COLAB:\n",
86 | " # For Colab, add to your \"Secrets\" (key icon at the left)\n",
87 | " key = userdata.get('T81_558_KEY')\n",
88 | "else:\n",
89 | " # If not colab, enter your key here, or use an environment variable.\n",
90 | " # (this is only an example key, use yours)\n",
91 | " key = \"Gx5en9cEVvaZnjhdaushddhuhhO4PsI32sgldAXj\""
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {
97 | "id": "XqrObrUzFIF6"
98 | },
99 | "source": [
100 | "# Assignment Submit Function\n",
101 | "\n",
102 | "You will submit the ten programming assignments electronically. The following **submit** function can be used to do this. My server will perform a basic check of each assignment and let you know if it sees any underlying problems.\n",
103 | "\n",
104 | "**It is unlikely that should need to modify this function.**"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {
111 | "id": "gpNq6djKFIF6"
112 | },
113 | "outputs": [],
114 | "source": [
115 | "import base64\n",
116 | "import os\n",
117 | "import numpy as np\n",
118 | "import pandas as pd\n",
119 | "import requests\n",
120 | "import PIL\n",
121 | "import PIL.Image\n",
122 | "import io\n",
123 | "from typing import List, Union\n",
124 | "\n",
125 | "# This function submits an assignment. You can submit an assignment as much as you like, only the final\n",
126 | "# submission counts. The paramaters are as follows:\n",
127 | "# data - List of pandas dataframes or images.\n",
128 | "# key - Your student key that was emailed to you.\n",
129 | "# course - The course that you are in, currently t81-558 or t81-559.\n",
130 | "# no - The assignment class number, should be 1 through 10.\n",
131 | "# source_file - The full path to your Python or IPYNB file. This must have \"_class1\" as part of its name.\n",
132 | "# . The number must match your assignment number. For example \"_class2\" for class assignment #2.\n",
133 | "\n",
134 | "def submit(\n",
135 | " data: List[Union[pd.DataFrame, PIL.Image.Image]],\n",
136 | " key: str,\n",
137 | " course: str,\n",
138 | " no: int,\n",
139 | " source_file: str = None\n",
140 | ") -> None:\n",
141 | " if source_file is None and '__file__' not in globals():\n",
142 | " raise Exception(\"Must specify a filename when in a Jupyter notebook.\")\n",
143 | " if source_file is None:\n",
144 | " source_file = __file__\n",
145 | "\n",
146 | " suffix = f'_class{no}'\n",
147 | " if suffix not in source_file:\n",
148 | " raise Exception(f\"{suffix} must be part of the filename.\")\n",
149 | "\n",
150 | " ext = os.path.splitext(source_file)[-1].lower()\n",
151 | " if ext not in ['.ipynb', '.py']:\n",
152 | " raise Exception(f\"Source file is {ext}; must be .py or .ipynb\")\n",
153 | "\n",
154 | " with open(source_file, \"rb\") as file:\n",
155 | " encoded_python = base64.b64encode(file.read()).decode('ascii')\n",
156 | "\n",
157 | " payload = []\n",
158 | " for item in data:\n",
159 | " if isinstance(item, PIL.Image.Image):\n",
160 | " buffered = io.BytesIO()\n",
161 | " item.save(buffered, format=\"PNG\")\n",
162 | " payload.append({'PNG': base64.b64encode(buffered.getvalue()).decode('ascii')})\n",
163 | " elif isinstance(item, pd.DataFrame):\n",
164 | " payload.append({'CSV': base64.b64encode(item.to_csv(index=False).encode('ascii')).decode(\"ascii\")})\n",
165 | " else:\n",
166 | " raise ValueError(f\"Unsupported data type: {type(item)}\")\n",
167 | "\n",
168 | " response = requests.post(\n",
169 | " \"https://api.heatonresearch.com/wu/submit\",\n",
170 | " headers={'x-api-key': key},\n",
171 | " json={\n",
172 | " 'payload': payload,\n",
173 | " 'assignment': no,\n",
174 | " 'course': course,\n",
175 | " 'ext': ext,\n",
176 | " 'py': encoded_python\n",
177 | " }\n",
178 | " )\n",
179 | "\n",
180 | " if response.status_code == 200:\n",
181 | " print(f\"Success: {response.text}\")\n",
182 | " else:\n",
183 | " print(f\"Failure: {response.text}\")"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {
189 | "collapsed": true,
190 | "id": "85XMCYK_FIF6",
191 | "jupyter": {
192 | "outputs_hidden": true
193 | }
194 | },
195 | "source": [
196 | "# Assignment #10 Sample Code\n",
197 | "\n",
198 | "The following code provides a starting point for this assignment."
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {
205 | "colab": {
206 | "base_uri": "https://localhost:8080/"
207 | },
208 | "id": "2VNENmy-FIF6",
209 | "outputId": "2aa22014-0ad8-4efb-8a61-7ca4f891b5a5"
210 | },
211 | "outputs": [],
212 | "source": [
213 | "import numpy as np\n",
214 | "def to_sequences(seq_size, obs):\n",
215 | " x = []\n",
216 | " y = []\n",
217 | "\n",
218 | " for i in range(len(obs)-SEQUENCE_SIZE):\n",
219 | " #print(i)\n",
220 | " window = obs[i:(i+SEQUENCE_SIZE)]\n",
221 | " after_window = obs[i+SEQUENCE_SIZE]\n",
222 | " window = [[x] for x in window]\n",
223 | " #print(\"{} - {}\".format(window,after_window))\n",
224 | " x.append(window)\n",
225 | " y.append(after_window)\n",
226 | "\n",
227 | " return np.array(x),np.array(y)\n",
228 | "\n",
229 | "# You must identify your source file. (modify for your local setup)\n",
230 | "file=\"/content/drive/My Drive/Colab Notebooks/assignment_yourname_t81_558_class10.ipynb\" # Google CoLab\n",
231 | "# file='C:\\\\Users\\\\jeffh\\\\projects\\\\t81_558_deep_learning\\\\assignments\\\\assignment_yourname_class10.ipynb' # Windows\n",
232 | "# file='/Users/jheaton/projects/t81_558_deep_learning/assignments/assignment_yourname_class10.ipynb' # Mac/Linux\n",
233 | "\n",
234 | "\n",
235 | "## ... continue your code...\n",
236 | "\n",
237 | "## Submit assignment\n",
238 | "submit(source_file=file,data=[submit_df],key=key,no=10,course='t81-558')"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": null,
244 | "metadata": {
245 | "id": "OnWNwhUnFIF7"
246 | },
247 | "outputs": [],
248 | "source": []
249 | }
250 | ],
251 | "metadata": {
252 | "anaconda-cloud": {},
253 | "colab": {
254 | "provenance": []
255 | },
256 | "kernelspec": {
257 | "display_name": "Python 3.11 (torch)",
258 | "language": "python",
259 | "name": "pytorch"
260 | },
261 | "language_info": {
262 | "codemirror_mode": {
263 | "name": "ipython",
264 | "version": 3
265 | },
266 | "file_extension": ".py",
267 | "mimetype": "text/x-python",
268 | "name": "python",
269 | "nbconvert_exporter": "python",
270 | "pygments_lexer": "ipython3",
271 | "version": "3.11.9"
272 | }
273 | },
274 | "nbformat": 4,
275 | "nbformat_minor": 4
276 | }
277 |
--------------------------------------------------------------------------------
/assignments/assignment_yourname_t81_558_class2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "SOw8QtGz46i0"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "DUPrEctE46i2"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), School of Engineering and Applied Science, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
20 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).\n",
21 | "\n",
22 | "**Module 2 Assignment: Creating Columns in Pandas**\n",
23 | "\n",
24 | "**Student Name: Your Name**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "ig-OGc3e46i3"
31 | },
32 | "source": [
33 | "# Assignment Instructions\n",
34 | "\n",
35 | "For this assignment, you will use the **reg-36-data.csv** dataset. This file contains a dataset that I generated specifically for this class. You can find the CSV file on my data site, at this location: [reg-36-data.csv](http://data.heatonresearch.com/data/t81-558/datasets/reg-36-data.csv).\n",
36 | "\n",
37 | "For this assignment, load and modify the data set. You will submit this modified dataset to the **submit** function. See [Assignment #1](https://github.com/jeffheaton/t81_558_deep_learning/blob/master/assignments/assignment_yourname_class1.ipynb) for details on how to submit an assignment or check that one was submitted.\n",
38 | "\n",
39 | "Modify the dataset as follows:\n",
40 | "\n",
41 | "* Add a column named *ratio* that is *max* divided by *number*. Leave *max* and *number* in the dataframe.\n",
42 | "* Replace the *cat2* column with dummy variables. e.g. 'cat2_CA-0', 'cat2_CA-1',\n",
43 | " 'cat2_CA-10', 'cat2_CA-11', 'cat2_CA-12', ...\n",
44 | "* Replace the *item* column with dummy variables, e.g. 'item_IT-0', 'item_IT-1',\n",
45 | " 'item_IT-10', 'item_IT-11', 'item_IT-12', ...\n",
46 | "* For field *length* replace missing values with the median of *length*.\n",
47 | "* For field *height* replace missing with median and convert to zscore.\n",
48 | "* Remove all other columns.\n",
49 | "* Your submitted dataframe will have these columns: 'height', 'max', 'number', 'length', 'ratio', 'cat2_CA-0', 'cat2_CA-1',\n",
50 | " 'cat2_CA-10', 'cat2_CA-11', 'cat2_CA-12', 'cat2_CA-13', 'cat2_CA-14',\n",
51 | " 'cat2_CA-15', 'cat2_CA-16', 'cat2_CA-17', 'cat2_CA-18', 'cat2_CA-19',\n",
52 | " 'cat2_CA-1A', 'cat2_CA-1B', 'cat2_CA-1C', 'cat2_CA-1D', 'cat2_CA-1E',\n",
53 | " 'cat2_CA-1F', 'cat2_CA-2', 'cat2_CA-20', 'cat2_CA-21', 'cat2_CA-22',\n",
54 | " 'cat2_CA-23', 'cat2_CA-24', 'cat2_CA-25', 'cat2_CA-26', 'cat2_CA-27',\n",
55 | " 'cat2_CA-3', 'cat2_CA-4', 'cat2_CA-5', 'cat2_CA-6', 'cat2_CA-7',\n",
56 | " 'cat2_CA-8', 'cat2_CA-9', 'cat2_CA-A', 'cat2_CA-B', 'cat2_CA-C',\n",
57 | " 'cat2_CA-D', 'cat2_CA-E', 'cat2_CA-F', 'item_IT-0', 'item_IT-1',\n",
58 | " 'item_IT-10', 'item_IT-11', 'item_IT-12', 'item_IT-13', 'item_IT-14',\n",
59 | " 'item_IT-15', 'item_IT-16', 'item_IT-17', 'item_IT-18', 'item_IT-19',\n",
60 | " 'item_IT-1A', 'item_IT-1B', 'item_IT-1C', 'item_IT-1D', 'item_IT-1E',\n",
61 | " 'item_IT-2', 'item_IT-3', 'item_IT-4', 'item_IT-5', 'item_IT-6',\n",
62 | " 'item_IT-7', 'item_IT-8', 'item_IT-9', 'item_IT-A', 'item_IT-B',\n",
63 | " 'item_IT-C', 'item_IT-D', 'item_IT-E', 'item_IT-F'."
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "id": "Y3uIboDe46i3"
70 | },
71 | "source": [
72 | "# Google CoLab Instructions\n",
73 | "\n",
74 | "If you are using Google CoLab, it will be necessary to mount your GDrive so that you can send your notebook during the submit process. Running the following code will map your GDrive to ```/content/drive```."
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "metadata": {
81 | "colab": {
82 | "base_uri": "https://localhost:8080/"
83 | },
84 | "id": "FfO731pE46i4",
85 | "outputId": "24db2742-7213-4b83-acee-7109c2f4ac23"
86 | },
87 | "outputs": [],
88 | "source": [
89 | "try:\n",
90 | " from google.colab import drive, userdata\n",
91 | " drive.mount('/content/drive', force_remount=True)\n",
92 | " COLAB = True\n",
93 | " print(\"Note: using Google CoLab\")\n",
94 | "except:\n",
95 | " print(\"Note: not using Google CoLab\")\n",
96 | " COLAB = False\n",
97 | "\n",
98 | "# Assignment Submission Key - Was sent you first week of class.\n",
99 | "# If you are in both classes, this is the same key.\n",
100 | "if COLAB:\n",
101 | " # For Colab, add to your \"Secrets\" (key icon at the left)\n",
102 | " key = userdata.get('T81_558_KEY')\n",
103 | "else:\n",
104 | " # If not colab, enter your key here, or use an environment variable.\n",
105 | " # (this is only an example key, use yours)\n",
106 | " key = \"Gx5en9cEVvaZnjhdaushddhuhhO4PsI32sgldAXj\""
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {
112 | "id": "j3tm86Wr46i4"
113 | },
114 | "source": [
115 | "# Assignment Submit Function\n",
116 | "\n",
117 | "You will submit the ten programming assignments electronically. The following **submit** function can be used to do this. My server will perform a basic check of each assignment and let you know if it sees any underlying problems.\n",
118 | "\n",
119 | "**It is unlikely that should need to modify this function.**"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {
126 | "id": "Ll3HVVmK46i5"
127 | },
128 | "outputs": [],
129 | "source": [
130 | "import base64\n",
131 | "import os\n",
132 | "import numpy as np\n",
133 | "import pandas as pd\n",
134 | "import requests\n",
135 | "import PIL\n",
136 | "import PIL.Image\n",
137 | "import io\n",
138 | "from typing import List, Union\n",
139 | "\n",
140 | "# This function submits an assignment. You can submit an assignment as much as you like, only the final\n",
141 | "# submission counts. The paramaters are as follows:\n",
142 | "# data - List of pandas dataframes or images.\n",
143 | "# key - Your student key that was emailed to you.\n",
144 | "# course - The course that you are in, currently t81-558 or t81-559.\n",
145 | "# no - The assignment class number, should be 1 through 10.\n",
146 | "# source_file - The full path to your Python or IPYNB file. This must have \"_class1\" as part of its name.\n",
147 | "# . The number must match your assignment number. For example \"_class2\" for class assignment #2.\n",
148 | "\n",
149 | "def submit(\n",
150 | " data: List[Union[pd.DataFrame, PIL.Image.Image]],\n",
151 | " key: str,\n",
152 | " course: str,\n",
153 | " no: int,\n",
154 | " source_file: str = None\n",
155 | ") -> None:\n",
156 | " if source_file is None and '__file__' not in globals():\n",
157 | " raise Exception(\"Must specify a filename when in a Jupyter notebook.\")\n",
158 | " if source_file is None:\n",
159 | " source_file = __file__\n",
160 | "\n",
161 | " suffix = f'_class{no}'\n",
162 | " if suffix not in source_file:\n",
163 | " raise Exception(f\"{suffix} must be part of the filename.\")\n",
164 | "\n",
165 | " ext = os.path.splitext(source_file)[-1].lower()\n",
166 | " if ext not in ['.ipynb', '.py']:\n",
167 | " raise Exception(f\"Source file is {ext}; must be .py or .ipynb\")\n",
168 | "\n",
169 | " with open(source_file, \"rb\") as file:\n",
170 | " encoded_python = base64.b64encode(file.read()).decode('ascii')\n",
171 | "\n",
172 | " payload = []\n",
173 | " for item in data:\n",
174 | " if isinstance(item, PIL.Image.Image):\n",
175 | " buffered = io.BytesIO()\n",
176 | " item.save(buffered, format=\"PNG\")\n",
177 | " payload.append({'PNG': base64.b64encode(buffered.getvalue()).decode('ascii')})\n",
178 | " elif isinstance(item, pd.DataFrame):\n",
179 | " payload.append({'CSV': base64.b64encode(item.to_csv(index=False).encode('ascii')).decode(\"ascii\")})\n",
180 | " else:\n",
181 | " raise ValueError(f\"Unsupported data type: {type(item)}\")\n",
182 | "\n",
183 | " response = requests.post(\n",
184 | " \"https://api.heatonresearch.com/wu/submit\",\n",
185 | " headers={'x-api-key': key},\n",
186 | " json={\n",
187 | " 'payload': payload,\n",
188 | " 'assignment': no,\n",
189 | " 'course': course,\n",
190 | " 'ext': ext,\n",
191 | " 'py': encoded_python\n",
192 | " }\n",
193 | " )\n",
194 | "\n",
195 | " if response.status_code == 200:\n",
196 | " print(f\"Success: {response.text}\")\n",
197 | " else:\n",
198 | " print(f\"Failure: {response.text}\")"
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "metadata": {
204 | "collapsed": true,
205 | "id": "9eFh-lVn46i5",
206 | "jupyter": {
207 | "outputs_hidden": true
208 | }
209 | },
210 | "source": [
211 | "# Assignment #2 Sample Code\n",
212 | "\n",
213 | "The following code provides a starting point for this assignment."
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {
220 | "colab": {
221 | "base_uri": "https://localhost:8080/"
222 | },
223 | "id": "xp24slyZ46i5",
224 | "outputId": "80faa032-ac4f-4695-c9e1-91a2879b13e9"
225 | },
226 | "outputs": [],
227 | "source": [
228 | "import os\n",
229 | "import pandas as pd\n",
230 | "from scipy.stats import zscore\n",
231 | "\n",
232 | "# You must identify your source file. (modify for your local setup)\n",
233 | "file=\"/content/drive/My Drive/Colab Notebooks/assignment_yourname_t81_558_class2.ipynb\" # Google CoLab\n",
234 | "# file='C:\\\\Users\\\\jeffh\\\\projects\\\\t81_558_deep_learning\\\\assignments\\\\assignment_yourname_class2.ipynb' # Windows\n",
235 | "# file='/Users/jheaton/projects/t81_558_deep_learning/assignments/assignment_yourname_class2.ipynb' # Mac/Linux\n",
236 | "\n",
237 | "# Begin assignment\n",
238 | "df = pd.read_csv(\"http://data.heatonresearch.com/data/t81-558/datasets/reg-36-data.csv\")\n",
239 | "print(len(df))\n",
240 | "\n",
241 | "df.drop('id',axis=1,inplace=True)\n",
242 | "df.drop('convention',axis=1,inplace=True)\n",
243 | "\n",
244 | "## ... continue your code...\n",
245 | "\n",
246 | "## Submit assignment\n",
247 | "df.to_csv('2.csv',index=False)\n",
248 | "submit(source_file=file,data=[df],key=key,course='t81-558',no=2)"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "metadata": {
255 | "id": "IEZXZeKV46i5"
256 | },
257 | "outputs": [],
258 | "source": []
259 | }
260 | ],
261 | "metadata": {
262 | "anaconda-cloud": {},
263 | "colab": {
264 | "provenance": []
265 | },
266 | "kernelspec": {
267 | "display_name": "Python 3.11 (torch)",
268 | "language": "python",
269 | "name": "pytorch"
270 | },
271 | "language_info": {
272 | "codemirror_mode": {
273 | "name": "ipython",
274 | "version": 3
275 | },
276 | "file_extension": ".py",
277 | "mimetype": "text/x-python",
278 | "name": "python",
279 | "nbconvert_exporter": "python",
280 | "pygments_lexer": "ipython3",
281 | "version": "3.11.9"
282 | }
283 | },
284 | "nbformat": 4,
285 | "nbformat_minor": 4
286 | }
287 |
--------------------------------------------------------------------------------
/assignments/assignment_yourname_t81_558_class6.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "CdL1ZvDepO-X"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "R_pemiL8pO-Y"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/index.html)\n",
20 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).\n",
21 | "\n",
22 | "**Module 6 Assignment: Extract Text with LLM**\n",
23 | "\n",
24 | "**Student Name: Your Name**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "lky4xopspO-Z"
31 | },
32 | "source": [
33 | "# Assignment Instructions\n",
34 | "\n",
35 | "A [file](https://s3.amazonaws.com/data.heatonresearch.com/data/t81-558/sentences.csv) is provided that contains 100 English sentences. Sample sentences from this file include:\n",
36 | "\n",
37 | "|id|sentence|\n",
38 | "|---|---|\n",
39 | "|1|Sarah found an old photograph in the attic.|\n",
40 | "|2|By the window, Jake noticed a sparkling diamond necklace.|\n",
41 | "|3|The antique clock was expertly fixed by Robert.|\n",
42 | "|4|At the beach, Maria stumbled upon a washed-up bottle.|\n",
43 | "|...|...|\n",
44 | "\n",
45 | "For each of these sentences you should extract the name of the person from the sentence. The results of this assignment would look like the following for the above input.\n",
46 | "\n",
47 | "|id|name|\n",
48 | "|---|---|\n",
49 | "|1|Sarah|\n",
50 | "|2|Jake|\n",
51 | "|3|Robert|\n",
52 | "|4|Maria|\n",
53 | "|...|...|\n",
54 | "\n",
55 | "Use a large language model (LLM) to extract the single word action from each of these sentences.\n",
56 | "\n"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {
62 | "id": "U4LQZW_SpO-Z"
63 | },
64 | "source": [
65 | "# Google CoLab Instructions\n",
66 | "\n",
67 | "If you are using Google CoLab, it will be necessary to mount your GDrive so that you can send your notebook during the submit process. Running the following code will map your GDrive to ```/content/drive```."
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": null,
73 | "metadata": {
74 | "colab": {
75 | "base_uri": "https://localhost:8080/"
76 | },
77 | "id": "1ZnCEIEopO-Z",
78 | "outputId": "10b432e6-0d22-48e2-cfa5-b0fff6eecf58"
79 | },
80 | "outputs": [],
81 | "source": [
82 | "try:\n",
83 | " from google.colab import drive, userdata\n",
84 | " drive.mount('/content/drive', force_remount=True)\n",
85 | " COLAB = True\n",
86 | " print(\"Note: using Google CoLab\")\n",
87 | "except:\n",
88 | " print(\"Note: not using Google CoLab\")\n",
89 | " COLAB = False\n",
90 | "\n",
91 | "# Assignment Submission Key - Was sent you first week of class.\n",
92 | "# If you are in both classes, this is the same key.\n",
93 | "if COLAB:\n",
94 | " # For Colab, add to your \"Secrets\" (key icon at the left)\n",
95 | " key = userdata.get('T81_558_KEY')\n",
96 | "else:\n",
97 | " # If not colab, enter your key here, or use an environment variable.\n",
98 | " # (this is only an example key, use yours)\n",
99 | " key = \"Gx5en9cEVvaZnjhdaushddhuhhO4PsI32sgldAXj\""
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {
105 | "id": "vwx0hsWE3CQd"
106 | },
107 | "source": [
108 | "# LangChain Setup\n",
109 | "\n",
110 | "We must first install LangChain, refer to Module 6.2 for more information."
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": null,
116 | "metadata": {
117 | "colab": {
118 | "base_uri": "https://localhost:8080/"
119 | },
120 | "id": "WRPrqvDn3jYh",
121 | "outputId": "f01afd3d-cae1-41d2-cefe-4e305b7b0519"
122 | },
123 | "outputs": [],
124 | "source": [
125 | "!pip install langchain langchain_openai"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {
131 | "id": "-A83r4MU3kmk"
132 | },
133 | "source": [
134 | "You will need a key for this assignment, for WUSTL students, look at Assignment 6 in Canvas."
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "metadata": {
141 | "id": "OPutwNcc3mwj"
142 | },
143 | "outputs": [],
144 | "source": [
145 | "from langchain_openai import ChatOpenAI\n",
146 | "\n",
147 | "# Your OpenAI API key\n",
148 | "# If you are in my class at WUSTL, get this key from the Assignment 6 description in Canvas.\n",
149 | "OPENAI_KEY = '[Insert your API key]'\n",
150 | "\n",
151 | "# This is the model you will generally use for this class\n",
152 | "MODEL = 'gpt-4o-mini'\n",
153 | "TEMPERATURE = 0\n",
154 | "\n",
155 | "# Initialize the OpenAI LLM (Language Learning Model) with your API key\n",
156 | "# Initialize the OpenAI LLM with your API key\n",
157 | "llm = ChatOpenAI(\n",
158 | " api_key = OPENAI_KEY,\n",
159 | " model=MODEL,\n",
160 | " temperature=TEMPERATURE,\n",
161 | " n= 1,\n",
162 | " max_tokens= 256)"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {
168 | "id": "PMLHwV0hpO-a"
169 | },
170 | "source": [
171 | "# Assignment Submit Function\n",
172 | "\n",
173 | "You will submit the 10 programming assignments electronically. The following submit function can be used to do this. My server will perform a basic check of each assignment and let you know if it sees any basic problems.\n",
174 | "\n",
175 | "**It is unlikely that should need to modify this function.**"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {
182 | "id": "ozSyLCNtpO-a"
183 | },
184 | "outputs": [],
185 | "source": [
186 | "import base64\n",
187 | "import os\n",
188 | "import numpy as np\n",
189 | "import pandas as pd\n",
190 | "import requests\n",
191 | "import PIL\n",
192 | "import PIL.Image\n",
193 | "import io\n",
194 | "from typing import List, Union\n",
195 | "\n",
196 | "# This function submits an assignment. You can submit an assignment as much as you like, only the final\n",
197 | "# submission counts. The paramaters are as follows:\n",
198 | "# data - List of pandas dataframes or images.\n",
199 | "# key - Your student key that was emailed to you.\n",
200 | "# course - The course that you are in, currently t81-558 or t81-559.\n",
201 | "# no - The assignment class number, should be 1 through 10.\n",
202 | "# source_file - The full path to your Python or IPYNB file. This must have \"_class1\" as part of its name.\n",
203 | "# . The number must match your assignment number. For example \"_class2\" for class assignment #2.\n",
204 | "\n",
205 | "def submit(\n",
206 | " data: List[Union[pd.DataFrame, PIL.Image.Image]],\n",
207 | " key: str,\n",
208 | " course: str,\n",
209 | " no: int,\n",
210 | " source_file: str = None\n",
211 | ") -> None:\n",
212 | " if source_file is None and '__file__' not in globals():\n",
213 | " raise Exception(\"Must specify a filename when in a Jupyter notebook.\")\n",
214 | " if source_file is None:\n",
215 | " source_file = __file__\n",
216 | "\n",
217 | " suffix = f'_class{no}'\n",
218 | " if suffix not in source_file:\n",
219 | " raise Exception(f\"{suffix} must be part of the filename.\")\n",
220 | "\n",
221 | " ext = os.path.splitext(source_file)[-1].lower()\n",
222 | " if ext not in ['.ipynb', '.py']:\n",
223 | " raise Exception(f\"Source file is {ext}; must be .py or .ipynb\")\n",
224 | "\n",
225 | " with open(source_file, \"rb\") as file:\n",
226 | " encoded_python = base64.b64encode(file.read()).decode('ascii')\n",
227 | "\n",
228 | " payload = []\n",
229 | " for item in data:\n",
230 | " if isinstance(item, PIL.Image.Image):\n",
231 | " buffered = io.BytesIO()\n",
232 | " item.save(buffered, format=\"PNG\")\n",
233 | " payload.append({'PNG': base64.b64encode(buffered.getvalue()).decode('ascii')})\n",
234 | " elif isinstance(item, pd.DataFrame):\n",
235 | " payload.append({'CSV': base64.b64encode(item.to_csv(index=False).encode('ascii')).decode(\"ascii\")})\n",
236 | " else:\n",
237 | " raise ValueError(f\"Unsupported data type: {type(item)}\")\n",
238 | "\n",
239 | " response = requests.post(\n",
240 | " \"https://api.heatonresearch.com/wu/submit\",\n",
241 | " headers={'x-api-key': key},\n",
242 | " json={\n",
243 | " 'payload': payload,\n",
244 | " 'assignment': no,\n",
245 | " 'course': course,\n",
246 | " 'ext': ext,\n",
247 | " 'py': encoded_python\n",
248 | " }\n",
249 | " )\n",
250 | "\n",
251 | " if response.status_code == 200:\n",
252 | " print(f\"Success: {response.text}\")\n",
253 | " else:\n",
254 | " print(f\"Failure: {response.text}\")"
255 | ]
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {
260 | "collapsed": true,
261 | "id": "H7kgvLHspO-a",
262 | "jupyter": {
263 | "outputs_hidden": true
264 | }
265 | },
266 | "source": [
267 | "# Assignment #6 Sample Code\n",
268 | "\n",
269 | "The following code provides a starting point for this assignment."
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": null,
275 | "metadata": {
276 | "colab": {
277 | "base_uri": "https://localhost:8080/"
278 | },
279 | "id": "8ZPLGWgkpO-a",
280 | "outputId": "b2a1e1f5-9658-40d5-a1e4-8989d1293e39"
281 | },
282 | "outputs": [],
283 | "source": [
284 | "import os\n",
285 | "import pandas as pd\n",
286 | "from scipy.stats import zscore\n",
287 | "import string\n",
288 | "from langchain.prompts import ChatPromptTemplate\n",
289 | "\n",
290 | "df = pd.read_csv(\"https://data.heatonresearch.com/data/t81-558/sentences.csv\")\n",
291 | "\n",
292 | "## ... continue your code...\n",
293 | "\n",
294 | "## Submit assignment\n",
295 | "\n",
296 | "# You must identify your source file. (modify for your local setup)\n",
297 | "file=\"/content/drive/My Drive/Colab Notebooks/assignment_yourname_t81_558_class6.ipynb\" # Google CoLab\n",
298 | "# file='C:\\\\Users\\\\jeffh\\\\projects\\\\t81_558_deep_learning\\\\assignments\\\\assignment_yourname_class6.ipynb' # Windows\n",
299 | "# file='/Users/jheaton/projects/t81_558_deep_learning/assignments/assignment_yourname_class6.ipynb' # Mac/Linux\n",
300 | "\n",
301 | "submit(source_file=file,data=[df_submit],course='t81-558',key=key,no=6)"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": null,
307 | "metadata": {},
308 | "outputs": [],
309 | "source": []
310 | }
311 | ],
312 | "metadata": {
313 | "anaconda-cloud": {},
314 | "colab": {
315 | "provenance": []
316 | },
317 | "kernelspec": {
318 | "display_name": "Python 3.11 (torch)",
319 | "language": "python",
320 | "name": "pytorch"
321 | },
322 | "language_info": {
323 | "codemirror_mode": {
324 | "name": "ipython",
325 | "version": 3
326 | },
327 | "file_extension": ".py",
328 | "mimetype": "text/x-python",
329 | "name": "python",
330 | "nbconvert_exporter": "python",
331 | "pygments_lexer": "ipython3",
332 | "version": "3.11.9"
333 | }
334 | },
335 | "nbformat": 4,
336 | "nbformat_minor": 4
337 | }
338 |
--------------------------------------------------------------------------------
/assignments/assignment_yourname_t81_558_class7.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "j9PZ4UHPXmr4"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "1HFB6Ur-Xmr4"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelVey School of Engineering [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
20 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/app_deep_learning/).\n",
21 | "\n",
22 | "**Module 7 Assignment: Image Processing**\n",
23 | "\n",
24 | "**Student Name: Your Name**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "4CHAt9vhXmr5"
31 | },
32 | "source": [
33 | "# Assignment Instructions\n",
34 | "\n",
35 | "For this assignment you will use two images:\n",
36 | "\n",
37 | "* [Dog House](https://github.com/jeffheaton/t81_558_deep_learning/raw/master/photos/hickory_home.jpg)\n",
38 | "* [Land Scape](https://github.com/jeffheaton/t81_558_deep_learning/raw/master/photos/landscape.jpg)\n",
39 | "\n",
40 | "\n",
41 | "Your code should work with any image; however, these are the two that the **submit** function is expecting. The goal is to convert both images into square-sized. In this module, we saw how to transform into a square by cropping. This time we will switch to a square by adding space. If an image is [landscape orientation](https://en.wikipedia.org/wiki/Page_orientation) you will need to add space at the top and bottom. Similarly, for portrait (taller than wide), you will add space at the sides. Make sure that your program centers the image between the space.\n",
42 | "\n",
43 | "The following diagram illustrates this.\n",
44 | "\n",
45 | "\n",
46 | "\n",
47 | "To calculate the color to add to the new space, take the average of all RGB values. Essentially sum all the red values, green, and blue and divide by the total number of pixels. Notice how the darker landscape picture above has a darker color added to the above/below space? This effect is due to this averaging. Make sure you convert your average RGB to an integer, RGB does not have fractional values.\n",
48 | "\n",
49 | "The submit function will check to see if your height and width match my solution. Your height and width should be square and match my dimensions. If this is not the case, you likely have a problem with your assignment. \n",
50 | "\n",
51 | "The submit function also takes three pixels and tests them. Pixels 1 and 3 are the upper left and lower-right; these are the average color and should match my solution exactly. You might see a difference in pixel 2, which is in the center if you center the image differently than I do. If you want to match my solution, make sure to round to integer after any divisions.\n"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {
57 | "id": "TxfWFbNMXmr5"
58 | },
59 | "source": [
60 | "# Google CoLab Instructions\n",
61 | "\n",
62 | "If you are using Google CoLab, it will be necessary to mount your GDrive so that you can send your notebook during the submit process. Running the following code will map your GDrive to ```/content/drive```."
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {
69 | "colab": {
70 | "base_uri": "https://localhost:8080/"
71 | },
72 | "id": "0yn69lkeXmr5",
73 | "outputId": "f961b667-0f17-4b20-a00f-36bbb781d3bc"
74 | },
75 | "outputs": [],
76 | "source": [
77 | "try:\n",
78 | " from google.colab import drive, userdata\n",
79 | " drive.mount('/content/drive', force_remount=True)\n",
80 | " COLAB = True\n",
81 | " print(\"Note: using Google CoLab\")\n",
82 | "except:\n",
83 | " print(\"Note: not using Google CoLab\")\n",
84 | " COLAB = False\n",
85 | "\n",
86 | "# Assignment Submission Key - Was sent you first week of class.\n",
87 | "# If you are in both classes, this is the same key.\n",
88 | "if COLAB:\n",
89 | " # For Colab, add to your \"Secrets\" (key icon at the left)\n",
90 | " key = userdata.get('T81_558_KEY')\n",
91 | "else:\n",
92 | " # If not colab, enter your key here, or use an environment variable.\n",
93 | " # (this is only an example key, use yours)\n",
94 | " key = \"Gx5en9cEVvaZnjhdaushddhuhhO4PsI32sgldAXj\""
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {
100 | "id": "cJlCNp44Xmr6"
101 | },
102 | "source": [
103 | "# Assignment Submit Function\n",
104 | "\n",
105 | "You will submit the ten programming assignments electronically. The following **submit** function can be used to do this. My server will perform a basic check of each assignment and let you know if it sees any underlying problems.\n",
106 | "\n",
107 | "**It is unlikely that should need to modify this function.**"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": null,
113 | "metadata": {
114 | "id": "TwtQxG7HXmr6"
115 | },
116 | "outputs": [],
117 | "source": [
118 | "import base64\n",
119 | "import os\n",
120 | "import numpy as np\n",
121 | "import pandas as pd\n",
122 | "import requests\n",
123 | "import PIL\n",
124 | "import PIL.Image\n",
125 | "import io\n",
126 | "from typing import List, Union\n",
127 | "\n",
128 | "# This function submits an assignment. You can submit an assignment as much as you like, only the final\n",
129 | "# submission counts. The paramaters are as follows:\n",
130 | "# data - List of pandas dataframes or images.\n",
131 | "# key - Your student key that was emailed to you.\n",
132 | "# course - The course that you are in, currently t81-558 or t81-559.\n",
133 | "# no - The assignment class number, should be 1 through 10.\n",
134 | "# source_file - The full path to your Python or IPYNB file. This must have \"_class1\" as part of its name.\n",
135 | "# . The number must match your assignment number. For example \"_class2\" for class assignment #2.\n",
136 | "\n",
137 | "def submit(\n",
138 | " data: List[Union[pd.DataFrame, PIL.Image.Image]],\n",
139 | " key: str,\n",
140 | " course: str,\n",
141 | " no: int,\n",
142 | " source_file: str = None\n",
143 | ") -> None:\n",
144 | " if source_file is None and '__file__' not in globals():\n",
145 | " raise Exception(\"Must specify a filename when in a Jupyter notebook.\")\n",
146 | " if source_file is None:\n",
147 | " source_file = __file__\n",
148 | "\n",
149 | " suffix = f'_class{no}'\n",
150 | " if suffix not in source_file:\n",
151 | " raise Exception(f\"{suffix} must be part of the filename.\")\n",
152 | "\n",
153 | " ext = os.path.splitext(source_file)[-1].lower()\n",
154 | " if ext not in ['.ipynb', '.py']:\n",
155 | " raise Exception(f\"Source file is {ext}; must be .py or .ipynb\")\n",
156 | "\n",
157 | " with open(source_file, \"rb\") as file:\n",
158 | " encoded_python = base64.b64encode(file.read()).decode('ascii')\n",
159 | "\n",
160 | " payload = []\n",
161 | " for item in data:\n",
162 | " if isinstance(item, PIL.Image.Image):\n",
163 | " buffered = io.BytesIO()\n",
164 | " item.save(buffered, format=\"PNG\")\n",
165 | " payload.append({'PNG': base64.b64encode(buffered.getvalue()).decode('ascii')})\n",
166 | " elif isinstance(item, pd.DataFrame):\n",
167 | " payload.append({'CSV': base64.b64encode(item.to_csv(index=False).encode('ascii')).decode(\"ascii\")})\n",
168 | " else:\n",
169 | " raise ValueError(f\"Unsupported data type: {type(item)}\")\n",
170 | "\n",
171 | " response = requests.post(\n",
172 | " \"https://api.heatonresearch.com/wu/submit\",\n",
173 | " headers={'x-api-key': key},\n",
174 | " json={\n",
175 | " 'payload': payload,\n",
176 | " 'assignment': no,\n",
177 | " 'course': course,\n",
178 | " 'ext': ext,\n",
179 | " 'py': encoded_python\n",
180 | " }\n",
181 | " )\n",
182 | "\n",
183 | " if response.status_code == 200:\n",
184 | " print(f\"Success: {response.text}\")\n",
185 | " else:\n",
186 | " print(f\"Failure: {response.text}\")"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {
192 | "collapsed": true,
193 | "id": "u2_9pv54Xmr6",
194 | "jupyter": {
195 | "outputs_hidden": true
196 | }
197 | },
198 | "source": [
199 | "# Assignment #7 Sample Code\n",
200 | "\n",
201 | "The following code provides a starting point for this assignment."
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": null,
207 | "metadata": {
208 | "colab": {
209 | "base_uri": "https://localhost:8080/",
210 | "height": 1000
211 | },
212 | "id": "DeJll4KOXmr7",
213 | "outputId": "5217cbdd-f351-4753-f04a-df39d82fa26f"
214 | },
215 | "outputs": [],
216 | "source": [
217 | "%matplotlib inline\n",
218 | "\n",
219 | "import os\n",
220 | "import pandas as pd\n",
221 | "import io\n",
222 | "import requests\n",
223 | "import numpy as np\n",
224 | "from scipy.stats import zscore\n",
225 | "from PIL import Image, ImageFile\n",
226 | "from matplotlib.pyplot import imshow\n",
227 | "import requests\n",
228 | "from io import BytesIO\n",
229 | "import numpy as np\n",
230 | "\n",
231 | "# You must identify your source file. (modify for your local setup)\n",
232 | "file=\"/content/drive/My Drive/Colab Notebooks/assignment_yourname_t81_558_class7.ipynb\" # Google CoLab\n",
233 | "# file='C:\\\\Users\\\\jeffh\\\\projects\\\\t81_558_deep_learning\\\\assignments\\\\assignment_yourname_class7.ipynb' # Windows\n",
234 | "# file='/Users/jheaton/projects/t81_558_deep_learning/assignments/assignment_yourname_class7.ipynb' # Mac/Linux\n",
235 | "\n",
236 | "# Handle first image\n",
237 | "url = \"https://github.com/jeffheaton/t81_558_deep_learning/raw/master/photos/hickory_home.jpg\"\n",
238 | "\n",
239 | "## ... continue your code...\n",
240 | "\n",
241 | "## Submit assignment\n",
242 | "\n",
243 | "submit(source_file=file,data=[submit_img1,submit_img2],key=key,no=7, course='t81-558')"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {
250 | "id": "r5_TkpYUXmr7"
251 | },
252 | "outputs": [],
253 | "source": []
254 | }
255 | ],
256 | "metadata": {
257 | "anaconda-cloud": {},
258 | "colab": {
259 | "provenance": []
260 | },
261 | "kernelspec": {
262 | "display_name": "Python 3.11 (torch)",
263 | "language": "python",
264 | "name": "pytorch"
265 | },
266 | "language_info": {
267 | "codemirror_mode": {
268 | "name": "ipython",
269 | "version": 3
270 | },
271 | "file_extension": ".py",
272 | "mimetype": "text/x-python",
273 | "name": "python",
274 | "nbconvert_exporter": "python",
275 | "pygments_lexer": "ipython3",
276 | "version": "3.11.9"
277 | }
278 | },
279 | "nbformat": 4,
280 | "nbformat_minor": 4
281 | }
282 |
--------------------------------------------------------------------------------
/assignments/assignment_yourname_t81_558_class8.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "domrnonINuu4"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "hTAVwaaOFuEf"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
20 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).\n",
21 | "\n",
22 | "**Module 8 Assignment: Feature Engineering**\n",
23 | "\n",
24 | "**Student Name: Your Name**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "4YUdI4CcFuEg"
31 | },
32 | "source": [
33 | "# Assignment Instructions\n",
34 | "\n",
35 | "This assignment is similar to assignment 5, except that you must use feature engineering to solve it. I provide you with a dataset that contains dimensions and the quality of items of specific shapes. With the values of 'height', 'width', 'depth'. 'shape', and 'quality' you should try to predict the cost of these items. You should be able to match very close to solution file, if you feature engineer correctly. To get full credit your average cost should not be more than 50 off from the solution. The autocorrector will let you know if you are in this range.\n",
36 | "\n",
37 | "You can find all of the needed CSV files here:\n",
38 | "\n",
39 | "* [Shapes - Training](https://data.heatonresearch.com/data/t81-558/datasets/shapes-train.csv)\n",
40 | "* [Shapes - Submit](https://data.heatonresearch.com/data/t81-558/datasets/shapes-test.csv)\n",
41 | "\n",
42 | "Use the training file to train your neural network and submit results for for the data contained in the test/submit file."
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "metadata": {
49 | "colab": {
50 | "base_uri": "https://localhost:8080/"
51 | },
52 | "id": "9ZvFFOR5A-Wo",
53 | "outputId": "4fe3375a-f443-4c13-93d9-5a83ead1eca0"
54 | },
55 | "outputs": [],
56 | "source": [
57 | "try:\n",
58 | " from google.colab import drive, userdata\n",
59 | " drive.mount('/content/drive', force_remount=True)\n",
60 | " COLAB = True\n",
61 | " print(\"Note: using Google CoLab\")\n",
62 | "except:\n",
63 | " print(\"Note: not using Google CoLab\")\n",
64 | " COLAB = False\n",
65 | "\n",
66 | "# Assignment Submission Key - Was sent you first week of class.\n",
67 | "# If you are in both classes, this is the same key.\n",
68 | "if COLAB:\n",
69 | " # For Colab, add to your \"Secrets\" (key icon at the left)\n",
70 | " key = userdata.get('T81_558_KEY')\n",
71 | "else:\n",
72 | " # If not colab, enter your key here, or use an environment variable.\n",
73 | " # (this is only an example key, use yours)\n",
74 | " key = \"Gx5en9cEVvaZnjhdaushddhuhhO4PsI32sgldAXj\""
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {
80 | "id": "4qrsT_KZFuEh"
81 | },
82 | "source": [
83 | "# Assignment Submit Function\n",
84 | "\n",
85 | "You will submit the 10 programming assignments electronically. The following submit function can be used to do this. My server will perform a basic check of each assignment and let you know if it sees any basic problems.\n",
86 | "\n",
87 | "**It is unlikely that should need to modify this function.**"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "metadata": {
94 | "id": "43KOAL0OFuEi"
95 | },
96 | "outputs": [],
97 | "source": [
98 | "import base64\n",
99 | "import os\n",
100 | "import numpy as np\n",
101 | "import pandas as pd\n",
102 | "import requests\n",
103 | "import PIL\n",
104 | "import PIL.Image\n",
105 | "import io\n",
106 | "from typing import List, Union\n",
107 | "\n",
108 | "# This function submits an assignment. You can submit an assignment as much as you like, only the final\n",
109 | "# submission counts. The paramaters are as follows:\n",
110 | "# data - List of pandas dataframes or images.\n",
111 | "# key - Your student key that was emailed to you.\n",
112 | "# course - The course that you are in, currently t81-558 or t81-559.\n",
113 | "# no - The assignment class number, should be 1 through 10.\n",
114 | "# source_file - The full path to your Python or IPYNB file. This must have \"_class1\" as part of its name.\n",
115 | "# . The number must match your assignment number. For example \"_class2\" for class assignment #2.\n",
116 | "\n",
117 | "def submit(\n",
118 | " data: List[Union[pd.DataFrame, PIL.Image.Image]],\n",
119 | " key: str,\n",
120 | " course: str,\n",
121 | " no: int,\n",
122 | " source_file: str = None\n",
123 | ") -> None:\n",
124 | " if source_file is None and '__file__' not in globals():\n",
125 | " raise Exception(\"Must specify a filename when in a Jupyter notebook.\")\n",
126 | " if source_file is None:\n",
127 | " source_file = __file__\n",
128 | "\n",
129 | " suffix = f'_class{no}'\n",
130 | " if suffix not in source_file:\n",
131 | " raise Exception(f\"{suffix} must be part of the filename.\")\n",
132 | "\n",
133 | " ext = os.path.splitext(source_file)[-1].lower()\n",
134 | " if ext not in ['.ipynb', '.py']:\n",
135 | " raise Exception(f\"Source file is {ext}; must be .py or .ipynb\")\n",
136 | "\n",
137 | " with open(source_file, \"rb\") as file:\n",
138 | " encoded_python = base64.b64encode(file.read()).decode('ascii')\n",
139 | "\n",
140 | " payload = []\n",
141 | " for item in data:\n",
142 | " if isinstance(item, PIL.Image.Image):\n",
143 | " buffered = io.BytesIO()\n",
144 | " item.save(buffered, format=\"PNG\")\n",
145 | " payload.append({'PNG': base64.b64encode(buffered.getvalue()).decode('ascii')})\n",
146 | " elif isinstance(item, pd.DataFrame):\n",
147 | " payload.append({'CSV': base64.b64encode(item.to_csv(index=False).encode('ascii')).decode(\"ascii\")})\n",
148 | " else:\n",
149 | " raise ValueError(f\"Unsupported data type: {type(item)}\")\n",
150 | "\n",
151 | " response = requests.post(\n",
152 | " \"https://api.heatonresearch.com/wu/submit\",\n",
153 | " headers={'x-api-key': key},\n",
154 | " json={\n",
155 | " 'payload': payload,\n",
156 | " 'assignment': no,\n",
157 | " 'course': course,\n",
158 | " 'ext': ext,\n",
159 | " 'py': encoded_python\n",
160 | " }\n",
161 | " )\n",
162 | "\n",
163 | " if response.status_code == 200:\n",
164 | " print(f\"Success: {response.text}\")\n",
165 | " else:\n",
166 | " print(f\"Failure: {response.text}\")"
167 | ]
168 | },
169 | {
170 | "cell_type": "markdown",
171 | "metadata": {
172 | "collapsed": true,
173 | "id": "zd5fX98YFuEm",
174 | "jupyter": {
175 | "outputs_hidden": true
176 | }
177 | },
178 | "source": [
179 | "# Assignment #8 Sample Code\n",
180 | "\n",
181 | "The following code provides a starting point for this assignment."
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {
188 | "colab": {
189 | "base_uri": "https://localhost:8080/"
190 | },
191 | "id": "XxG1Q-4806C-",
192 | "outputId": "4ae95bb1-1905-4c3a-8bd0-f7c5c096eb25",
193 | "scrolled": true
194 | },
195 | "outputs": [],
196 | "source": [
197 | "import os\n",
198 | "import pandas as pd\n",
199 | "import numpy as np\n",
200 | "from sklearn.model_selection import train_test_split\n",
201 | "import torch\n",
202 | "import torch.nn as nn\n",
203 | "import torch.optim as optim\n",
204 | "\n",
205 | "# You must identify your source file. (modify for your local setup)\n",
206 | "file=\"/content/drive/My Drive/Colab Notebooks/assignment_yourname_t81_558_class8.ipynb\" # Google CoLab\n",
207 | "# file='C:\\\\Users\\\\jeffh\\\\projects\\\\t81_558_deep_learning\\\\assignments\\\\assignment_yourname_class8.ipynb' # Windows\n",
208 | "# file='/Users/jheaton/projects/t81_558_deep_learning/assignments/assignment_yourname_class8.ipynb' # Mac/Linux\n",
209 | "\n",
210 | "\n",
211 | "# Reading data\n",
212 | "df_train = pd.read_csv(\"https://data.heatonresearch.com/data/t81-558/datasets/shapes-train.csv\")\n",
213 | "df_submit = pd.read_csv(\"https://data.heatonresearch.com/data/t81-558/datasets/shapes-test.csv\")\n",
214 | "\n",
215 | "## ... continue your code...\n",
216 | "\n",
217 | "## Submit assignment\n",
218 | "\n",
219 | "submit(source_file=file, data=[submit_df], key=key, no=8, course='t81-558')\n"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {
226 | "id": "welI_CXa06C-"
227 | },
228 | "outputs": [],
229 | "source": []
230 | }
231 | ],
232 | "metadata": {
233 | "anaconda-cloud": {},
234 | "colab": {
235 | "provenance": []
236 | },
237 | "kernelspec": {
238 | "display_name": "Python 3.11 (torch)",
239 | "language": "python",
240 | "name": "pytorch"
241 | },
242 | "language_info": {
243 | "codemirror_mode": {
244 | "name": "ipython",
245 | "version": 3
246 | },
247 | "file_extension": ".py",
248 | "mimetype": "text/x-python",
249 | "name": "python",
250 | "nbconvert_exporter": "python",
251 | "pygments_lexer": "ipython3",
252 | "version": "3.11.9"
253 | }
254 | },
255 | "nbformat": 4,
256 | "nbformat_minor": 4
257 | }
258 |
--------------------------------------------------------------------------------
/assignments/assignment_yourname_t81_558_class9.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "cK9q7DEQlmJ1"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "sxyItFumlmJ4"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/index.html)\n",
20 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).\n",
21 | "\n",
22 | "**Module 9 Assignment: Detect Multiple Faces**\n",
23 | "\n",
24 | "**Student Name: Your Name**"
25 | ]
26 | },
27 | {
28 | "cell_type": "markdown",
29 | "metadata": {
30 | "id": "coBqKSu7lmJ4"
31 | },
32 | "source": [
33 | "# Assignment Instructions\n",
34 | "\n",
35 | "I provide you with five images of people in various poses, such as the following:\n",
36 | "\n",
37 | "\n",
38 | "\n",
39 | "As you have seen from module 9, you can detect faces in this image, as you can see here:\n",
40 | "\n",
41 | "\n",
42 | "\n",
43 | "Your task for this assignment is to extract the coordinates (x,y) and dimensions (height, width). Extract the dimensions/coordinates for a rectangle around the individual face rectangles. This rectangle will overlap the edges of some of the separate face rectangles. The single rectangle would look similar, though you do not need to draw it. Rather, you will return a data frame of the coordinates and dimensions.\n",
44 | "\n",
45 | "\n",
46 | "\n",
47 | "Generate your dataframe from the following images.\n",
48 | "\n",
49 | "* https://data.heatonresearch.com/images/wustl/data/AdobeStock_158302589-low.jpg\n",
50 | "* https://data.heatonresearch.com/images/wustl/data/AdobeStock_268797955-low.jpg\n",
51 | "* https://data.heatonresearch.com/images/wustl/data/AdobeStock_319245189-low.jpg\n",
52 | "* https://data.heatonresearch.com/images/wustl/data/AdobeStock_622573012-low.jpg\n",
53 | "* https://data.heatonresearch.com/images/wustl/data/AdobeStock_632061559-low.jpg\n",
54 | "\n",
55 | "Your submitted dataframe should look like this. Make sure to round your numbers and convert to integer. You will need to calculate the width and height.\n",
56 | "\n",
57 | "|x|y|width|height|\n",
58 | "|-|-|-|-|\n",
59 | "|177|215|614|134|\n",
60 | "|316|74|472|231|\n",
61 | "|231|59|497|264|\n",
62 | "|436|160|167|245|\n",
63 | "|140|192|760|252|"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "id": "Z606GghplmJ5"
70 | },
71 | "source": [
72 | "# Google CoLab Instructions\n",
73 | "\n",
74 | "If you are using Google CoLab, it will be necessary to mount your GDrive so that you can send your notebook during the submit process. Running the following code will map your GDrive to ```/content/drive```."
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "metadata": {
81 | "colab": {
82 | "base_uri": "https://localhost:8080/"
83 | },
84 | "id": "sWJdoS7ClmJ5",
85 | "outputId": "90dac886-ad32-4474-8551-a9cb28b9f466"
86 | },
87 | "outputs": [],
88 | "source": [
89 | "try:\n",
90 | " from google.colab import drive, userdata\n",
91 | " drive.mount('/content/drive', force_remount=True)\n",
92 | " COLAB = True\n",
93 | " print(\"Note: using Google CoLab\")\n",
94 | "except:\n",
95 | " print(\"Note: not using Google CoLab\")\n",
96 | " COLAB = False\n",
97 | "\n",
98 | "# Assignment Submission Key - Was sent you first week of class.\n",
99 | "# If you are in both classes, this is the same key.\n",
100 | "if COLAB:\n",
101 | " # For Colab, add to your \"Secrets\" (key icon at the left)\n",
102 | " key = userdata.get('T81_558_KEY')\n",
103 | "else:\n",
104 | " # If not colab, enter your key here, or use an environment variable.\n",
105 | " # (this is only an example key, use yours)\n",
106 | " key = \"Gx5en9cEVvaZnjhdaushddhuhhO4PsI32sgldAXj\"\n",
107 | "\n",
108 | "# Make use of a GPU or MPS (Apple) if one is available. (see module 3.2)\n",
109 | "import torch\n",
110 | "has_mps = torch.backends.mps.is_built()\n",
111 | "device = \"mps\" if has_mps else \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
112 | "print(f\"Using device: {device}\")"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {
118 | "id": "uwon6e4vlmJ6"
119 | },
120 | "source": [
121 | "# Assignment Submit Function\n",
122 | "\n",
123 | "You will submit the 10 programming assignments electronically. The following submit function can be used to do this. My server will perform a basic check of each assignment and let you know if it sees any basic problems.\n",
124 | "\n",
125 | "**It is unlikely that should need to modify this function.**"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {
132 | "id": "1uVC8RUTlmJ6"
133 | },
134 | "outputs": [],
135 | "source": [
136 | "import base64\n",
137 | "import os\n",
138 | "import numpy as np\n",
139 | "import pandas as pd\n",
140 | "import requests\n",
141 | "import PIL\n",
142 | "import PIL.Image\n",
143 | "import io\n",
144 | "from typing import List, Union\n",
145 | "\n",
146 | "# This function submits an assignment. You can submit an assignment as much as you like, only the final\n",
147 | "# submission counts. The paramaters are as follows:\n",
148 | "# data - List of pandas dataframes or images.\n",
149 | "# key - Your student key that was emailed to you.\n",
150 | "# course - The course that you are in, currently t81-558 or t81-559.\n",
151 | "# no - The assignment class number, should be 1 through 10.\n",
152 | "# source_file - The full path to your Python or IPYNB file. This must have \"_class1\" as part of its name.\n",
153 | "# . The number must match your assignment number. For example \"_class2\" for class assignment #2.\n",
154 | "\n",
155 | "def submit(\n",
156 | " data: List[Union[pd.DataFrame, PIL.Image.Image]],\n",
157 | " key: str,\n",
158 | " course: str,\n",
159 | " no: int,\n",
160 | " source_file: str = None\n",
161 | ") -> None:\n",
162 | " if source_file is None and '__file__' not in globals():\n",
163 | " raise Exception(\"Must specify a filename when in a Jupyter notebook.\")\n",
164 | " if source_file is None:\n",
165 | " source_file = __file__\n",
166 | "\n",
167 | " suffix = f'_class{no}'\n",
168 | " if suffix not in source_file:\n",
169 | " raise Exception(f\"{suffix} must be part of the filename.\")\n",
170 | "\n",
171 | " ext = os.path.splitext(source_file)[-1].lower()\n",
172 | " if ext not in ['.ipynb', '.py']:\n",
173 | " raise Exception(f\"Source file is {ext}; must be .py or .ipynb\")\n",
174 | "\n",
175 | " with open(source_file, \"rb\") as file:\n",
176 | " encoded_python = base64.b64encode(file.read()).decode('ascii')\n",
177 | "\n",
178 | " payload = []\n",
179 | " for item in data:\n",
180 | " if isinstance(item, PIL.Image.Image):\n",
181 | " buffered = io.BytesIO()\n",
182 | " item.save(buffered, format=\"PNG\")\n",
183 | " payload.append({'PNG': base64.b64encode(buffered.getvalue()).decode('ascii')})\n",
184 | " elif isinstance(item, pd.DataFrame):\n",
185 | " payload.append({'CSV': base64.b64encode(item.to_csv(index=False).encode('ascii')).decode(\"ascii\")})\n",
186 | " else:\n",
187 | " raise ValueError(f\"Unsupported data type: {type(item)}\")\n",
188 | "\n",
189 | " response = requests.post(\n",
190 | " \"https://api.heatonresearch.com/wu/submit\",\n",
191 | " headers={'x-api-key': key},\n",
192 | " json={\n",
193 | " 'payload': payload,\n",
194 | " 'assignment': no,\n",
195 | " 'course': course,\n",
196 | " 'ext': ext,\n",
197 | " 'py': encoded_python\n",
198 | " }\n",
199 | " )\n",
200 | "\n",
201 | " if response.status_code == 200:\n",
202 | " print(f\"Success: {response.text}\")\n",
203 | " else:\n",
204 | " print(f\"Failure: {response.text}\")"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {
210 | "id": "ldLC0Tt2w33w"
211 | },
212 | "source": [
213 | "# Install Facenet-Pytorch"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": null,
219 | "metadata": {
220 | "colab": {
221 | "base_uri": "https://localhost:8080/"
222 | },
223 | "id": "Hn0jgYFDw6Tw",
224 | "outputId": "9ae782f5-7b91-4f42-ba9e-1c2f1fca1c1f"
225 | },
226 | "outputs": [],
227 | "source": [
228 | "!pip install facenet-pytorch"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {
234 | "id": "x5ajDHqXgIy6"
235 | },
236 | "source": [
237 | "Note: You will likely need to restart your session after running the previous block. Don't worry, rerunning the previous block is much faster the second time."
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {
243 | "collapsed": true,
244 | "id": "nNXM_k8olmJ7",
245 | "jupyter": {
246 | "outputs_hidden": true
247 | }
248 | },
249 | "source": [
250 | "# Assignment #9 Sample Code\n",
251 | "\n",
252 | "The following code provides a starting point for this assignment."
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": null,
258 | "metadata": {
259 | "colab": {
260 | "base_uri": "https://localhost:8080/"
261 | },
262 | "id": "_-HMgsaqlmJ7",
263 | "outputId": "88672633-79d6-4434-d614-6a656d0ee717"
264 | },
265 | "outputs": [],
266 | "source": [
267 | "import os\n",
268 | "import pandas as pd\n",
269 | "import torch\n",
270 | "from facenet_pytorch import MTCNN\n",
271 | "from PIL import Image\n",
272 | "import requests\n",
273 | "from io import BytesIO\n",
274 | "import torch\n",
275 | "from PIL import Image, ImageDraw\n",
276 | "from facenet_pytorch import MTCNN\n",
277 | "\n",
278 | "# You must identify your source file. (modify for your local setup)\n",
279 | "file=\"/content/drive/My Drive/Colab Notebooks/assignment_yourname_t81_558_class9.ipynb\" # Google CoLab\n",
280 | "# file='C:\\\\Users\\\\jeffh\\\\projects\\\\t81_558_deep_learning\\\\assignments\\\\assignment_yourname_class9.ipynb' # Windows\n",
281 | "# file='/Users/jheaton/projects/t81_558_deep_learning/assignments/assignment_yourname_class9.ipynb' # Mac/Linux\n",
282 | "\n",
283 | "mtcnn = MTCNN(keep_all=True, device=device)\n",
284 | "\n",
285 | "# Load image\n",
286 | "URLs = [\n",
287 | " \"https://data.heatonresearch.com/images/wustl/data/AdobeStock_158302589-low.jpg\",\n",
288 | " \"https://data.heatonresearch.com/images/wustl/data/AdobeStock_268797955-low.jpg\",\n",
289 | " \"https://data.heatonresearch.com/images/wustl/data/AdobeStock_319245189-low.jpg\",\n",
290 | " \"https://data.heatonresearch.com/images/wustl/data/AdobeStock_622573012-low.jpg\",\n",
291 | " \"https://data.heatonresearch.com/images/wustl/data/AdobeStock_632061559-low.jpg\"]\n",
292 | "\n",
293 | "\n",
294 | "## ... continue your code...\n",
295 | "\n",
296 | "## Submit assignment\n",
297 | "submit(source_file=file,data=[df_submit],key=key,no=9,course=\"t81-558\")\n",
298 | "#"
299 | ]
300 | }
301 | ],
302 | "metadata": {
303 | "anaconda-cloud": {},
304 | "colab": {
305 | "provenance": []
306 | },
307 | "kernelspec": {
308 | "display_name": "Python 3.11 (torch)",
309 | "language": "python",
310 | "name": "pytorch"
311 | },
312 | "language_info": {
313 | "codemirror_mode": {
314 | "name": "ipython",
315 | "version": 3
316 | },
317 | "file_extension": ".py",
318 | "mimetype": "text/x-python",
319 | "name": "python",
320 | "nbconvert_exporter": "python",
321 | "pygments_lexer": "ipython3",
322 | "version": "3.11.9"
323 | }
324 | },
325 | "nbformat": 4,
326 | "nbformat_minor": 4
327 | }
328 |
--------------------------------------------------------------------------------
/copyright.md:
--------------------------------------------------------------------------------
1 | > Publisher: Heaton Research, Inc.
2 | >
3 | > Applications of Deep Neural Networks
4 | >
5 | > May, 2022
6 | >
 7 | > Author: [Jeffrey Heaton](https://orcid.org/0000-0003-1496-4049)
8 | >
9 | > ISBN: 9798416344269
10 | >
11 | > Edition: 1
12 |
13 | The text and illustrations of Applications of Deep Neural Networks by Jeff Heaton are licensed under CC BY-NC-SA 4.0. To view a copy of this license, visit [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0).
14 | All of the book's source code is licensed under the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the license or (at your option) any later version. [LGPL](https://www.gnu.org/licenses/lgpl-3.0.en.html)
15 |
16 | 
17 | 
18 |
19 | Heaton Research, Encog, the Encog Logo, and the Heaton Research logo are all trademarks of Jeff Heaton in the United States and/or other countries.
20 |
21 | TRADEMARKS: Heaton Research has attempted throughout this book to distinguish proprietary trademarks from descriptive terms by following the capitalization style used by the manufacturer.
22 |
23 | The author and publisher have done their best to prepare this book, so the content is based upon the final release of software whenever possible. Portions of the manuscript may be based upon pre-release versions supplied by software manufacturer(s). The author and the publisher make no representation or warranties of any kind about the completeness or accuracy of the contents herein and accept no liability of any kind, including but not limited to performance, merchantability, fitness for any particular purpose, or any losses or damages of any kind caused or alleged to be caused directly or indirectly from this book.
24 |
25 | **DISCLAIMER**
26 |
27 | The author, Jeffrey Heaton, makes no warranty or representation, either expressed or implied, concerning the Software or its contents, quality, performance, merchantability, or fitness for a particular purpose. In no event will Jeffrey Heaton, his distributors, or dealers be liable to you or any other party for direct, indirect, special, incidental, consequential, or other damages arising out of the use of or inability to use the Software or its contents even if advised of the possibility of such damage. In the event that the Software includes an online update feature, Heaton Research, Inc. further disclaims any obligation to provide this feature for any specific duration other than the initial posting.
28 |
29 | The exclusion of implied warranties is not permitted by some states. Therefore, the above exclusion may not apply to you. This warranty provides you with specific legal rights; there may be other rights that you may have that vary from state to state. The pricing of the book with the Software by Heaton Research, Inc. reflects the allocation of risk and limitations on liability contained in this agreement of Terms and Conditions.
30 |
--------------------------------------------------------------------------------
/install/pytorch-install-aug-2023.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "
\n",
9 | ""
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# T81-558: Applications of Deep Neural Networks\n",
17 | "**Manual Python Setup**\n",
18 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
19 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "# Software Installation\n",
27 | "\n",
28 | "This notebook described how to install PyTorch for GPU (cuda), Apple Metal (MLS), or CPU.\n",
29 | "\n",
30 | "## Installing Python and PyTorch\n",
31 | "\n",
32 | "It is possible to install and run Python/PyTorch entirely from your computer, without the need for Google CoLab. Running PyTorch locally does require some software configuration and installation. If you are not confortable with software installation, just use Google CoLab. These instructions show you how to install PyTorch for CPU, GPU (cuda), and Apple M1/M2/Mx Metal Performance Shaders (MPS). Many of the examples in this class will achieve considerable performance improvement from a GPU/MPS.\n",
33 | "\n",
34 | "The first step is to install Python 3.9. I recommend using the Miniconda (Anaconda) release of Python, as it already includes many of the data science related packages that are needed by this class. Anaconda directly supports Windows, Mac, and Linux. If you have a Mac and wish to use M1 MPS make sure to install the ARM64 version of Miniconda. Miniconda is the minimal set of features from the extensive Anaconda Python distribution. Download Miniconda from the following URL:\n",
35 | "\n",
36 | "* [Miniconda](https://docs.conda.io/en/latest/miniconda.html)\n",
37 | "\n",
38 | "Make sure that you select the Miniconda version that corrisponds to your operating system. It is particularly important to choose M1/Metal if you have a later (non-Intel) Mac.\n",
39 | "\n",
40 | "Once you've installed Miniconda, we will first install Jupyter, which is the editor you will use in this course.\n",
41 | "\n",
42 | "```\n",
43 | "conda install -y jupyter\n",
44 | "```\n",
45 | "\n",
46 | "You must make sure that PyTorch has the version of Python that it is compatible with. The best way to accomplish this is with an Anaconda environment. Each environment that you create can have its own Python version, drivers, and Python libraries. I suggest that you create an environment to hold the Python instance for this class. Use the following command to create your environment. I am calling the environment **torch**, you can name yours whatever you like. We will create this environment from a YML configuration file. You can obtain this file [here](https://github.com/jeffheaton/app_deep_learning/blob/main/install/torch.yml). You should select from one of the following commands:\n",
47 | "\n",
48 | "\n",
49 | "* **Mac M1/M2**: conda env create -f torch-conda.yml\n",
50 | "* **NVIDIA CUDA GPU**: conda env create -f torch-cuda.yml\n",
51 | "* **CPU Only**: conda env create -f torch.yml\n",
52 | "\n",
53 | "To enter this environment, you must use the following command: \n",
54 | "\n",
55 | "```\n",
56 | "conda activate torch\n",
57 | "```\n",
58 | "\n",
59 | "\n",
60 | "## Register your Environment\n",
61 | "\n",
62 | "The following command registers your **pytorch** environment. Again, make sure you \"conda activate\" your new **pytorch** environment.\n",
63 | "\n",
64 | "```\n",
65 | "python -m ipykernel install --user --name pytorch --display-name \"Python 3.11 (torch)\"\n",
66 | "```\n",
67 | "\n",
68 | "## Testing your Environment\n",
69 | "\n",
70 | "You can now start Jupyter notebook. Use the following command.\n",
71 | "\n",
72 | "```\n",
73 | "jupyter notebook\n",
74 | "```\n",
75 | "\n",
76 | "You can now run the following code to check that you have the versions expected."
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 1,
82 | "metadata": {},
83 | "outputs": [
84 | {
85 | "name": "stdout",
86 | "output_type": "stream",
87 | "text": [
88 | "Python Platform: Windows-10-10.0.26100-SP0\n",
89 | "PyTorch Version: 2.5.1\n",
90 | "\n",
91 | "Python 3.11.11 | packaged by conda-forge | (main, Dec 5 2024, 14:06:23) [MSC v.1942 64 bit (AMD64)]\n",
92 | "Pandas 2.2.3\n",
93 | "Scikit-Learn 1.6.0\n",
94 | "NVIDIA/CUDA GPU is NOT AVAILABLE\n",
95 | "MPS (Apple Metal) is NOT AVAILABLE\n",
96 | "Target device is cpu\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "# What version of Python do you have?\n",
102 | "import sys\n",
103 | "import platform\n",
104 | "import torch\n",
105 | "import pandas as pd\n",
106 | "import sklearn as sk\n",
107 | "\n",
108 | "has_gpu = torch.cuda.is_available()\n",
109 | "has_mps = torch.backends.mps.is_built()\n",
110 | "device = \"mps\" if has_mps else \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
111 | "\n",
112 | "print(f\"Python Platform: {platform.platform()}\")\n",
113 | "print(f\"PyTorch Version: {torch.__version__}\")\n",
114 | "print()\n",
115 | "print(f\"Python {sys.version}\")\n",
116 | "print(f\"Pandas {pd.__version__}\")\n",
117 | "print(f\"Scikit-Learn {sk.__version__}\")\n",
118 | "print(\"NVIDIA/CUDA GPU is\", \"available\" if has_gpu else \"NOT AVAILABLE\")\n",
119 | "print(\"MPS (Apple Metal) is\", \"AVAILABLE\" if has_mps else \"NOT AVAILABLE\")\n",
120 | "print(f\"Target device is {device}\")"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "# Notes on MPS Incompatibilities\n",
128 | "\n",
129 | "## MPS Warnings\n",
130 | "\n",
131 | "You might get MPS warnings, such as the following.\n",
132 | "\n",
133 | "```\n",
134 | "/Users/jeff/miniconda3/envs/torch/lib/python3.9/site-packages/torch/autograd/function.py:539: UserWarning: The operator 'aten::native_dropout' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1702400227158/work/aten/src/ATen/mps/MPSFallback.mm:13.)\n",
135 | " return super().apply(*args, **kwargs) # type: ignore[misc]\n",
136 | "/Users/jeff/miniconda3/envs/torch/lib/python3.9/site-packages/torch/autograd/__init__.py:394: UserWarning: Error detected in LinearBackward0. Traceback of forward call that caused the error:\n",
137 | " File \"/Users/jeff/miniconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/container.py\", line 215, in forward\n",
138 | " input = module(input)\n",
139 | " (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1702400227158/work/torch/csrc/autograd/python_anomaly_mode.cpp:119.)\n",
140 | " result = Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n",
141 | "[2024-01-07 07:09:59,805] [0/2] torch._dynamo.exc: [WARNING] Backend compiler failed with a fake tensor exception at \n",
142 | "[2024-01-07 07:09:59,805] [0/2] torch._dynamo.exc: [WARNING] File \"/Users/jeff/miniconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/container.py\", line 216, in forward\n",
143 | "[2024-01-07 07:09:59,805] [0/2] torch._dynamo.exc: [WARNING] return input\n",
144 | "[2024-01-07 07:09:59,805] [0/2] torch._dynamo.exc: [WARNING] Adding a graph break.\n",
145 | "/Users/jeff/miniconda3/envs/torch/lib/python3.9/site-packages/torch/autograd/__init__.py:394: UserWarning: Error detected in LinearBackward0. Traceback of forward call that caused the error:\n",
146 | " File \"/Users/jeff/miniconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/container.py\", line 215, in forward\n",
147 | " input = module(input)\n",
148 | " (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1702400227158/work/torch/csrc/autograd/python_anomaly_mode.cpp:119.)\n",
149 | " result = Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n",
150 | "[2024-01-07 07:09:59,846] [0/2] torch._dynamo.exc: [WARNING] Backend compiler failed with a fake tensor exception at \n",
151 | "[2024-01-07 07:09:59,846] [0/2] torch._dynamo.exc: [WARNING] File \"/Users/jeff/miniconda3/envs/torch/lib/python3.9/site-packages/torch/nn/modules/container.py\", line 216, in forward\n",
152 | "[2024-01-07 07:09:59,846] [0/2] torch._dynamo.exc: [WARNING] return input\n",
153 | "[2024-01-07 07:09:59,846] [0/2] torch._dynamo.exc: [WARNING] Adding a graph break.\n",
154 | "```\n",
155 | "\n",
156 | "These warnings are mostly (I believe) harmless; however, you can usually remove them by modifying code like this:\n",
157 | "\n",
158 | "```\n",
159 | "# PyTorch 2.0 Model Compile (improved performance), but does not work as well on MPS\n",
160 | "#model = torch.compile(model,backend=\"aot_eager\").to(device)\n",
161 | "model = model.to(device)\n",
162 | "```\n",
163 | "\n",
164 | "## NotImplementedError\n",
165 | "\n",
166 | "You will sometimes get a NotImplementedError, this just means that you are trying to use a portion of PyTorch that has not yet enabled MPS. \n",
167 | "\n",
168 | "```\n",
169 | "---------------------------------------------------------------------------\n",
170 | "NotImplementedError Traceback (most recent call last)\n",
171 | "Cell In[8], line 8\n",
172 | " 4 temp_device = device\n",
173 | " 5 #if device == \"mps\":\n",
174 | " 6 # device = \"cpu\"\n",
175 | "----> 8 counts = mandelbrot(\n",
176 | " 9 # render_size=(1920,1080), # HD\n",
177 | " 10 render_size=(640, 480),\n",
178 | " 11 center=(-0.5, 0),\n",
179 | " 12 zoom=4,\n",
180 | " 13 cycles=200,\n",
181 | " 14 )\n",
182 | " 16 img = render(counts)\n",
183 | " 17 print(img.size)\n",
184 | "\n",
185 | "Cell In[7], line 52, in mandelbrot(render_size, center, zoom, cycles)\n",
186 | " 48 imag_range = torch.arange(\n",
187 | " 49 imag_start, imag_end, f, dtype=torch.float32, device=device\n",
188 | " 50 )\n",
189 | " 51 real, imag = torch.meshgrid(real_range, imag_range, indexing=\"ij\")\n",
190 | "---> 52 grid_c = torch.complex(imag, real)\n",
191 | " 53 current_values = torch.clone(grid_c)\n",
192 | " 54 counts = torch.Tensor(torch.zeros_like(grid_c, dtype=torch.float32))\n",
193 | "\n",
194 | "NotImplementedError: The operator 'aten::complex.out' is not currently implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on https://github.com/pytorch/pytorch/issues/77764. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.\n",
195 | "```\n",
196 | "\n",
197 | "You can sometimes fix this by adding these lines at the top of your code:\n",
198 | "\n",
199 | "```\n",
200 | "import os\n",
201 | "os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'\n",
202 | "```\n",
203 | "\n",
204 | "This will not always work, as not all PyTorch code honors this setting."
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": null,
210 | "metadata": {},
211 | "outputs": [],
212 | "source": []
213 | }
214 | ],
215 | "metadata": {
216 | "anaconda-cloud": {},
217 | "kernelspec": {
218 | "display_name": "Python 3.11 (torch)",
219 | "language": "python",
220 | "name": "pytorch"
221 | },
222 | "language_info": {
223 | "codemirror_mode": {
224 | "name": "ipython",
225 | "version": 3
226 | },
227 | "file_extension": ".py",
228 | "mimetype": "text/x-python",
229 | "name": "python",
230 | "nbconvert_exporter": "python",
231 | "pygments_lexer": "ipython3",
232 | "version": "3.11.11"
233 | }
234 | },
235 | "nbformat": 4,
236 | "nbformat_minor": 4
237 | }
238 |
--------------------------------------------------------------------------------
/install/torch-conda.yml:
--------------------------------------------------------------------------------
1 | name: torch
2 | channels:
3 | - pytorch
4 | - conda-forge
5 | dependencies:
6 | - python=3.11
7 | - pip>=19.0
8 | - pytorch
9 | - torchvision
10 | - torchaudio
11 | - jupyter
12 | - scikit-learn
13 | - scipy
14 | - pandas
15 | - pandas-datareader
16 | - matplotlib
17 | - pillow
18 | - tqdm
19 | - requests
20 | - h5py
21 | - pyyaml
22 | - flask
23 | - boto3
24 | - ipykernel
25 | - pip:
26 | - bayesian-optimization
27 | - gym
28 | - kaggle
29 |
30 |
--------------------------------------------------------------------------------
/install/torch-cuda.yml:
--------------------------------------------------------------------------------
1 | name: torch
2 | channels:
3 | - pytorch
4 | - nvidia
5 | dependencies:
6 | - python=3.11
7 | - pip>=19.0
8 | - pytorch
9 | - torchvision
10 | - torchaudio
11 | - jupyter
12 | - scikit-learn
13 | - scipy
14 | - pandas
15 | - pandas-datareader
16 | - matplotlib
17 | - pillow
18 | - tqdm
19 | - requests
20 | - h5py
21 | - pyyaml
22 | - flask
23 | - boto3
24 | - ipykernel
25 | - pip:
26 | - bayesian-optimization
27 | - facenet-pytorch
28 |
29 |
--------------------------------------------------------------------------------
/install/torch.yml:
--------------------------------------------------------------------------------
1 | name: torch
2 | channels:
3 | - pytorch
4 | - conda-forge
5 | dependencies:
6 | - python=3.11
7 | - pip>=19.0
8 | - pytorch
9 | - torchvision
10 | - jupyter
11 | - scikit-learn
12 | - scipy
13 | - pandas
14 | - pandas-datareader
15 | - matplotlib
16 | - pillow
17 | - tqdm
18 | - requests
19 | - h5py
20 | - pyyaml
21 | - flask
22 | - boto3
23 | - ipykernel
24 | - pip:
25 | - bayesian-optimization
26 | - gym
27 | - kaggle
28 |
29 |
--------------------------------------------------------------------------------
/intro.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 |
3 | Starting in the spring semester of 2016, I began teaching the T81-558 Applications of Deep Learning course for Washington University in St. Louis. I never liked Microsoft Powerpoint for technical classes, so I placed my course material, examples, and assignments on GitHub. This material started with code and grew to include enough description that this information evolved into the book you see before you.
4 |
5 | I license the book's text under the Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license. Similarly, I offer the book's code under the LGPL license. Though I provide this book both as a relatively inexpensive paperback and Amazon Kindle, you can obtain the book's PDF here:
6 |
7 | * [https://arxiv.org/abs/2009.05673](https://arxiv.org/abs/2009.05673)
8 |
9 | The book's code is available at the following GitHub repository:
10 |
11 | * [https://github.com/jeffheaton/t81_558_deep_learning](https://github.com/jeffheaton/t81_558_deep_learning)
12 |
13 | If you purchased this book from me, you have my sincere thanks for supporting my ongoing projects. I sell the book as a relatively low-cost paperback and Kindle ebook for those who prefer that format or wish to support my projects. I suggest that you look at the above GitHub site, as all of the code for this book is presented there as Jupyter notebooks that are entirely Google CoLab compatible.
14 |
15 | This book focuses on the application of deep neural networks. There is some theory; however, I do not focus on recreating neural network fundamentals that tech companies already provide in popular frameworks. The book begins with a quick review of the Python fundamentals needed to learn the subsequent chapters. With Python preliminaries covered, we start with classification and regression neural networks in Keras.
16 |
17 | In my opinion, PyTorch, Jax, and Keras are the top three deep learning frameworks. When I first created this course, neither PyTorch nor JAX existed. I began the course based on TensorFlow and migrated to Keras the following semester. I believe TensorFlow remains a good choice for a course focusing on the application of deep learning. Some of the third-party libraries used for this course use PyTorch; as a result, you will see a blend of both technologies. StyleGAN and TabGAN both make use of PyTorch.
18 |
19 | The technologies that this course is based on change rapidly. I update the Kindle and paperback books according to this schedule. Formal updates to this book typically occur just before each academic year's fall and spring semesters.
20 |
21 | The source document for this book is Jupyter notebooks. I wrote a Python utility that transforms my course Jupyter notebooks into this book. It is entirely custom, and I may release it as a project someday. However, because this book is based on code and updated twice a year, you may find the occasional typo. I try to minimize errors as much as possible, but please let me know if you see something. I use [Grammarly](https://www.grammarly.com/) to find textual issues, but due to the frequently updated nature of this book, I do not run it through a formal editing cycle for each release. I also double-check the code with each release to ensure CoLab, Keras, or another third-party library did not make a breaking change.
22 |
23 | The book and course continue to be a work in progress. Many have contributed code, suggestions, fixes, and clarifications to the GitHub repository. Please submit a GitHub issue or a pull request with a solution if you find an error.
24 |
--------------------------------------------------------------------------------
/jeffs_helpful.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Collection of Helpful Functions for [Class](https://sites.wustl.edu/jeffheaton/t81-558/)\n",
8 | "\n",
9 | "This is a collection of helpful functions that I will introduce during this course. "
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import base64\n",
19 | "import os\n",
20 | "\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "import numpy as np\n",
23 | "import pandas as pd\n",
24 | "import requests\n",
25 | "from sklearn import preprocessing\n",
26 | "\n",
27 | "\n",
28 | "# Encode text values to dummy variables(i.e. [1,0,0],[0,1,0],[0,0,1] for red,green,blue)\n",
29 | "def encode_text_dummy(df, name):\n",
30 | " dummies = pd.get_dummies(df[name])\n",
31 | " for x in dummies.columns:\n",
32 | " dummy_name = f\"{name}-{x}\"\n",
33 | " df[dummy_name] = dummies[x]\n",
34 | " df.drop(name, axis=1, inplace=True)\n",
35 | "\n",
36 | "\n",
37 | "# Encode text values to a single dummy variable. The new columns (which do not replace the old) will have a 1\n",
38 | "# at every location where the original column (name) matches each of the target_values. One column is added for\n",
39 | "# each target value.\n",
40 | "def encode_text_single_dummy(df, name, target_values):\n",
41 | " for tv in target_values:\n",
42 | " l = list(df[name].astype(str))\n",
43 | " l = [1 if str(x) == str(tv) else 0 for x in l]\n",
44 | " name2 = f\"{name}-{tv}\"\n",
45 | " df[name2] = l\n",
46 | "\n",
47 | "\n",
48 | "# Encode text values to indexes(i.e. [1],[2],[3] for red,green,blue).\n",
49 | "def encode_text_index(df, name):\n",
50 | " le = preprocessing.LabelEncoder()\n",
51 | " df[name] = le.fit_transform(df[name])\n",
52 | " return le.classes_\n",
53 | "\n",
54 | "\n",
55 | "# Encode a numeric column as zscores\n",
56 | "def encode_numeric_zscore(df, name, mean=None, sd=None):\n",
57 | " if mean is None:\n",
58 | " mean = df[name].mean()\n",
59 | "\n",
60 | " if sd is None:\n",
61 | " sd = df[name].std()\n",
62 | "\n",
63 | " df[name] = (df[name] - mean) / sd\n",
64 | "\n",
65 | "\n",
66 | "# Convert all missing values in the specified column to the median\n",
67 | "def missing_median(df, name):\n",
68 | " med = df[name].median()\n",
69 | " df[name] = df[name].fillna(med)\n",
70 | "\n",
71 | "\n",
72 | "# Convert all missing values in the specified column to the default\n",
73 | "def missing_default(df, name, default_value):\n",
74 | " df[name] = df[name].fillna(default_value)\n",
75 | "\n",
76 | "\n",
77 | "# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs\n",
78 | "def to_xy(df, target):\n",
79 | " result = []\n",
80 | " for x in df.columns:\n",
81 | " if x != target:\n",
82 | " result.append(x)\n",
83 | " # find out the type of the target column. Is it really this hard? :(\n",
84 | " target_type = df[target].dtypes\n",
85 | " target_type = target_type[0] if hasattr(\n",
86 | " target_type, '__iter__') else target_type\n",
87 | " # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.\n",
88 | " if target_type in (np.int64, np.int32):\n",
89 | " # Classification\n",
90 | " dummies = pd.get_dummies(df[target])\n",
91 | " return df[result].values.astype(np.float32), dummies.values.astype(np.float32)\n",
92 | " # Regression\n",
93 | " return df[result].values.astype(np.float32), df[[target]].values.astype(np.float32)\n",
94 | "\n",
95 | "# Nicely formatted time string\n",
96 | "\n",
97 | "\n",
98 | "def hms_string(sec_elapsed):\n",
99 | " h = int(sec_elapsed / (60 * 60))\n",
100 | " m = int((sec_elapsed % (60 * 60)) / 60)\n",
101 | " s = sec_elapsed % 60\n",
102 | " return f\"{h}:{m:>02}:{s:>05.2f}\"\n",
103 | "\n",
104 | "\n",
105 | "# Regression chart.\n",
106 | "def chart_regression(pred, y, sort=True):\n",
107 | " t = pd.DataFrame({'pred': pred, 'y': y.flatten()})\n",
108 | " if sort:\n",
109 | " t.sort_values(by=['y'], inplace=True)\n",
110 | " plt.plot(t['y'].tolist(), label='expected')\n",
111 | " plt.plot(t['pred'].tolist(), label='prediction')\n",
112 | " plt.ylabel('output')\n",
113 | " plt.legend()\n",
114 | " plt.show()\n",
115 | "\n",
116 | "# Remove all rows where the specified column is +/- sd standard deviations\n",
117 | "\n",
118 | "\n",
119 | "def remove_outliers(df, name, sd):\n",
120 | " drop_rows = df.index[(np.abs(df[name] - df[name].mean())\n",
121 | " >= (sd * df[name].std()))]\n",
122 | " df.drop(drop_rows, axis=0, inplace=True)\n",
123 | "\n",
124 | "\n",
125 | "# Encode a column to a range between normalized_low and normalized_high.\n",
126 | "def encode_numeric_range(df, name, normalized_low=-1, normalized_high=1,\n",
127 | " data_low=None, data_high=None):\n",
128 | " if data_low is None:\n",
129 | " data_low = min(df[name])\n",
130 | " data_high = max(df[name])\n",
131 | "\n",
132 | " df[name] = ((df[name] - data_low) / (data_high - data_low)) \\\n",
133 | " * (normalized_high - normalized_low) + normalized_low\n"
134 | ]
135 | }
136 | ],
137 | "metadata": {
138 | "anaconda-cloud": {},
139 | "kernelspec": {
140 | "display_name": "Python 3.9 (tensorflow)",
141 | "language": "python",
142 | "name": "tensorflow"
143 | },
144 | "language_info": {
145 | "codemirror_mode": {
146 | "name": "ipython",
147 | "version": 3
148 | },
149 | "file_extension": ".py",
150 | "mimetype": "text/x-python",
151 | "name": "python",
152 | "nbconvert_exporter": "python",
153 | "pygments_lexer": "ipython3",
154 | "version": "3.9.7"
155 | },
156 | "varInspector": {
157 | "cols": {
158 | "lenName": 16,
159 | "lenType": 16,
160 | "lenVar": 40
161 | },
162 | "kernels_config": {
163 | "python": {
164 | "delete_cmd_postfix": "",
165 | "delete_cmd_prefix": "del ",
166 | "library": "var_list.py",
167 | "varRefreshCmd": "print(var_dic_list())"
168 | },
169 | "r": {
170 | "delete_cmd_postfix": ") ",
171 | "delete_cmd_prefix": "rm(",
172 | "library": "var_list.r",
173 | "varRefreshCmd": "cat(var_dic_list()) "
174 | }
175 | },
176 | "types_to_exclude": [
177 | "module",
178 | "function",
179 | "builtin_function_or_method",
180 | "instance",
181 | "_Feature"
182 | ],
183 | "window_display": false
184 | }
185 | },
186 | "nbformat": 4,
187 | "nbformat_minor": 1
188 | }
189 |
--------------------------------------------------------------------------------
/mpg.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jeffheaton/app_deep_learning/8531f9b9db8e55d16660d51135da20808c80cc7d/mpg.pkl
--------------------------------------------------------------------------------
/person.json:
--------------------------------------------------------------------------------
1 | {
2 | "firstName": "John",
3 | "lastName": "Smith",
4 | "isAlive": true,
5 | "age": 27,
6 | "address": {
7 | "streetAddress": "21 2nd Street",
8 | "city": "New York",
9 | "state": "NY",
10 | "postalCode": "10021-3100"
11 | },
12 | "phoneNumbers": [
13 | {
14 | "type": "home",
15 | "number": "212 555-1234"
16 | },
17 | {
18 | "type": "office",
19 | "number": "646 555-4567"
20 | },
21 | {
22 | "type": "mobile",
23 | "number": "123 456-7890"
24 | }
25 | ],
26 | "children": [],
27 | "spouse": null
28 | }
--------------------------------------------------------------------------------
/t81_558_class_01_5_python_functional.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "
\n"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# T81-558: Applications of Deep Neural Networks\n",
15 | "\n",
16 | "**Module 1: Python Preliminaries**\n",
17 | "\n",
18 | "- Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
19 | "- For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).\n"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "# Module 1 Material\n",
27 | "\n",
28 | "* Part 1.1: Course Overview [[Video]](https://www.youtube.com/watch?v=r7eExQWKzdc&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_01_1_overview.ipynb)\n",
29 | "* Part 1.2: Introduction to Python [[Video]](https://www.youtube.com/watch?v=ZAOOinw51no&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_01_2_intro_python.ipynb)\n",
30 | "* Part 1.3: Python Lists, Dictionaries, Sets and JSON [[Video]](https://www.youtube.com/watch?v=5jZWWLO71bE&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_01_3_python_collections.ipynb)\n",
31 | "* Part 1.4: File Handling [[Video]](https://www.youtube.com/watch?v=CPrp1Sm-AhQ&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_01_4_python_files.ipynb)\n",
32 | "* **Part 1.5: Functions, Lambdas, and Map/Reduce** [[Video]](https://www.youtube.com/watch?v=DEg8a22mtBs&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_01_5_python_functional.ipynb)"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "# Google CoLab Instructions\n",
40 | "\n",
41 | "The following code ensures that Google CoLab is running and maps Google Drive if needed.\n"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 1,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | "Note: not using Google CoLab\n"
54 | ]
55 | }
56 | ],
57 | "source": [
58 | "try:\n",
59 | " from google.colab import drive\n",
60 | "\n",
61 | " COLAB = True\n",
62 | " print(\"Note: using Google CoLab\")\n",
63 | "except:\n",
64 | " print(\"Note: not using Google CoLab\")\n",
65 | " COLAB = False"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "# Part 1.5: Functions, Lambdas, and Map/Reduce\n",
73 | "\n",
74 | "Functions, **lambdas**, and **map/reduce** can allow you to process your data in advanced ways. We will introduce these techniques here and expand on them in the next module, which will discuss Pandas.\n",
75 | "\n",
76 | "Function parameters can be named or unnamed in Python. Default values can also be used. Consider the following function.\n"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 2,
82 | "metadata": {},
83 | "outputs": [
84 | {
85 | "name": "stdout",
86 | "output_type": "stream",
87 | "text": [
88 | "Hello John, this is Jeff.\n",
89 | "Goodbye John, this is Jeff.\n",
90 | "Goodbye John, this is Jeff.\n"
91 | ]
92 | }
93 | ],
94 | "source": [
95 | "def say_hello(speaker, person_to_greet, greeting=\"Hello\"):\n",
96 | " print(f\"{greeting} {person_to_greet}, this is {speaker}.\")\n",
97 | "\n",
98 | "\n",
99 | "say_hello(\"Jeff\", \"John\")\n",
100 | "say_hello(\"Jeff\", \"John\", \"Goodbye\")\n",
101 | "say_hello(speaker=\"Jeff\", person_to_greet=\"John\", greeting=\"Goodbye\")"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "A function is a way to capture code that is commonly executed. Consider the following function that can be used to trim white space from a string capitalize the first letter.\n"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 3,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "def process_string(str):\n",
118 | " t = str.strip()\n",
119 | " return t[0].upper() + t[1:]"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "This function can now be called quite easily.\n"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 4,
132 | "metadata": {},
133 | "outputs": [
134 | {
135 | "name": "stdout",
136 | "output_type": "stream",
137 | "text": [
138 | "\"Hello\"\n"
139 | ]
140 | }
141 | ],
142 | "source": [
143 | "str = process_string(\" hello \")\n",
144 | "print(f'\"{str}\"')"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "Python's **map** is a very useful function that is provided in many different programming languages. The **map** function takes a **list** and applies a function to each member of the **list** and returns a second **list** that is the same size as the first.\n"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 5,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "data": {
161 | "text/plain": [
162 | "['Apple', 'Pear', 'Orange', 'Pine apple']"
163 | ]
164 | },
165 | "execution_count": 5,
166 | "metadata": {},
167 | "output_type": "execute_result"
168 | }
169 | ],
170 | "source": [
171 | "l = [\" apple \", \"pear \", \"orange\", \"pine apple \"]\n",
172 | "list(map(process_string, l))"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "## Map\n",
180 | "\n",
181 | "The **map** function is very similar to the Python **comprehension** that we previously explored. The following **comprehension** accomplishes the same task as the previous call to **map**.\n"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 6,
187 | "metadata": {},
188 | "outputs": [
189 | {
190 | "name": "stdout",
191 | "output_type": "stream",
192 | "text": [
193 | "['Apple', 'Pear', 'Orange', 'Pine apple']\n"
194 | ]
195 | }
196 | ],
197 | "source": [
198 | "l = [\" apple \", \"pear \", \"orange\", \"pine apple \"]\n",
199 | "l2 = [process_string(x) for x in l]\n",
200 | "print(l2)"
201 | ]
202 | },
203 | {
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "The choice of using a **map** function or **comprehension** is up to the programmer. I tend to prefer **map** since it is so common in other programming languages.\n"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "metadata": {},
213 | "source": [
214 | "## Filter\n",
215 | "\n",
216 | "While a **map function** always creates a new **list** of the same size as the original, the **filter** function creates a potentially smaller **list**.\n"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 7,
222 | "metadata": {},
223 | "outputs": [
224 | {
225 | "name": "stdout",
226 | "output_type": "stream",
227 | "text": [
228 | "[10, 20]\n"
229 | ]
230 | }
231 | ],
232 | "source": [
233 | "def greater_than_five(x):\n",
234 | " return x > 5\n",
235 | "\n",
236 | "\n",
237 | "l = [1, 10, 20, 3, -2, 0]\n",
238 | "l2 = list(filter(greater_than_five, l))\n",
239 | "print(l2)"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "## Lambda\n",
247 | "\n",
248 | "It might seem somewhat tedious to have to create an entire function just to check to see if a value is greater than 5. A **lambda** saves you this effort. A lambda is essentially an unnamed function.\n"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 8,
254 | "metadata": {},
255 | "outputs": [
256 | {
257 | "name": "stdout",
258 | "output_type": "stream",
259 | "text": [
260 | "[10, 20]\n"
261 | ]
262 | }
263 | ],
264 | "source": [
265 | "l = [1, 10, 20, 3, -2, 0]\n",
266 | "l2 = list(filter(lambda x: x > 5, l))\n",
267 | "print(l2)"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {},
273 | "source": [
274 | "## Reduce\n",
275 | "\n",
276 | "Finally, we will make use of **reduce**. Like **filter** and **map** the **reduce** function also works on a **list**. However, the result of the **reduce** is a single value. Consider if you wanted to sum the **values** of a **list**. The sum is implemented by a **lambda**.\n"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": 9,
282 | "metadata": {},
283 | "outputs": [
284 | {
285 | "name": "stdout",
286 | "output_type": "stream",
287 | "text": [
288 | "32\n"
289 | ]
290 | }
291 | ],
292 | "source": [
293 | "from functools import reduce\n",
294 | "\n",
295 | "l = [1, 10, 20, 3, -2, 0]\n",
296 | "result = reduce(lambda x, y: x + y, l)\n",
297 | "print(result)"
298 | ]
299 | }
300 | ],
301 | "metadata": {
302 | "anaconda-cloud": {},
303 | "kernelspec": {
304 | "display_name": "Python 3.9 (torch)",
305 | "language": "python",
306 | "name": "pytorch"
307 | },
308 | "language_info": {
309 | "codemirror_mode": {
310 | "name": "ipython",
311 | "version": 3
312 | },
313 | "file_extension": ".py",
314 | "mimetype": "text/x-python",
315 | "name": "python",
316 | "nbconvert_exporter": "python",
317 | "pygments_lexer": "ipython3",
318 | "version": "3.9.16"
319 | },
320 | "varInspector": {
321 | "cols": {
322 | "lenName": 16,
323 | "lenType": 16,
324 | "lenVar": 40
325 | },
326 | "kernels_config": {
327 | "python": {
328 | "delete_cmd_postfix": "",
329 | "delete_cmd_prefix": "del ",
330 | "library": "var_list.py",
331 | "varRefreshCmd": "print(var_dic_list())"
332 | },
333 | "r": {
334 | "delete_cmd_postfix": ") ",
335 | "delete_cmd_prefix": "rm(",
336 | "library": "var_list.r",
337 | "varRefreshCmd": "cat(var_dic_list()) "
338 | }
339 | },
340 | "types_to_exclude": [
341 | "module",
342 | "function",
343 | "builtin_function_or_method",
344 | "instance",
345 | "_Feature"
346 | ],
347 | "window_display": false
348 | }
349 | },
350 | "nbformat": 4,
351 | "nbformat_minor": 4
352 | }
353 |
--------------------------------------------------------------------------------
/t81_558_class_06_1_transformers.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "7bUMANd9dKsQ"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "nwue4RDMdKsS"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "**Module 6: ChatGPT and Large Language Models**\n",
20 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
21 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {
27 | "id": "iZEDDiENdKsS"
28 | },
29 | "source": [
30 | "# Module 6 Material\n",
31 | "\n",
32 | "* **Part 6.1: Introduction to Transformers** [[Video]](https://www.youtube.com/watch?v=mn6r5PYJcu0&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_1_transformers.ipynb)\n",
33 | "* Part 6.2: Accessing the ChatGPT API [[Video]](https://www.youtube.com/watch?v=tcdscXl4o5w&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_2_chat_gpt.ipynb)\n",
34 | "* Part 6.3: LLM Memory [[Video]](https://www.youtube.com/watch?v=oGQ3TQx1Qs8&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_3_llm_memory.ipynb)\n",
35 | "* Part 6.4: Introduction to Embeddings [[Video]](https://www.youtube.com/watch?v=e6kcs9Uj_ps&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_4_embedding.ipynb)\n",
36 | "* Part 6.5: Prompt Engineering [[Video]](https://www.youtube.com/watch?v=miTpIDR7k6c&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_5_prompt_engineering.ipynb)"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "RrDcW1cUdKsT"
43 | },
44 | "source": [
45 | "# Google CoLab Instructions\n",
46 | "\n",
47 | "The following code ensures that Google CoLab is running the correct version of TensorFlow.\n",
48 | " Running the following code will map your GDrive to ```/content/drive```."
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {
55 | "id": "E97PfPCGdKsT",
56 | "outputId": "882f0b5f-005b-4055-e708-0b62433f67ad"
57 | },
58 | "outputs": [
59 | {
60 | "name": "stdout",
61 | "output_type": "stream",
62 | "text": [
63 | "Note: not using Google CoLab\n"
64 | ]
65 | }
66 | ],
67 | "source": [
68 | "try:\n",
69 | " from google.colab import drive\n",
70 | " COLAB = True\n",
71 | " print(\"Note: using Google CoLab\")\n",
72 | "except:\n",
73 | " print(\"Note: not using Google CoLab\")\n",
74 | " COLAB = False"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {
80 | "id": "NVW5XjandKsU"
81 | },
82 | "source": [
83 | "# Part 6.1: Introduction to Transformers\n",
84 | "\n",
85 | "Transformers are neural networks that provide state-of-the-art solutions for many of the problems previously assigned to recurrent neural networks. [[Cite:vaswani2017attention]](https://arxiv.org/abs/1706.03762) Sequences can form both the input and the output of a neural network, examples of such configurations include::\n",
86 | "\n",
87 | "* Vector to Sequence - Image captioning\n",
88 | "* Sequence to Vector - Sentiment analysis\n",
89 | "* Sequence to Sequence - Language translation \n",
90 | "\n",
91 | "Sequence-to-sequence allows an input sequence to produce an output sequence based on an input sequence. Transformers focus primarily on this sequence-to-sequence configuration.\n",
92 | "\n",
93 | "## High-Level Overview of Transformers\n",
94 | "\n",
95 | "This course focuses primarily on the application of deep neural networks. The focus will be on presenting data to a transformer and a transformer's major components. As a result, we will not focus on implementing a transformer at the lowest level. The following section provides an overview of critical internal parts of a transformer, such as residual connections and attention. In the next chapter, we will use transformers from [Hugging Face](https://huggingface.co/) to perform natural language processing with transformers. If you are interested in implementing a transformer from scratch, Keras provides a comprehensive [example](https://www.tensorflow.org/text/tutorials/transformer).\n",
96 | "\n",
97 | "Figure 10.TRANS-1 presents a high-level view of a transformer for language translation.\n",
98 | "\n",
99 | "**Figure 10.TRANS-1: High Level View of a Translation Transformer**\n",
100 | "\n",
101 | "\n",
102 | "We use a transformer that translates between English and Spanish for this example. We present the English sentence \"the cat likes milk\" and receive a Spanish translation of \"al gato le gusta la leche.\" \n",
103 | "\n",
104 | "We begin by placing the English source sentence between the beginning and ending tokens. This input can be of any length, and we presented it to the neural network as a ragged Tensor. Because the Tensor is ragged, no padding is necessary. Such input is acceptable for the attention layer that will receive the source sentence. The encoder transforms this ragged input into a hidden state containing a series of key-value pairs representing the knowledge in the source sentence. The encoder understands to read English and convert to a hidden state. The decoder understands how to output Spanish from this hidden state.\n",
105 | "\n",
106 | "We initially present the decoder with the hidden state and the starting token. The decoder will predict the probabilities of all words in its vocabulary. The word with the highest probability is the first word of the sentence. \n",
107 | "\n",
108 | "The highest probability word is attached concatenated to the translated sentence, initially containing only the beginning token. This process continues, growing the translated sentence in each iteration until the decoder predicts the ending token. \n",
109 | "\n",
110 | "## Transformer Hyperparameters\n",
111 | "\n",
112 | "Before we describe how these layers fit together, we must consider the following transformer hyperparameters, along with default settings from the Keras transformer example:\n",
113 | "\n",
114 | "* num_layers = 4\n",
115 | "* d_model = 128\n",
116 | "* dff = 512\n",
117 | "* num_heads = 8\n",
118 | "* dropout_rate = 0.1\n",
119 | "\n",
120 | "Multiple encoder and decoder layers can be present. The **num_layers** hyperparameter specifies how many encoder and decoder layers there are. The expected tensor shape for the input to the encoder layer is the same as the output produced; as a result, you can easily stack these layers.\n",
121 | "\n",
122 | "We will see embedding layers in the next chapter. However, you can think of an embedding layer as a dictionary for now. Each entry in the embedding corresponds to each word in a fixed-size vocabulary. Similar words should have similar vectors. The **d_model** hyperparameter specifies the size of the embedding vector. Though you will sometimes preload embeddings from a project such as [Word2vec](https://radimrehurek.com/gensim/models/word2vec.html) or [GloVe](https://nlp.stanford.edu/projects/glove/), the optimizer can train these embeddings with the rest of the transformer. Training your embeddings allows the **d_model** hyperparameter to set to any desired value. If you transfer the embeddings, you must set the **d_model** hyperparameter to the same value as the transferred embeddings.\n",
123 | "\n",
124 | "The **dff** hyperparameter specifies the size of the dense feedforward layers. The **num_heads** hyperparameter sets the number of attention layers heads. Finally, the dropout_rate specifies a dropout percentage to combat overfitting. We discussed dropout previously in this book.\n",
125 | "\n",
126 | "## Inside a Transformer\n",
127 | "\n",
128 | "In this section, we will examine the internals of a transformer so that you become familiar with essential concepts such as:\n",
129 | "\n",
130 | "* Embeddings\n",
131 | "* Positional Encoding\n",
132 | "* Attention and Self-Attention\n",
133 | "* Residual Connection\n",
134 | "\n",
135 | "You can see a lower-level diagram of a transformer in Figure 10.TRANS-2.\n",
136 | "\n",
137 | "**Figure 10.TRANS-2: Architectural Diagram from the Paper**\n",
138 | "\n",
139 | "\n",
140 | "While the original transformer paper is titled \"Attention is All you Need,\" attention isn't the only layer type you need. The transformer also contains dense layers. However, the title \"Attention and Dense Layers are All You Need\" isn't as catchy.\n",
141 | "\n",
142 | "The transformer begins by tokenizing the input English sentence. Tokens may or may not be words. Generally, familiar parts of words are tokenized and become building blocks of longer words. This tokenization allows common suffixes and prefixes to be understood independently of their stem word. Each token becomes a numeric index that the transformer uses to look up the vector. There are several special tokens:\n",
143 | "\n",
144 | "* Index 0 = Pad\n",
145 | "* Index 1 = Unknow\n",
146 | "* Index 2 = Start token\n",
147 | "* Index 3 = End token\n",
148 | "\n",
149 | "The transformer uses index 0 when we must pad unused space at the end of a tensor. Index 1 is for unknown words. The starting and ending tokens are provided by indexes 2 and 3.\n",
150 | "\n",
151 | "The token vectors are simply the inputs to the attention layers; there is no implied order or position. The transformer adds the slopes of a sine and cosine wave to the token vectors to encode position. \n",
152 | "\n",
153 | "Attention layers have three inputs: key (k), value(v), and query (q). This layer is self-attention if the query, key, and value are the same. The key and value pairs specify the information that the query operates upon. The attention layer learns what positions of data to focus upon.\n",
154 | "\n",
155 | "The transformer presents the position encoded embedding vectors to the first self-attention segment in the encoder layer. The output from the attention is normalized and ultimately becomes the hidden state after all encoder layers are processed. \n",
156 | "\n",
157 | "The hidden state is only calculated once per query. Once the input Spanish sentence becomes a hidden state, this value is presented repeatedly to the decoder until the decoder forms the final Spanish sentence.\n",
158 | "\n",
159 | "This section presented a high-level introduction to transformers. In the next part, we will implement the encoder and apply it to time series. In the following chapter, we will use [Hugging Face](https://huggingface.co/) transformers to perform natural language processing.\n",
160 | "\n",
161 | "\n"
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "metadata": {},
167 | "source": [
168 | "# Module 6 Assignment\n",
169 | "\n",
170 | "You can find the fifth assignment here: [assignment 6](https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class6.ipynb)\n"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": null,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": []
179 | }
180 | ],
181 | "metadata": {
182 | "anaconda-cloud": {},
183 | "colab": {
184 | "collapsed_sections": [],
185 | "name": "new_t81_558_class_10_4_intro_transformers.ipynb",
186 | "provenance": []
187 | },
188 | "kernelspec": {
189 | "display_name": "Python 3.11 (torch)",
190 | "language": "python",
191 | "name": "pytorch"
192 | },
193 | "language_info": {
194 | "codemirror_mode": {
195 | "name": "ipython",
196 | "version": 3
197 | },
198 | "file_extension": ".py",
199 | "mimetype": "text/x-python",
200 | "name": "python",
201 | "nbconvert_exporter": "python",
202 | "pygments_lexer": "ipython3",
203 | "version": "3.11.9"
204 | }
205 | },
206 | "nbformat": 4,
207 | "nbformat_minor": 4
208 | }
209 |
--------------------------------------------------------------------------------
/t81_558_class_06_5_prompt_engineering.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "hRuM9Pg2oBZQ"
7 | },
8 | "source": [
9 | "
\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "nPIDsF57oBZR"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "**Module 11: Natural Language Processing and Speech Recognition**\n",
20 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
21 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {
27 | "id": "4iEXcrLBoBZS"
28 | },
29 | "source": [
30 | "# Module 6 Material\n",
31 | "\n",
32 | "* Part 6.1: Introduction to Transformers [[Video]](https://www.youtube.com/watch?v=mn6r5PYJcu0&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_1_transformers.ipynb)\n",
33 | "* Part 6.2: Accessing the ChatGPT API [[Video]](https://www.youtube.com/watch?v=tcdscXl4o5w&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_2_chat_gpt.ipynb)\n",
34 | "* Part 6.3: LLM Memory [[Video]](https://www.youtube.com/watch?v=oGQ3TQx1Qs8&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_3_llm_memory.ipynb)\n",
35 | "* Part 6.4: Introduction to Embeddings [[Video]](https://www.youtube.com/watch?v=e6kcs9Uj_ps&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_4_embedding.ipynb)\n",
36 | "* **Part 6.5: Prompt Engineering** [[Video]](https://www.youtube.com/watch?v=miTpIDR7k6c&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_06_5_prompt_engineering.ipynb)"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "92qJtJOToBZS"
43 | },
44 | "source": [
45 | "# Google CoLab Instructions\n",
46 | "\n",
47 | "The following code ensures that Google CoLab is running the correct version of TensorFlow."
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 1,
53 | "metadata": {
54 | "colab": {
55 | "base_uri": "https://localhost:8080/"
56 | },
57 | "id": "LvTVJyBMoBZS",
58 | "outputId": "13a4c525-a091-4ede-98dc-b814c34e0f44"
59 | },
60 | "outputs": [
61 | {
62 | "name": "stdout",
63 | "output_type": "stream",
64 | "text": [
65 | "Note: not using Google CoLab\n"
66 | ]
67 | }
68 | ],
69 | "source": [
70 | "try:\n",
71 | " from google.colab import drive\n",
72 | " COLAB = True\n",
73 | " print(\"Note: using Google CoLab\")\n",
74 | "except:\n",
75 | " print(\"Note: not using Google CoLab\")\n",
76 | " COLAB = False"
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {
82 | "id": "wTUM5nqjoBZT"
83 | },
84 | "source": [
85 | "# Part 6.5: Prompt Engineering\n",
86 | "\n",
87 | "Prompt engineering is an art and science of designing effective prompts to elicit desired responses from Large Language Models (LLMs). In the era of GPT-3, GPT-4, and subsequent models, crafting the right prompt can significantly impact the accuracy, relevance, and nuance of an LLM's response. This section delves into the intricacies of prompt engineering, its importance, and strategies for achieving desired outcomes.\n",
88 | "\n",
89 | "### Why Prompt Engineering Matters\n",
90 | "\n",
91 | "In the rapidly evolving landscape of machine learning and natural language processing, Large Language Models (LLMs) have emerged as a vanguard of computational prowess. While their sheer capacity to generate and understand text is undeniably impressive, the success of their applications heavily depends on the quality of interactions they receive. Enter the crucial role of prompt engineering. This discipline serves as the bridge between the user's intent and the model's vast knowledge, determining the efficiency, relevance, and even ethical considerations of the output. Just as a seasoned interviewer knows the right questions to extract deep insights, prompt engineering shapes the way we query LLMs, ensuring we derive maximum value from these sophisticated tools. Now, let's delve into the specifics of why this matters.\n",
92 | "\n",
93 | "* Quality Control: The way a prompt is phrased can determine whether the model’s response will be on-point or entirely off the mark. A well-engineered prompt can lead to more accurate and informative answers.\n",
94 | "\n",
95 | "* Cost Efficiency: With models that charge by token (words/characters), concise and precise prompts can save computation time and costs.\n",
96 | "\n",
97 | "* Bias Mitigation: Thoughtful prompt design can help in obtaining unbiased and objective responses, especially in topics prone to subjectivity.\n",
98 | "\n",
99 | "### Principles of Effective Prompt Engineering\n",
100 | "\n",
101 | "The art of crafting questions to elicit insightful answers is as old as human dialogue itself. But when directing questions at machine systems, especially the sophisticated realm of Large Language Models, the nuances are quite distinct. The principles of effective prompt engineering provide a roadmap, guiding users to shape their inquiries in ways that harness the full potential of LLMs. It's not merely about asking a question; it's about framing, clarity, and precision. These principles reflect an understanding of the model's architecture, biases, and response mechanisms, ensuring that we're not just communicating, but communicating effectively. As we delve into the core tenets of prompt engineering, we'll uncover the subtle art of eliciting the most accurate and comprehensive answers from these digital behemoths.\n",
102 | "\n",
103 | "* Clarity: A prompt should be clearly worded. Ambiguous prompts can yield unexpected or irrelevant answers.\n",
104 | "\n",
105 | "* Specificity: It's often useful to be specific about the kind of answer or format you want. For example, instead of asking, \"Tell me about whales,\" you could ask, \"Provide a concise overview of the life cycle of blue whales.\"\n",
106 | "\n",
107 | "* Iteration: Prompt engineering is an iterative process. If a response is unsatisfactory, adjust the prompt and try again. This iterative refinement often leads to better prompts over time.\n",
108 | "\n",
109 | "### Strategies and Techniques\n",
110 | "\n",
111 | "The realm of engaging with Large Language Models is akin to navigating a vast ocean with pockets of hidden treasures. While understanding the general contours of this ocean is valuable, it is the strategies and techniques of prompt engineering that act as the navigator's compass and map. These methodologies provide practical tools for traversing the complexities of LLM interactions, ensuring that users not only reach their desired destinations but also discover richer, more nuanced insights along the journey. As we explore these strategies and techniques, we'll learn how to tailor our approach, refining our prompts to extract clarity, depth, and precision from the vast knowledge reservoirs of LLMs.\n",
112 | "\n",
113 | "* Prompt Pivoting: If an initial approach doesn’t provide the desired answer, consider asking the question in a different way or from a different angle.\n",
114 | "\n",
115 | "* Explicit Instructions: Specify the format or structure of the answer you want. For example: \"List the following in bullet points...\" or \"Provide an answer in no more than three sentences.\"\n",
116 | "\n",
117 | "* Grounding Context: Give the model context if necessary. Instead of just asking, \"How does it work?\", you might say, \"Explain how photosynthesis works in plants.\"\n",
118 | "\n",
119 | "* Challenge Assumptions: LLMs might make assumptions based on the most common interpretation of a prompt. If you want to bypass these assumptions, state them explicitly. For example, \"Ignoring financial constraints, explain the process of space tourism.\"\n",
120 | "\n",
121 | "* Bias Checks: If you're concerned about potential biases in the model's answer, you can request the model to provide multiple viewpoints or explicitly ask for an unbiased response.\n",
122 | "\n",
123 | "* Temperature and Max Tokens: Some models allow for parameters like \"temperature\" (a measure of randomness) and \"max tokens\" (limit on response length). Playing with these parameters can affect the model's verbosity and creativity.\n",
124 | "\n",
125 | "### Challenges\n",
126 | "\n",
127 | "Venturing into the world of Large Language Models brings with it a promise of transformative insights, but like any expedition into uncharted territories, it's not without its challenges. Even as we harness sophisticated strategies and adhere to established principles, the dynamic nature of LLMs can present hurdles that require both foresight and adaptability. These challenges remind us that while technology has made leaps and bounds, it's not infallible. Recognizing and navigating these potential pitfalls is integral to ensuring a productive interaction with LLMs. As we delve deeper into these challenges, we will equip ourselves with a more holistic understanding, preparing to meet the unexpected and ensuring that our engagements with these digital giants are both meaningful and informed.\n",
128 | "\n",
129 | "* Overfitting to Prompts: Relying too heavily on a specific set of prompts for all situations can lead to overfitting, where the model might provide stereotyped or overly narrow responses.\n",
130 | "\n",
131 | "* Unexpected Responses: No matter how well you craft your prompt, LLMs might sometimes produce unexpected answers. It's essential to verify and fact-check critical information.\n",
132 | "\n",
133 | "* Bias and Ethical Concerns: LLMs can reflect biases present in their training data. Even with perfect prompt engineering, users should approach answers with a critical mindset, especially on sensitive topics.\n",
134 | "\n",
135 | "Prompt engineering is a powerful tool in harnessing the full potential of LLMs. As these models continue to evolve and become even more complex, the nuances of interacting with them effectively will only grow in importance. By mastering the principles and strategies of prompt engineering, users can achieve more accurate, efficient, and insightful interactions with LLMs.\n"
136 | ]
137 | }
138 | ],
139 | "metadata": {
140 | "anaconda-cloud": {},
141 | "colab": {
142 | "collapsed_sections": [],
143 | "name": "t81_558_class_11_05_embedding.ipynb",
144 | "provenance": []
145 | },
146 | "kernelspec": {
147 | "display_name": "Python 3.11 (torch)",
148 | "language": "python",
149 | "name": "pytorch"
150 | },
151 | "language_info": {
152 | "codemirror_mode": {
153 | "name": "ipython",
154 | "version": 3
155 | },
156 | "file_extension": ".py",
157 | "mimetype": "text/x-python",
158 | "name": "python",
159 | "nbconvert_exporter": "python",
160 | "pygments_lexer": "ipython3",
161 | "version": "3.9.18"
162 | },
163 | "varInspector": {
164 | "cols": {
165 | "lenName": 16,
166 | "lenType": 16,
167 | "lenVar": 40
168 | },
169 | "kernels_config": {
170 | "python": {
171 | "delete_cmd_postfix": "",
172 | "delete_cmd_prefix": "del ",
173 | "library": "var_list.py",
174 | "varRefreshCmd": "print(var_dic_list())"
175 | },
176 | "r": {
177 | "delete_cmd_postfix": ") ",
178 | "delete_cmd_prefix": "rm(",
179 | "library": "var_list.r",
180 | "varRefreshCmd": "cat(var_dic_list()) "
181 | }
182 | },
183 | "types_to_exclude": [
184 | "module",
185 | "function",
186 | "builtin_function_or_method",
187 | "instance",
188 | "_Feature"
189 | ],
190 | "window_display": false
191 | }
192 | },
193 | "nbformat": 4,
194 | "nbformat_minor": 1
195 | }
196 |
--------------------------------------------------------------------------------
/t81_558_class_07_1_img_generative.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "ZggjUZ5oPvzH"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "YDTXd8-Lmp8Q"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "\n",
20 | "**Module 7: Image Generative Models**\n",
21 | "\n",
22 | "- Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
23 | "- For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).\n"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "ncNrAEpzmp8S"
30 | },
31 | "source": [
32 | "# Module 7 Material\n",
33 | "\n",
34 | "- **Part 7.1 Introduction to Generative AI** [[Video]](https://www.youtube.com/watch?v=2FbkbSnS8sg&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_1_img_generative.ipynb)\n",
35 | "- Part 7.2 Generating Faces with StyleGAN3 [[Video]](https://www.youtube.com/watch?v=VcI2o1yEQa0&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_2_gan_intro.ipynb)\n",
36 | "- Part 7.3 GANS to Enhance Old Photographs Deoldify [[Video]](https://www.youtube.com/watch?v=y7HvjfKsZ50&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_3_deoldify.ipynb)\n",
37 | "- Part 7.4 Text to Images with StableDiffusion [[Video]](https://www.youtube.com/watch?v=gLj6-gJ-lR4&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_4_stable_diff.ipynb)\n",
38 | "- Part 7.5 Finetuning with Dreambooth [[Video]](https://www.youtube.com/watch?v=G_FYFSzkB5Y&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_5_dream_booth.ipynb)"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {
44 | "id": "HKQqQljyPvzK"
45 | },
46 | "source": [
47 | "# Google CoLab Instructions\n",
48 | "\n",
49 | "The following code ensures that Google CoLab is running the correct version of TensorFlow.\n"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 20,
55 | "metadata": {
56 | "colab": {
57 | "base_uri": "https://localhost:8080/"
58 | },
59 | "id": "fU9UhAxTmp8S",
60 | "outputId": "2ac151e9-fbbe-4325-e166-380063f57018"
61 | },
62 | "outputs": [
63 | {
64 | "name": "stdout",
65 | "output_type": "stream",
66 | "text": [
67 | "Note: using Google CoLab\n"
68 | ]
69 | }
70 | ],
71 | "source": [
72 | "try:\n",
73 | " import google.colab\n",
74 | " COLAB = True\n",
75 | " print(\"Note: using Google CoLab\")\n",
76 | "except:\n",
77 | " print(\"Note: not using Google CoLab\")\n",
78 | " COLAB = False"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {
84 | "id": "i-qb-mcqmp8U"
85 | },
86 | "source": [
87 | "# Part 7.1: Introduction to Generative AI\n",
88 | "\n",
89 | "Generative AI is a fascinating and rapidly evolving subfield of artificial intelligence that focuses on creating new content, from art, music, and stories to more technical applications such as generating code or predicting complex systems. This is in contrast to other types of AI that are more concerned with analyzing and interpreting existing data, such as classifying images or predicting stock prices. While traditional AI systems are built to respond with the most accurate or optimal answer from a set of possibilities, generative AI systems are designed to create new possibilities, generating new data that is similar in some way to the data they were trained on.\n",
90 | "\n",
91 | "For example, a generative AI system might be trained on a large dataset of classical music and then be able to create new compositions in the style of Bach or Beethoven. Another application could be training the model on a vast amount of text data to create a chatbot that can generate human-like responses to questions or prompts, as we've seen with OpenAI's GPT models. Other examples include generating realistic images, creating virtual environments, or even designing molecules for new drugs.\n",
92 | "\n",
93 | "This remarkable capability of generative AI opens up a world of possibilities and applications that are only beginning to be explored. In this section, we will delve deeper into the workings of generative AI, examining the technologies that power it, such as Generative Adversarial Networks (GANs) and Large Language Models (LLMs), and explore some of the most exciting applications and future directions of this innovative field.\n",
94 | "\n",
95 | "## The Beginnings of Generative AI\n",
96 | "\n",
97 | "The history of generative AI can be traced back to the early days of artificial intelligence itself. In the 1950s and 60s, researchers were already experimenting with algorithms that could generate new content, such as the ELIZA chatbot developed at MIT in the 1966, which could generate responses to user input in a manner that mimicked a psychotherapist. However, the true potential of generative AI remained largely untapped until the advent of more powerful computing hardware and larger datasets in the 21st century. While ELIZA seemed advanced for the time; its conversations lacked any significant depth:\n",
98 | "\n",
99 | "```\n",
100 | "> Hello, I am Eliza. \n",
101 | "* I am afraid I will never learn Python.\n",
102 | "> Do you believe it is normal to be afraid you will never learn Python? \n",
103 | "* Yes\n",
104 | "> I see.\n",
105 | "```\n",
106 | "\n",
107 | "In 2006, Geoffrey Hinton and his colleagues at the University of Toronto developed a type of neural network called a Deep Belief Network (DBN), which could be trained to generate new data that was similar to the data it was trained on. This marked a key milestone in the development of generative AI, as it demonstrated that neural networks could be used to not only classify and analyze data, but also to create new data. Initially, this DBN was trained to recognize MNIST digits, such as seen in Figure 7.MNIST.\n",
108 | "\n",
109 | "**Figure 7.MNIST: Sample of the MNIST Digits**\n",
110 | "\n",
111 | "\n",
112 | "\n",
113 | "The next major breakthrough came in 2014, with the introduction of Generative Adversarial Networks (GANs) by Ian Goodfellow and his colleagues. GANs consist of two neural networks, a generator and a discriminator, which are trained simultaneously through a kind of game. The generator tries to create data that is indistinguishable from real data, while the discriminator tries to tell if the data is real or fake. This adversarial training process results in the generator becoming increasingly adept at creating realistic data, and has been used to generate everything from realistic images of faces that do not exist to new video game levels. \n",
114 | "\n",
115 | "Figure 7.GAN-MNIST shows a GAN that generates new digits based on MNIST digits that it was trained on. \n",
116 | "\n",
117 | "**Figure 7.GAN-MNIST: Generated Digits**\n",
118 | "\n",
119 | "\n",
120 | "\n",
121 | "The same early technique was applied to create entirely new faces, as demonstrated by Figure 7.\n",
122 | "\n",
123 | "**Figure 7.GAN-FACE: Generated Faces**\n",
124 | "\n",
125 | "\n",
126 | "\n",
127 | "NVIDIA took GAN generated images to an entirely new level with StyleGAN, which could generate photo-realistic faces as seen in figure 7.GAN-STYLE.\n",
128 | "\n",
129 | "**Figure 7.GAN-STYLE: Generated Faces**\n",
130 | "\n",
131 | "\n",
132 | "\n",
133 | "Stable Diffusion is a recent development in the field of generative AI that has garnered significant attention. Traditional methods like GANs and VAEs have their drawbacks, such as mode collapse in GANs. Stable Diffusion, on the other hand, uses a different approach by adopting a diffusion-based probabilistic model. This approach involves transforming the data in a way that spreads or 'diffuses' it out, and then running the process in reverse to generate new data. The model is trained by predicting the next diffusion step in the reverse process from a given state, which allows it to learn a detailed and high-quality representation of the data. This results in the generation of more realistic and detailed images compared to other methods. Moreover, Stable Diffusion models have been found to be more stable and easier to train compared to GANs, making them a promising alternative for generating high-quality content in various applications.\n",
134 | "\n",
135 | "With stable diffusion you can use prompts to render the image, giving you great control. Figure 7.GAN-STYLE shows the same woman in multiple poses and settings.\n",
136 | "\n",
137 | "**Figure 7.GAN-STYLE: Same Person in Stable Diffusion**\n",
138 | "\n",
139 | "\n",
140 | "\n",
141 | "\n",
142 | "\n",
143 | "In recent years, the development of large language models (LLMs) such as OpenAI's GPT (Generative Pre-trained Transformer) series, has brought generative AI to the forefront of public attention. These models are trained on vast amounts of text data and can generate human-like text on a wide variety of topics. This has led to a plethora of applications, from chatbots and virtual assistants to creative writing and content generation.\n",
144 | "\n",
145 | "The development of generative AI is still ongoing, and there are many challenges to be addressed and exciting avenues to explore. However, the progress that has been made in recent years is nothing short of astounding, and has opened up a world of possibilities that were once the stuff of science fiction.\n",
146 | "\n"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {
152 | "id": "0eBtaFbimp-M"
153 | },
154 | "source": [
155 | "# Module 7 Assignment\n",
156 | "\n",
157 | "You can find the seventh assignment here: [assignment 7](https://github.com/jeffheaton/app_deep_learning/blob/main/assignments/assignment_yourname_t81_558_class7.ipynb)\n"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "metadata": {},
164 | "outputs": [],
165 | "source": []
166 | }
167 | ],
168 | "metadata": {
169 | "anaconda-cloud": {},
170 | "colab": {
171 | "collapsed_sections": [],
172 | "name": "t81_558_class_06_1_python_images.ipynb",
173 | "provenance": []
174 | },
175 | "kernelspec": {
176 | "display_name": "Python 3.11 (torch)",
177 | "language": "python",
178 | "name": "pytorch"
179 | },
180 | "language_info": {
181 | "codemirror_mode": {
182 | "name": "ipython",
183 | "version": 3
184 | },
185 | "file_extension": ".py",
186 | "mimetype": "text/x-python",
187 | "name": "python",
188 | "nbconvert_exporter": "python",
189 | "pygments_lexer": "ipython3",
190 | "version": "3.11.9"
191 | }
192 | },
193 | "nbformat": 4,
194 | "nbformat_minor": 4
195 | }
196 |
--------------------------------------------------------------------------------
/t81_558_class_07_5_dream_booth.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "ZggjUZ5oPvzH"
7 | },
8 | "source": [
9 | "
"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "YDTXd8-Lmp8Q"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "\n",
20 | "**Module 7: Image Generative Models**\n",
21 | "\n",
22 | "- Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
23 | "- For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).\n"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "ncNrAEpzmp8S"
30 | },
31 | "source": [
32 | "# Module 7 Material\n",
33 | "\n",
34 | "- Part 7.1 Introduction to Generative AI [[Video]](https://www.youtube.com/watch?v=2FbkbSnS8sg&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_1_img_generative.ipynb)\n",
35 | "- Part 7.2 Generating Faces with StyleGAN3 [[Video]](https://www.youtube.com/watch?v=VcI2o1yEQa0&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_2_gan_intro.ipynb)\n",
36 | "- Part 7.3 GANS to Enhance Old Photographs Deoldify [[Video]](https://www.youtube.com/watch?v=y7HvjfKsZ50&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_3_deoldify.ipynb)\n",
37 | "- Part 7.4 Text to Images with StableDiffusion [[Video]](https://www.youtube.com/watch?v=gLj6-gJ-lR4&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_4_stable_diff.ipynb)\n",
38 | "- **Part 7.5 Finetuning with Dreambooth** [[Video]](https://www.youtube.com/watch?v=G_FYFSzkB5Y&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_07_5_dream_booth.ipynb)"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {
44 | "id": "HKQqQljyPvzK"
45 | },
46 | "source": [
47 | "# Google CoLab Instructions\n",
48 | "\n",
49 | "The following code ensures that Google CoLab is running the correct version of TensorFlow.\n"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 20,
55 | "metadata": {
56 | "colab": {
57 | "base_uri": "https://localhost:8080/"
58 | },
59 | "id": "fU9UhAxTmp8S",
60 | "outputId": "2ac151e9-fbbe-4325-e166-380063f57018"
61 | },
62 | "outputs": [
63 | {
64 | "name": "stdout",
65 | "output_type": "stream",
66 | "text": [
67 | "Note: using Google CoLab\n"
68 | ]
69 | }
70 | ],
71 | "source": [
72 | "try:\n",
73 | " import google.colab\n",
74 | " COLAB = True\n",
75 | " print(\"Note: using Google CoLab\")\n",
76 | "except:\n",
77 | " print(\"Note: not using Google CoLab\")\n",
78 | " COLAB = False"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {
84 | "id": "i-qb-mcqmp8U"
85 | },
86 | "source": [
87 | "# Part 7.5 Finetuning with Dreambooth\n",
88 | "\n",
89 | "It is common to finetune generative models. In this section we will finetune a generative model to include an additional object that the mode is familiar with. One application is to insert your own face into the model, which allows you to be rendered as a cartoon or in a variety of ways. Figure 7.JEFF shows myself rendered as a Star Trek character.\n",
90 | "\n",
91 | "**Figure 7.JEFF: Jeff as a Star Trek Character**\n",
92 | "\n",
93 | "\n",
94 | "\n",
95 | "The first step is to collect your data. You should have between 10-20 different images of your subject at different angles.\n",
96 | "\n",
97 | "**Figure 7.JEFFS: Multiple Pictures of Jeff at Different Angles**\n",
98 | "\n",
99 | "\n",
100 | "\n",
101 | "To finetine a model, you should use the following CoLab notebook. This allows Dream Booth to be run from CoLab. Dream Booth is a program commonly used to finetune Stable Diffusion models.\n",
102 | "\n",
103 | "* [Finetuning with Dreambooth](https://colab.research.google.com/github/ShivamShrirao/diffusers/blob/main/examples/dreambooth/DreamBooth_Stable_Diffusion.ipynb)\n",
104 | "\n",
105 | "There are also commercial services that sell time to create Stable Diffusion models, these can be very economical compared to obtaining and configuring your own hardware.\n",
106 | "\n",
107 | "* [Diffusion Hub](https://diffusionhub.firstpromoter.com/)\n",
108 | "\n",
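109 |     "Once finetuning completes, you can generate images with the resulting checkpoint. The following is a minimal sketch, assuming the Hugging Face diffusers library is installed and a GPU is available; the checkpoint directory `./my-dreambooth-model` and the instance token `sks` are placeholders for whatever your own finetuning run produced.\n",
110 |     "\n",
111 |     "```python\n",
112 |     "# Minimal inference sketch; \"./my-dreambooth-model\" and the \"sks\" instance\n",
113 |     "# token are placeholders for your own finetuned checkpoint and subject token.\n",
114 |     "import torch\n",
115 |     "from diffusers import StableDiffusionPipeline\n",
116 |     "\n",
117 |     "pipe = StableDiffusionPipeline.from_pretrained(\n",
118 |     "    \"./my-dreambooth-model\", torch_dtype=torch.float16\n",
119 |     ").to(\"cuda\")\n",
120 |     "\n",
121 |     "image = pipe(\"a portrait of sks person as a Star Trek character\").images[0]\n",
122 |     "image.save(\"result.png\")\n",
123 |     "```\n",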
109 | "\n",
110 | "\n",
111 | "\n"
112 | ]
113 | }
114 | ],
115 | "metadata": {
116 | "anaconda-cloud": {},
117 | "colab": {
118 | "collapsed_sections": [],
119 |    "name": "t81_558_class_07_5_dream_booth.ipynb",
120 | "provenance": []
121 | },
122 | "kernelspec": {
123 | "display_name": "Python 3.9 (torch)",
124 | "language": "python",
125 | "name": "pytorch"
126 | },
127 | "language_info": {
128 | "codemirror_mode": {
129 | "name": "ipython",
130 | "version": 3
131 | },
132 | "file_extension": ".py",
133 | "mimetype": "text/x-python",
134 | "name": "python",
135 | "nbconvert_exporter": "python",
136 | "pygments_lexer": "ipython3",
137 | "version": "3.9.16"
138 | }
139 | },
140 | "nbformat": 4,
141 | "nbformat_minor": 1
142 | }
143 |
--------------------------------------------------------------------------------
/t81_558_class_08_1_kaggle_intro.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     ""
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# T81-558: Applications of Deep Neural Networks\n",
15 | "**Module 8: Kaggle Data Sets**\n",
16 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
17 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "# Module 8 Material\n",
25 | "\n",
26 | "* **Part 8.1: Introduction to Kaggle** [[Video]](https://www.youtube.com/watch?v=7Mk46fb0Ayg&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_08_1_kaggle_intro.ipynb)\n",
27 | "* Part 8.2: Building Ensembles with Scikit-Learn and PyTorch [[Video]](https://www.youtube.com/watch?v=przbLRCRL24&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_08_2_pytorch_ensembles.ipynb)\n",
28 | "* Part 8.3: How Should you Architect Your PyTorch Neural Network: Hyperparameters [[Video]](https://www.youtube.com/watch?v=YTL2BR4U2Ng&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_08_3_pytorch_hyperparameters.ipynb)\n",
29 | "* Part 8.4: Bayesian Hyperparameter Optimization for PyTorch [[Video]](https://www.youtube.com/watch?v=1f4psgAcefU&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_08_4_bayesian_hyperparameter_opt.ipynb)\n",
30 | "* Part 8.5: Current Semester's Kaggle [[Video]] [[Notebook]](t81_558_class_08_5_kaggle_project.ipynb)"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "# Part 8.1: Introduction to Kaggle\n",
38 | "\n",
39 |     "[Kaggle](http://www.kaggle.com) runs competitions where data scientists compete to provide the best model to fit the data. A simple project to get started with Kaggle is the [Titanic data set](https://www.kaggle.com/c/titanic-gettingStarted). Most Kaggle competitions end on a specific date; the Titanic competition, however, is a perpetual tutorial competition whose deadline the organizers keep extending. Because it is a tutorial data set, there is no prize, and your score in the competition does not count towards becoming a Kaggle Master. \n",
40 | "\n",
41 | "## Kaggle Ranks\n",
42 | "\n",
43 | "You achieve Kaggle ranks by earning gold, silver, and bronze medals.\n",
44 | "\n",
45 | "* [Kaggle Top Users](https://www.kaggle.com/rankings)\n",
46 | "* [Current Top Kaggle User's Profile Page](https://www.kaggle.com/stasg7)\n",
47 | "* [Jeff Heaton's (your instructor) Kaggle Profile](https://www.kaggle.com/jeffheaton)\n",
48 | "* [Current Kaggle Ranking System](https://www.kaggle.com/progression)\n",
49 | "\n",
50 | "## Typical Kaggle Competition\n",
51 | "\n",
52 | "A typical Kaggle competition will have several components. Consider the Titanic tutorial:\n",
53 | "\n",
54 | "* [Competition Summary Page](https://www.kaggle.com/c/titanic)\n",
55 | "* [Data Page](https://www.kaggle.com/c/titanic/data)\n",
56 | "* [Evaluation Description Page](https://www.kaggle.com/c/titanic/details/evaluation)\n",
57 | "* [Leaderboard](https://www.kaggle.com/c/titanic/leaderboard)\n",
58 | "\n",
59 |     "## How Kaggle Competition Scoring Works\n",
60 | "\n",
61 | "Kaggle is provided with a data set by the competition sponsor, as seen in Figure 8.SCORE. Kaggle divides this data set as follows:\n",
62 | "\n",
63 |     "* **Complete Data Set** - The full data set provided by the competition sponsor.\n",
64 | " * **Training Data Set** - This dataset provides both the inputs and the outcomes for the training portion of the data set.\n",
65 | " * **Test Data Set** - This dataset provides the complete test data; however, it does not give the outcomes. Your submission file should contain the predicted results for this data set.\n",
66 | " * **Public Leaderboard** - Kaggle does not tell you what part of the test data set contributes to the public leaderboard. Your public score is calculated based on this part of the data set.\n",
67 |     "    * **Private Leaderboard** - Likewise, Kaggle does not tell you what part of the test data set contributes to the private leaderboard. Your final score/rank is calculated based on this part, and you do not see your private leaderboard score until the competition ends.\n",
68 | "\n",
69 |     "**Figure 8.SCORE: How Kaggle Competition Scoring Works**\n",
70 | "\n",
71 | "\n",
72 | "## Preparing a Kaggle Submission\n",
73 | "\n",
74 |     "You do not submit the code for your solution to Kaggle. For competitions, you are scored entirely on the accuracy of your submission file. A Kaggle submission file is always a CSV file that contains the **Id** of the row you are predicting and the answer. For the Titanic competition, a submission file looks something like this:\n",
75 | "\n",
76 | "```\n",
77 | "PassengerId,Survived\n",
78 | "892,0\n",
79 | "893,1\n",
80 | "894,1\n",
81 | "895,0\n",
82 | "896,0\n",
83 | "897,1\n",
84 | "...\n",
85 | "```\n",
86 | "\n",
87 |     "The above file states the prediction for each of the various passengers. You should only predict on IDs that are in the test file; likewise, you should render a prediction for every row in the test file. A minimal sketch of producing such a file appears below. Some competitions will have different formats for their answers. For example, a multi-class classification will usually have a column for each class, containing your prediction for that class.\n",
88 | "\n",
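89 |     "The following pandas sketch produces such a file; as a placeholder it predicts that no passenger survived, and it assumes `test.csv` has been downloaded from the competition's Data page. You would replace the constant prediction with your model's output.\n",
90 |     "\n",
91 |     "```python\n",
92 |     "import pandas as pd\n",
93 |     "\n",
94 |     "# \"test.csv\" is assumed to be downloaded from the competition's Data page.\n",
95 |     "test_df = pd.read_csv(\"test.csv\")\n",
96 |     "\n",
97 |     "submission = pd.DataFrame({\n",
98 |     "    \"PassengerId\": test_df[\"PassengerId\"],\n",
99 |     "    \"Survived\": 0,  # constant baseline; swap in your model's predictions\n",
100 |     "})\n",
101 |     "submission.to_csv(\"submission.csv\", index=False)\n",
102 |     "```\n",
103 |     "\n",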
89 | "## Select Kaggle Competitions\n",
90 | "\n",
91 | "There have been many exciting competitions on Kaggle; these are some of my favorites. Some select predictive modeling competitions which use tabular data include:\n",
92 | "\n",
93 | "* [Otto Group Product Classification Challenge](https://www.kaggle.com/c/otto-group-product-classification-challenge)\n",
94 | "* [Galaxy Zoo - The Galaxy Challenge](https://www.kaggle.com/c/galaxy-zoo-the-galaxy-challenge)\n",
95 | "* [Practice Fusion Diabetes Classification](https://www.kaggle.com/c/pf2012-diabetes)\n",
96 | "* [Predicting a Biological Response](https://www.kaggle.com/c/bioresponse)\n",
97 | "\n",
98 | "Many Kaggle competitions include computer vision datasets, such as:\n",
99 | "\n",
100 | "* [Diabetic Retinopathy Detection](https://www.kaggle.com/c/diabetic-retinopathy-detection)\n",
101 | "* [Cats vs Dogs](https://www.kaggle.com/c/dogs-vs-cats)\n",
102 | "* [State Farm Distracted Driver Detection](https://www.kaggle.com/c/state-farm-distracted-driver-detection)\n"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "# Module 8 Assignment\n",
110 | "\n",
111 | "You can find the first assignment here: [assignment 8](https://github.com/jeffheaton/app_deep_learning/blob/master/assignments/assignment_yourname_t81_558_class8.ipynb)"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": []
120 | }
121 | ],
122 | "metadata": {
123 | "anaconda-cloud": {},
124 | "kernelspec": {
125 | "display_name": "Python 3.11 (torch)",
126 | "language": "python",
127 | "name": "pytorch"
128 | },
129 | "language_info": {
130 | "codemirror_mode": {
131 | "name": "ipython",
132 | "version": 3
133 | },
134 | "file_extension": ".py",
135 | "mimetype": "text/x-python",
136 | "name": "python",
137 | "nbconvert_exporter": "python",
138 | "pygments_lexer": "ipython3",
139 | "version": "3.11.9"
140 | }
141 | },
142 | "nbformat": 4,
143 | "nbformat_minor": 4
144 | }
145 |
--------------------------------------------------------------------------------
/t81_558_class_08_3_pytorch_hyperparameters.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     ""
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# T81-558: Applications of Deep Neural Networks\n",
15 | "**Module 8: Kaggle Data Sets**\n",
16 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
17 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "# Module 8 Material\n",
25 | "\n",
26 | "* Part 8.1: Introduction to Kaggle [[Video]](https://www.youtube.com/watch?v=7Mk46fb0Ayg&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_08_1_kaggle_intro.ipynb)\n",
27 | "* Part 8.2: Building Ensembles with Scikit-Learn and PyTorch [[Video]](https://www.youtube.com/watch?v=przbLRCRL24&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_08_2_pytorch_ensembles.ipynb)\n",
28 | "* **Part 8.3: How Should you Architect Your PyTorch Neural Network: Hyperparameters** [[Video]](https://www.youtube.com/watch?v=YTL2BR4U2Ng&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_08_3_pytorch_hyperparameters.ipynb)\n",
29 | "* Part 8.4: Bayesian Hyperparameter Optimization for PyTorch [[Video]](https://www.youtube.com/watch?v=1f4psgAcefU&list=PLjy4p-07OYzulelvJ5KVaT2pDlxivl_BN) [[Notebook]](t81_558_class_08_4_bayesian_hyperparameter_opt.ipynb)\n",
30 | "* Part 8.5: Current Semester's Kaggle [[Video]] [[Notebook]](t81_558_class_08_5_kaggle_project.ipynb)"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "# Google CoLab Instructions\n",
38 | "\n",
39 |     "The following code checks if Google CoLab is running."
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 1,
45 | "metadata": {},
46 | "outputs": [
47 | {
48 | "name": "stdout",
49 | "output_type": "stream",
50 | "text": [
51 | "Note: not using Google CoLab\n"
52 | ]
53 | }
54 | ],
55 | "source": [
56 | "# Startup CoLab\n",
57 | "try:\n",
58 | " import google.colab\n",
59 | " COLAB = True\n",
60 | " print(\"Note: using Google CoLab\")\n",
61 | "except:\n",
62 | " print(\"Note: not using Google CoLab\")\n",
63 | " COLAB = False\n",
64 | "\n",
65 | "\n",
66 | "# Nicely formatted time string\n",
67 | "def hms_string(sec_elapsed):\n",
68 | " h = int(sec_elapsed / (60 * 60))\n",
69 | " m = int((sec_elapsed % (60 * 60)) / 60)\n",
70 | " s = sec_elapsed % 60\n",
71 | " return \"{}:{:>02}:{:>05.2f}\".format(h, m, s)"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "# Part 8.3: Architecting Network: Hyperparameters\n",
79 | "\n",
80 |     "You have probably noticed several hyperparameters introduced previously in this course that you need to choose for your neural network. The number of layers, neuron counts per layer, layer types, and activation functions are all choices you must make to optimize your neural network. Some of the categories of hyperparameters for you to choose from include the following:\n",
81 | "\n",
82 | "* Number of Hidden Layers and Neuron Counts\n",
83 | "* Activation Functions\n",
84 | "* Advanced Activation Functions\n",
85 | "* Regularization: L1, L2, Dropout\n",
86 | "* Batch Normalization\n",
87 | "* Training Parameters\n",
88 | "\n",
89 | "The following sections will introduce each of these categories for PyTorch. While I will provide some general guidelines for hyperparameter selection, no two tasks are the same. You will benefit from experimentation with these values to determine what works best for your neural network. In the next part, we will see how machine learning can select some of these values independently.\n",
90 | "\n",
91 | "## Number of Hidden Layers and Neuron Counts\n",
92 | "\n",
93 |     "The structure of PyTorch layers is perhaps the hyperparameter that most practitioners become aware of first. How many layers should you have? How many neurons are on each layer? What activation function and layer type should you use? These are all questions that come up when designing a neural network. There are many different [types of layers](https://pytorch.org/docs/stable/nn.html) in PyTorch; several are listed here:\n",
94 | "\n",
95 | "* **Activation** - PyTorch allows you to add activation functions using torch.nn modules. Instead of an activation layer, you typically specify the activation function directly after a Linear (or other) layer type.\n",
96 |     "* **Regularization** - For L1/L2 regularization in PyTorch, you generally don't use a separate layer. Instead, you add weight decay when setting up an optimizer such as SGD or Adam, which acts as L2 regularization. For L1, you implement the penalty manually; see the sketch after this list.\n",
97 |     "* **Linear** - The original neural network layer type. In this layer type, every neuron connects to the next layer. The input vector is one-dimensional, and the ordering of the inputs carries no special meaning.  \n",
98 | "* **Dropout** - It operates by randomly setting a fraction of input units to 0 at each forward pass, which helps in preventing overfitting. In PyTorch, Dropout is applied during training only by default.\n",
99 | "* **Flatten** - Flattens the input to 1D and does not affect the batch size.\n",
100 | "* **Permute** - PyTorch tensors have a permute method that can be used to rearrange the dimensions of a tensor, which is useful when working with different types of layers that expect certain input shapes and for tasks such as connecting RNNs and convolutional networks.\n",
101 |     "* **Repeat** - PyTorch has no RepeatVector layer as Keras does; instead, the tensor **repeat** method repeats the input n times.\n",
102 | " \n",
103 |     "There is always trial and error in choosing a good number of neurons and hidden layers. Generally, the number of neurons on each layer will be larger closer to the input layer and smaller towards the output layer. This configuration gives the neural network a somewhat triangular or trapezoid appearance, as in the sketch below.\n",
104 | "\n",
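105 |     "The following is a minimal sketch pulling several of these pieces together: a small trapezoid-shaped network built from Linear, ReLU, and Dropout layers, an Adam optimizer whose weight_decay parameter acts as L2 regularization, and a manually added L1 penalty. All sizes and constants are illustrative only.\n",
106 |     "\n",
107 |     "```python\n",
108 |     "import torch\n",
109 |     "import torch.nn as nn\n",
110 |     "\n",
111 |     "# Illustrative sizes: a trapezoid-shaped network for 10 inputs and 3 classes.\n",
112 |     "model = nn.Sequential(\n",
113 |     "    nn.Linear(10, 64),\n",
114 |     "    nn.ReLU(),\n",
115 |     "    nn.Dropout(0.2),\n",
116 |     "    nn.Linear(64, 32),\n",
117 |     "    nn.ReLU(),\n",
118 |     "    nn.Linear(32, 3),\n",
119 |     ")\n",
120 |     "\n",
121 |     "# weight_decay adds an L2 penalty inside the optimizer update.\n",
122 |     "optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)\n",
123 |     "\n",
124 |     "x = torch.randn(16, 10)          # a batch of 16 random examples\n",
125 |     "y = torch.randint(0, 3, (16,))   # random class labels\n",
126 |     "loss = nn.CrossEntropyLoss()(model(x), y)\n",
127 |     "\n",
128 |     "# An L1 penalty must be added to the loss manually.\n",
129 |     "l1_lambda = 1e-5\n",
130 |     "loss = loss + l1_lambda * sum(p.abs().sum() for p in model.parameters())\n",
131 |     "\n",
132 |     "loss.backward()\n",
133 |     "optimizer.step()\n",
134 |     "```\n",
135 |     "\n",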
105 | "## Activation Functions\n",
106 | "\n",
107 | "Activation functions are a choice that you must make for each layer. Generally, you can follow this guideline:\n",
108 |     "* Hidden Layers - ReLU\n",
109 | "* Output Layer - Softmax for classification, linear for regression.\n",
110 | "\n",
111 | "Some of the common activation functions in PyTorch are listed here:\n",
112 | "\n",
113 | "* **softmax** - Used for multi-class classification. Ensures all output neurons behave as probabilities and sum to 1.0.\n",
114 |     "* **elu** - Exponential Linear Unit (ELU), a function that tends to converge cost to zero faster and produce more accurate results. Can produce negative outputs.\n",
115 | "* **selu** - Scaled Exponential Linear Unit (SELU), essentially **elu** multiplied by a scaling constant.\n",
116 | "* **softplus** - Softplus activation function. $log(exp(x) + 1)$ [Introduced](https://papers.nips.cc/paper/1920-incorporating-second-order-functional-knowledge-for-better-option-pricing.pdf) in 2001.\n",
117 |     "* **softsign** - Softsign activation function. $x / (abs(x) + 1)$ Similar to tanh, but not widely used.\n",
118 |     "* **relu** - Very popular neural network activation function. Used for hidden layers, cannot output negative values. No trainable parameters.\n",
119 |     "* **tanh** - Classic neural network activation function, though often replaced by the relu family in modern networks.\n",
120 | "* **sigmoid** - Classic neural network activation. Often used on output layer of a binary classifier.\n",
121 | "* **hard_sigmoid** - Less computationally expensive variant of sigmoid.\n",
122 | "* **exp** - Exponential (base e) activation function.\n",
123 | "\n",
124 | "For more information about PyTorch activation functions refer to the following:\n",
125 | "\n",
126 |     "* [PyTorch Activation Functions](https://pytorch.org/docs/stable/nn.html)\n",
128 | "\n",
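129 |     "As a quick sketch of how these functions behave, you can apply several of them directly to a sample tensor; note how the softmax output sums to 1.0:\n",
130 |     "\n",
131 |     "```python\n",
132 |     "import torch\n",
133 |     "\n",
134 |     "x = torch.tensor([-2.0, -0.5, 0.0, 1.0, 3.0])\n",
135 |     "\n",
136 |     "print(torch.relu(x))     # negatives clipped to zero\n",
137 |     "print(torch.tanh(x))     # squashed into (-1, 1)\n",
138 |     "print(torch.sigmoid(x))  # squashed into (0, 1)\n",
139 |     "\n",
140 |     "probs = torch.softmax(x, dim=0)\n",
141 |     "print(probs, probs.sum())  # behaves like probabilities, sums to 1.0\n",
142 |     "```\n",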
129 | "\n",
130 | "## Batch Normalization and Dropout\n",
131 | "\n",
132 | "* [PyTorch Dropout](https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html)\n",
133 | "* [PyTorch Batch Normalization](https://pytorch.org/docs/stable/generated/torch.nn.functional.batch_norm.html)\n",
134 | "\n",
135 | "* Ioffe, S., & Szegedy, C. (2015). [Batch normalization: Accelerating deep network training by reducing internal covariate shift](https://arxiv.org/abs/1502.03167). *arXiv preprint arXiv:1502.03167*.\n",
136 | "\n",
137 |     "Batch normalization normalizes the activations of the previous layer at each batch; that is, it applies a transformation that keeps the mean activation close to 0 and the activation standard deviation close to 1. It can allow the learning rate to be larger.\n",
138 | "\n",
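139 |     "A minimal sketch of adding batch normalization between layers follows; BatchNorm1d takes the feature count of the preceding Linear layer, and its behavior differs between training and evaluation modes.\n",
140 |     "\n",
141 |     "```python\n",
142 |     "import torch\n",
143 |     "import torch.nn as nn\n",
144 |     "\n",
145 |     "model = nn.Sequential(\n",
146 |     "    nn.Linear(10, 64),\n",
147 |     "    nn.BatchNorm1d(64),  # normalizes the 64 activations across each batch\n",
148 |     "    nn.ReLU(),\n",
149 |     "    nn.Linear(64, 3),\n",
150 |     ")\n",
151 |     "\n",
152 |     "model.train()  # uses batch statistics and updates the running averages\n",
153 |     "out = model(torch.randn(32, 10))\n",
154 |     "model.eval()   # uses the stored running averages instead\n",
155 |     "```\n",
156 |     "\n",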
139 | "\n",
140 | "## Training Parameters\n",
141 | "\n",
142 | "* [PyTorch Optimizers](https://pytorch.org/docs/stable/optim.html)\n",
143 | "\n",
144 | "* **Batch Size** - Usually small, such as 32 or so.\n",
145 | "* **Learning Rate** - Usually small, 1e-3 or so.\n",
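146 |     "\n",
147 |     "A sketch of where these two parameters typically appear, using a stand-in model and random data:\n",
148 |     "\n",
149 |     "```python\n",
150 |     "import torch\n",
151 |     "import torch.nn as nn\n",
152 |     "from torch.utils.data import DataLoader, TensorDataset\n",
153 |     "\n",
154 |     "model = nn.Linear(10, 1)  # stand-in model\n",
155 |     "dataset = TensorDataset(torch.randn(256, 10), torch.randn(256, 1))\n",
156 |     "\n",
157 |     "# Batch size and learning rate are both set here:\n",
158 |     "loader = DataLoader(dataset, batch_size=32, shuffle=True)\n",
159 |     "optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\n",
160 |     "\n",
161 |     "for x, y in loader:\n",
162 |     "    optimizer.zero_grad()\n",
163 |     "    loss = nn.functional.mse_loss(model(x), y)\n",
164 |     "    loss.backward()\n",
165 |     "    optimizer.step()\n",
166 |     "```\n",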
146 | "\n"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": null,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": []
155 | }
156 | ],
157 | "metadata": {
158 | "anaconda-cloud": {},
159 | "kernelspec": {
160 | "display_name": "Python 3.9 (torch)",
161 | "language": "python",
162 | "name": "pytorch"
163 | },
164 | "language_info": {
165 | "codemirror_mode": {
166 | "name": "ipython",
167 | "version": 3
168 | },
169 | "file_extension": ".py",
170 | "mimetype": "text/x-python",
171 | "name": "python",
172 | "nbconvert_exporter": "python",
173 | "pygments_lexer": "ipython3",
174 | "version": "3.9.16"
175 | }
176 | },
177 | "nbformat": 4,
178 | "nbformat_minor": 4
179 | }
180 |
--------------------------------------------------------------------------------
/t81_558_class_11_3_tokenizers.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "3CF2edFAI4Uj"
7 | },
8 | "source": [
9 |     "\n"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "RvbM-RwHI4Ul"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "**Module 11: Natural Language Processing with Hugging Face**\n",
20 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
21 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {
27 | "id": "5dE7A-0aI4Ul"
28 | },
29 | "source": [
30 | "# Module 11 Material\n",
31 | "\n",
32 | "* Part 11.1: Introduction to Hugging Face [[Video]](https://www.youtube.com/watch?v=PzuL84ksRuE&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_11_1_hf.ipynb)\n",
33 | "* Part 11.2: Hugging Face in Python [[Video]](https://www.youtube.com/watch?v=tkGIF4CFoV4&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_11_2_py_huggingface.ipynb)\n",
34 | "* **Part 11.3: Hugging Face Tokenizers** [[Video]](https://www.youtube.com/watch?v=Cz2nvfK28eI&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_11_3_tokenizers.ipynb)\n",
35 | "* Part 11.4: Hugging Face Datasets [[Video]](https://www.youtube.com/watch?v=yLlCZLzE2XU&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_11_4_hf_datasets.ipynb)\n",
36 | "* Part 11.5: Training Hugging Face Models [[Video]](https://www.youtube.com/watch?v=7YZOik5S3vs&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_11_5_hf_train.ipynb)\n"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "9Z4A091yI4Um"
43 | },
44 | "source": [
45 | "# Google CoLab Instructions\n",
46 | "\n",
47 | "The following code checks if Google CoLab is running."
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {
54 | "colab": {
55 | "base_uri": "https://localhost:8080/"
56 | },
57 | "id": "RJmzbge9I4Um",
58 | "outputId": "4685a76c-cc37-490a-d589-db16eb6e9c4c"
59 | },
60 | "outputs": [],
61 | "source": [
62 | "try:\n",
63 | " from google.colab import drive\n",
64 | " COLAB = True\n",
65 | " print(\"Note: using Google CoLab\")\n",
66 | "except:\n",
67 | " print(\"Note: not using Google CoLab\")\n",
68 | " COLAB = False"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {
74 | "id": "vdNN6e45I4Un"
75 | },
76 | "source": [
77 | "# Part 11.3: Hugging Face Tokenizers\n",
78 | "\n",
79 |     "Tokenization is the task of chopping text up into pieces, called tokens, perhaps at the same time throwing away certain characters, such as punctuation. Consider how a program might break up the following sentences into words.\n",
80 | "\n",
81 | "* This is a test.\n",
82 | "* Ok, but what about this?\n",
83 | "* Is U.S.A. the same as USA.?\n",
84 | "* What is the best data-set to use?\n",
85 | "* I think I will do this-no wait; I will do that.\n",
86 | "\n",
87 |     "Hugging Face includes tokenizers that can break these sentences into words and subwords. Because English and some other languages build words from common parts, we tokenize subwords. For example, a gerund, such as \"sleeping,\" will be tokenized into \"sleep\" and \"##ing\".\n",
88 | "\n",
89 | "We begin by installing Hugging Face if needed.\n"
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {
96 | "colab": {
97 | "base_uri": "https://localhost:8080/"
98 | },
99 | "id": "3jGET2abMjcl",
100 | "outputId": "35b8be0f-bc7e-42d0-f177-ee097d4a076c"
101 | },
102 | "outputs": [],
103 | "source": [
104 | "# HIDE OUTPUT\n",
105 | "!pip install transformers\n",
106 | "!pip install transformers[sentencepiece]"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {
112 | "id": "Wa1ncodn8y0r"
113 | },
114 | "source": [
115 | "First, we create a Hugging Face tokenizer. There are several different tokenizers available from the Hugging Face hub. For this example, we will make use of the following tokenizer:\n",
116 | "\n",
117 | "* distilbert-base-uncased\n",
118 | "\n",
119 | "This tokenizer is based on BERT and assumes case-insensitive English text."
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": null,
125 | "metadata": {
126 | "id": "kSGtW0E7xcK9"
127 | },
128 | "outputs": [],
129 | "source": [
130 | "from transformers import AutoTokenizer\n",
131 | "model = \"distilbert-base-uncased\"\n",
132 | "tokenizer = AutoTokenizer.from_pretrained(model)\n"
133 | ]
134 | },
135 | {
136 | "cell_type": "markdown",
137 | "metadata": {
138 | "id": "YofjyJw59U2x"
139 | },
140 | "source": [
141 | "We can now tokenize a sample sentence."
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {
148 | "colab": {
149 | "base_uri": "https://localhost:8080/"
150 | },
151 | "id": "NVCmyao2zLQ3",
152 | "outputId": "6b33031a-8667-4431-aae2-26595195d69d"
153 | },
154 | "outputs": [],
155 | "source": [
156 | "encoded = tokenizer('Tokenizing text is easy.')\n",
157 | "print(encoded)\n"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {
163 | "id": "4QgmlpyezhMy"
164 | },
165 | "source": [
166 | "The result of this tokenization contains two elements:\n",
167 |     "\n",
168 |     "* input_ids - The individual subword indexes; each index uniquely identifies a subword.\n",
169 |     "* attention_mask - Indicates which values in *input_ids* are meaningful and which are padding.\n",
170 |     "\n",
171 |     "This sentence had no padding, so all elements have an attention mask of \"1\". Later, we will request the output to be of a fixed length, introducing padding, which always has an attention mask of \"0\". Though each tokenizer can be implemented differently, the attention mask of a tokenizer is generally either \"0\" or \"1\". \n",
170 | "\n",
171 | "Due to subwords and special tokens, the number of tokens may not match the number of words in the source string. We can see the meanings of the individual tokens by converting these IDs back to strings."
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": null,
177 | "metadata": {
178 | "colab": {
179 | "base_uri": "https://localhost:8080/"
180 | },
181 | "id": "Ww3XPc-i2Y6c",
182 | "outputId": "6179b416-61cd-4142-fa15-ae2296e9eab7"
183 | },
184 | "outputs": [],
185 | "source": [
186 | "tokenizer.convert_ids_to_tokens(encoded.input_ids)\n"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {
192 | "id": "7KM7VRJECoGU"
193 | },
194 | "source": [
195 |     "As you can see, there are two special tokens placed at the beginning and end of each sequence. We will soon see how we can include or exclude these special tokens. These special tokens can vary per tokenizer; however, [CLS] begins a sequence for this tokenizer, and [SEP] ends a sequence. You will also see that the gerund \"tokenizing\" is broken into \"token\" and \"##izing\".\n",
196 | "\n",
197 | "For this tokenizer, the special tokens occur between 100 and 103. Most Hugging Face tokenizers use this approximate range for special tokens. The value zero (0) typically represents padding. We can display all special tokens with this command.\n",
198 | "\n"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {
205 | "colab": {
206 | "base_uri": "https://localhost:8080/"
207 | },
208 | "id": "EtQiOmSl2rXt",
209 | "outputId": "6f5f2aef-d718-4675-d0a5-9c6c9d960c87"
210 | },
211 | "outputs": [],
212 | "source": [
213 | "tokenizer.convert_ids_to_tokens([0, 100, 101, 102, 103])\n"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {
219 | "id": "1nQ-r6bz3ESN"
220 | },
221 | "source": [
222 | "This tokenizer supports these common tokens:\n",
223 | "\n",
224 | "* \\[CLS\\] - Sequence beginning.\n",
225 | "* \\[SEP\\] - Sequence end.\n",
226 | "* \\[PAD\\] - Padding.\n",
227 | "* \\[UNK\\] - Unknown token.\n",
228 | "* \\[MASK\\] - Mask out tokens for a neural network to predict. Not used in this book, see [MLM paper](https://arxiv.org/abs/2109.01819). \n",
229 | "\n",
230 | "It is also possible to tokenize lists of sequences. We can pad and truncate sequences to achieve a standard length by tokenizing many sequences at once.\n",
231 | "\n"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "metadata": {
238 | "colab": {
239 | "base_uri": "https://localhost:8080/"
240 | },
241 | "id": "TI4RZXhc4v9k",
242 | "outputId": "2ae7f2f9-3d8a-421e-c9b9-4dd933817cd3"
243 | },
244 | "outputs": [],
245 | "source": [
246 | "text = [\n",
247 | " \"This movie was great!\",\n",
248 |     "  \"I hated this movie, waste of time!\",\n",
249 | " \"Epic?\"\n",
250 | "]\n",
251 | "\n",
252 | "encoded = tokenizer(text, padding=True, add_special_tokens=True)\n",
253 | "\n",
254 | "print(\"**Input IDs**\")\n",
255 | "for a in encoded.input_ids:\n",
256 | " print(a)\n",
257 | "\n",
258 | "print(\"**Attention Mask**\")\n",
259 | "for a in encoded.attention_mask:\n",
260 | " print(a)\n"
261 | ]
262 | },
263 | {
264 | "cell_type": "markdown",
265 | "metadata": {
266 | "id": "1rIF8TEEF9C-"
267 | },
268 | "source": [
269 | "Notice the **input_id**'s for the three movie review text sequences. Each of these sequences begins with 101 and we pad with zeros. Just before the padding, each group of IDs ends with 102. The attention masks also have zeros for each of the padding entries. \n",
270 | "\n",
271 |     "We passed two parameters to the tokenizer to control the tokenization process. Some other useful [parameters](https://huggingface.co/docs/transformers/main_classes/tokenizer) include:\n",
272 | "\n",
273 | "* add_special_tokens (defaults to True) Whether or not to encode the sequences with the special tokens relative to their model.\n",
274 |     "* padding (defaults to False) Activates and controls padding.\n",
275 | "* max_length (optional) Controls the maximum length to use by one of the truncation/padding parameters."
276 | ]
277 | },
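278 |   {
279 |    "cell_type": "markdown",
280 |    "metadata": {},
281 |    "source": [
282 |     "For example, we can combine these parameters to request a fixed-length encoding. The following is a small sketch using the tokenizer and text list created above; the length of 8 is arbitrary, and longer reviews will be truncated."
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "code",
287 |    "execution_count": null,
288 |    "metadata": {},
289 |    "outputs": [],
290 |    "source": [
291 |     "# Pad or truncate every sequence to exactly 8 tokens (arbitrary length).\n",
292 |     "encoded = tokenizer(text, padding=\"max_length\", truncation=True, max_length=8)\n",
293 |     "\n",
294 |     "for ids, mask in zip(encoded.input_ids, encoded.attention_mask):\n",
295 |     "    print(ids, mask)\n"
296 |    ]
297 |   },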
278 | {
279 | "cell_type": "code",
280 | "execution_count": null,
281 | "metadata": {},
282 | "outputs": [],
283 | "source": []
284 | }
285 | ],
286 | "metadata": {
287 | "anaconda-cloud": {},
288 | "colab": {
289 | "collapsed_sections": [],
290 | "name": "t81_558_class_11_02_tokenizers.ipynb",
291 | "provenance": []
292 | },
293 | "kernelspec": {
294 | "display_name": "Python 3.9 (torch)",
295 | "language": "python",
296 | "name": "pytorch"
297 | },
298 | "language_info": {
299 | "codemirror_mode": {
300 | "name": "ipython",
301 | "version": 3
302 | },
303 | "file_extension": ".py",
304 | "mimetype": "text/x-python",
305 | "name": "python",
306 | "nbconvert_exporter": "python",
307 | "pygments_lexer": "ipython3",
308 | "version": "3.9.18"
309 | },
310 | "varInspector": {
311 | "cols": {
312 | "lenName": 16,
313 | "lenType": 16,
314 | "lenVar": 40
315 | },
316 | "kernels_config": {
317 | "python": {
318 | "delete_cmd_postfix": "",
319 | "delete_cmd_prefix": "del ",
320 | "library": "var_list.py",
321 | "varRefreshCmd": "print(var_dic_list())"
322 | },
323 | "r": {
324 | "delete_cmd_postfix": ") ",
325 | "delete_cmd_prefix": "rm(",
326 | "library": "var_list.r",
327 | "varRefreshCmd": "cat(var_dic_list()) "
328 | }
329 | },
330 | "types_to_exclude": [
331 | "module",
332 | "function",
333 | "builtin_function_or_method",
334 | "instance",
335 | "_Feature"
336 | ],
337 | "window_display": false
338 | }
339 | },
340 | "nbformat": 4,
341 | "nbformat_minor": 1
342 | }
343 |
--------------------------------------------------------------------------------
/t81_558_class_12_5_rl_future.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "klGNgWREsvQv"
7 | },
8 | "source": [
9 |     ""
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "pmDI-h7cI0tI"
16 | },
17 | "source": [
18 | "# T81-558: Applications of Deep Neural Networks\n",
19 | "**Module 12: Reinforcement Learning**\n",
20 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
21 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {
27 | "id": "lsaQlK8fFQqH"
28 | },
29 | "source": [
30 | "# Module 12 Video Material\n",
31 | "\n",
32 |     "* Part 12.1: Introduction to Gymnasium [[Video]](https://www.youtube.com/watch?v=FvuyrpzvwdI&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_12_1_ai_gym.ipynb)\n",
33 | "* Part 12.2: Introduction to Q-Learning [[Video]](https://www.youtube.com/watch?v=VKuqvbG_KAw&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_12_2_qlearningreinforcement.ipynb)\n",
34 | "* Part 12.3: Stable Baselines Q-Learning [[Video]](https://www.youtube.com/watch?v=kl7zsCjULN0&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_12_3_pytorch_reinforce.ipynb)\n",
35 | "* Part 12.4: Atari Games with Stable Baselines Neural Networks [[Video]](https://www.youtube.com/watch?v=maLA1_d4pzQ&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_12_4_atari.ipynb)\n",
36 | "* **Part 12.5: Future of Reinforcement Learning** [[Video]](https://www.youtube.com/watch?v=-euo5pTjP8E&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi) [[Notebook]](t81_558_class_12_5_rl_future.ipynb)\n"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "1u9QVVsShC9X"
43 | },
44 | "source": [
45 | "# Part 12.5: Future Directions of Reinforcement Learning\n",
46 | "\n",
47 | "Reinforcement Learning (RL) initially showed great promise as it defeated humans in games such as chess and Go. The 1990s heralded the birth of RL, a time when the field was a conclave of a few passionate souls. In 1995, the first community began to form at the inaugural National Science Foundation workshop on RL. \n",
48 | "\n",
49 |     "As the new millennium dawned, RL's presence grew silently yet steadfastly, never quite breaking into mainstream machine learning research until the advent of DeepMind. Their innovative synthesis of deep learning with RL, demonstrated with Atari gaming, was a revelation. It sparked a renaissance, suddenly making RL the object of desire for tech conglomerates and startups, evidenced by Google's princely acquisition of DeepMind.\n",
50 | "\n",
51 | "To many, RL appears triumphant, destined to continue its march forward. Sessions dedicated to RL are overflowing at AI and ML conferences, and the influx of papers is ongoing, reinforcing a narrative of success. \n",
52 | "\n",
53 | "However, some researchers believe that RL may be at a dead end. When we impose on RL the stringent criteria of learning in real-time, common in the real world, the picture shifts. The current methods of deep RL, which shine in the simulated realities where failure is inconsequential, and repetition is infinite, falter against the complex learning that real life demands. Consider how humans learn to master the art of driving. It's not merely a function of operating the vehicle but a culmination of years of passive observation, an intricate dance of the senses and cognition that no current RL algorithm can claim to replicate. OpenAI, once the leading proponent of RL, has divested itself of the OpenAI Gym.\n",
54 | "\n",
55 |     "The foray into teaching machines through RL has often ignored this fundamental aspect of human learning: the vast repository of implicit knowledge we bring to each new learning experience. The stark contrast in learning efficiencies between humans and RL algorithms becomes apparent. Where deep RL takes days and millions of iterations to grasp a game like Frostbite, a human requires only a minute and a few hundred trials. This disparity cannot be overstated.\n",
56 | "\n",
57 |     "So, where does this leave RL? Can it be rescued from the potential impasse where current trends lead? Some believe the answer lies not in the abandonment of RL but in its evolution. The future of RL should embrace a model that integrates observation, mimicry, transfer learning, and the scaffolding provided by prior knowledge. This paradigm resonates with how humans acquire complex skills. The musings of Richard Feynman on education resonate deeply with this issue. Despite his unparalleled eloquence in teaching physics, Feynman acknowledged the limitations of instruction. This sentiment reflects an older, perhaps more universal understanding of learning: We absorb best what we are already primed to receive.\n",
58 | "\n",
59 | "This recognition brings us to a pivotal juncture for RL. The way forward is not to mimic the human learning process but to create a symbiotic framework where machines can benefit from the richness of the human experience. We stand on the brink of a transformative evolution in RL or its decline into obsolescence. The choice is ours to make, the direction ours to steer. The future of RL will depend on whether we choose to learn from the depth of human experience or continue down a path of isolated, knowledge-free computation.\n",
60 | "\n",
61 |     "For a more in-depth analysis of the future of reinforcement learning, consider the Quora post by [Sridhar Mahadevan](https://www.quora.com/Is-reinforcement-learning-a-dead-end).\n",
62 | "\n"
63 | ]
64 | }
65 | ],
66 | "metadata": {
67 | "accelerator": "GPU",
68 | "colab": {
69 | "collapsed_sections": [],
70 | "name": "Copy of t81_558_class_12_05_apply_rl.ipynb",
71 | "provenance": []
72 | },
73 | "kernelspec": {
74 | "display_name": "Python 3.9 (torch)",
75 | "language": "python",
76 | "name": "pytorch"
77 | },
78 | "language_info": {
79 | "codemirror_mode": {
80 | "name": "ipython",
81 | "version": 3
82 | },
83 | "file_extension": ".py",
84 | "mimetype": "text/x-python",
85 | "name": "python",
86 | "nbconvert_exporter": "python",
87 | "pygments_lexer": "ipython3",
88 | "version": "3.9.18"
89 | },
90 | "pycharm": {
91 | "stem_cell": {
92 | "cell_type": "raw",
93 | "metadata": {
94 | "collapsed": false
95 | },
96 | "source": []
97 | }
98 | },
99 | "varInspector": {
100 | "cols": {
101 | "lenName": 16,
102 | "lenType": 16,
103 | "lenVar": 40
104 | },
105 | "kernels_config": {
106 | "python": {
107 | "delete_cmd_postfix": "",
108 | "delete_cmd_prefix": "del ",
109 | "library": "var_list.py",
110 | "varRefreshCmd": "print(var_dic_list())"
111 | },
112 | "r": {
113 | "delete_cmd_postfix": ") ",
114 | "delete_cmd_prefix": "rm(",
115 | "library": "var_list.r",
116 | "varRefreshCmd": "cat(var_dic_list()) "
117 | }
118 | },
119 | "types_to_exclude": [
120 | "module",
121 | "function",
122 | "builtin_function_or_method",
123 | "instance",
124 | "_Feature"
125 | ],
126 | "window_display": false
127 | }
128 | },
129 | "nbformat": 4,
130 | "nbformat_minor": 1
131 | }
132 |
--------------------------------------------------------------------------------
/t81_558_class_13_5_new_tech.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 |     ""
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# T81-558: Applications of Deep Neural Networks\n",
15 |     "**Module 13: Other Neural Network Techniques**\n",
16 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
17 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "# Module 13 Video Material\n",
25 | "\n",
26 | "* Part 13.1: Using Denoising AutoEncoders [[Video]]() [[Notebook]](t81_558_class_13_1_auto_encode.ipynb)\n",
27 | "* Part 13.2: Anomaly Detection [[Video]]() [[Notebook]](t81_558_class_13_2_anomaly.ipynb)\n",
28 | "* Part 13.3: Model Drift and Retraining [[Video]]() [[Notebook]](t81_558_class_13_3_retrain.ipynb)\n",
29 | "* Part 13.4: Tensor Processing Units (TPUs) [[Video]]() [[Notebook]](t81_558_class_13_4_tpu.ipynb)\n",
30 | "* **Part 13.5: Future Directions in Artificial Intelligence** [[Video]]() [[Notebook]](t81_558_class_13_5_new_tech.ipynb)\n"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "# Google CoLab Instructions\n",
38 | "\n",
39 |     "The following code checks if Google CoLab is running and sets up the correct hardware settings for PyTorch.\n"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "import torch\n",
49 | "\n",
50 | "try:\n",
51 | " import google.colab\n",
52 | " COLAB = True\n",
53 | " print(\"Note: using Google CoLab\")\n",
54 | "except:\n",
55 | " print(\"Note: not using Google CoLab\")\n",
56 | " COLAB = False\n",
57 | "\n",
58 | "# Make use of a GPU or MPS (Apple) if one is available. (see module 3.2)\n",
59 | "has_mps = torch.backends.mps.is_built()\n",
60 |     "device = \"mps\" if has_mps else \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
61 | "print(f\"Using device: {device}\")"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "# Part 13.5: Future Directions in Artificial Intelligence\n",
69 | "\n",
70 | "This course changes often to keep up with the rapidly evolving deep learning landscape. If you would like to continue to monitor this class, I suggest following me on the following:\n",
71 | "\n",
72 | "* [GitHub](https://github.com/jeffheaton) - I post all changes to GitHub.\n",
73 | "* [Jeff Heaton's YouTube Channel](https://www.youtube.com/user/HeatonResearch) - I add new videos for this class on my channel.\n",
74 | "\n",
75 | "\n",
76 | "I hope that you have enjoyed this course. If you have any suggestions for improvement or technology suggestions, please get in touch with me. This course is always evolving, and I invite you to subscribe to my [YouTube channel](https://www.youtube.com/user/HeatonResearch) for my latest updates. I also frequently post videos beyond the scope of this course, so the channel itself is a good next step. Thank you very much for your interest and focus on this course. Other social media links for me include:\n",
77 | "\n",
78 | "* [Jeff Heaton GitHub](https://github.com/jeffheaton)\n",
79 | "* [Jeff Heaton Twitter](https://twitter.com/jeffheaton)\n",
80 | "* [Jeff Heaton Medium](https://medium.com/@heatonresearch)\n",
81 | "\n",
82 | "\n"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": null,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": []
91 | }
92 | ],
93 | "metadata": {
94 | "anaconda-cloud": {},
95 | "kernelspec": {
96 | "display_name": "Python 3.9 (torch)",
97 | "language": "python",
98 | "name": "pytorch"
99 | },
100 | "language_info": {
101 | "codemirror_mode": {
102 | "name": "ipython",
103 | "version": 3
104 | },
105 | "file_extension": ".py",
106 | "mimetype": "text/x-python",
107 | "name": "python",
108 | "nbconvert_exporter": "python",
109 | "pygments_lexer": "ipython3",
110 | "version": "3.9.18"
111 | },
112 | "varInspector": {
113 | "cols": {
114 | "lenName": 16,
115 | "lenType": 16,
116 | "lenVar": 40
117 | },
118 | "kernels_config": {
119 | "python": {
120 | "delete_cmd_postfix": "",
121 | "delete_cmd_prefix": "del ",
122 | "library": "var_list.py",
123 | "varRefreshCmd": "print(var_dic_list())"
124 | },
125 | "r": {
126 | "delete_cmd_postfix": ") ",
127 | "delete_cmd_prefix": "rm(",
128 | "library": "var_list.r",
129 | "varRefreshCmd": "cat(var_dic_list()) "
130 | }
131 | },
132 | "types_to_exclude": [
133 | "module",
134 | "function",
135 | "builtin_function_or_method",
136 | "instance",
137 | "_Feature"
138 | ],
139 | "window_display": false
140 | }
141 | },
142 | "nbformat": 4,
143 | "nbformat_minor": 4
144 | }
145 |
--------------------------------------------------------------------------------
/t81_559_class_08_4_kaggle_llm.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "83GJJF9fAgyP"
7 | },
8 | "source": [
9 |     ""
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "id": "HL640ydsAgyQ"
16 | },
17 | "source": [
18 | "# T81-559: Applications of Generative Artificial Intelligence\n",
19 | "**Module 8: Kaggle**\n",
20 | "* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), McKelvey School of Engineering, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)\n",
21 | "* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/)."
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {
27 | "id": "a4ih9V7vAgyR"
28 | },
29 | "source": [
30 | "# Module 8 Material\n",
31 | "\n",
32 | "* Part 8.1: Introduction to Kaggle [[Video]]() [[Notebook]](t81_559_class_08_1_kaggle_intro.ipynb)\n",
33 | "* Part 8.2: Kaggle Notebooks [[Video]]() [[Notebook]](t81_559_class_08_2_kaggle_notebooks.ipynb)\n",
34 | "* Part 8.3: Small Large Language Models [[Video]]() [[Notebook]](t81_559_class_08_3_small_llm.ipynb)\n",
35 | "* **Part 8.4: Accessing Small LLM from Kaggle** [[Video]]() [[Notebook]](t81_559_class_08_4_kaggle_llm.ipynb)\n",
36 | "* Part 8.5: Current Semester's Kaggle [[Video]]() [[Notebook]](t81_559_class_08_5_kaggle_project.ipynb)"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "uU7OTe1DAgyR"
43 | },
44 | "source": [
45 | "# Google CoLab Instructions\n",
46 | "\n",
47 |     "The following code checks if Google CoLab is running."
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 1,
53 | "metadata": {
54 | "colab": {
55 | "base_uri": "https://localhost:8080/"
56 | },
57 | "id": "NOdFRzaXAgyS",
58 | "outputId": "2475bc8b-19b2-487a-916a-3667060e76cf"
59 | },
60 | "outputs": [
61 | {
62 | "name": "stdout",
63 | "output_type": "stream",
64 | "text": [
65 | "Note: using Google CoLab\n",
66 | "Using device: mps\n"
67 | ]
68 | }
69 | ],
70 | "source": [
71 |     "# Start CoLab\n",
72 |     "try:\n",
73 |     "    import google.colab\n",
74 |     "    COLAB = True\n",
75 |     "    print(\"Note: using Google CoLab\")\n",
76 |     "except:\n",
77 |     "    print(\"Note: not using Google CoLab\")\n",
78 |     "    COLAB = False\n",
78 | "\n",
79 | "# Make use of a GPU or MPS (Apple) if one is available. (see module 3.2)\n",
80 | "import torch\n",
81 | "device = (\n",
82 | " \"mps\"\n",
83 | " if getattr(torch, \"has_mps\", False)\n",
84 | " else \"cuda\"\n",
85 | " if torch.cuda.is_available()\n",
86 | " else \"cpu\"\n",
87 | ")\n",
88 | "print(f\"Using device: {device}\")"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {
94 | "id": "LFMTMsOWAgyS"
95 | },
96 | "source": [
97 | "# 8.4: Accessing Small LLM from Kaggle\n",
98 | "\n",
99 | "coming soon"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "metadata": {},
106 | "outputs": [],
107 | "source": []
108 | }
109 | ],
110 | "metadata": {
111 | "anaconda-cloud": {},
112 | "colab": {
113 | "collapsed_sections": [],
114 | "name": "Copy of t81_558_class_08_5_kaggle_project.ipynb",
115 | "provenance": []
116 | },
117 | "kernelspec": {
118 | "display_name": "Python 3.11 (genai)",
119 | "language": "python",
120 | "name": "pytorch"
121 | },
122 | "language_info": {
123 | "codemirror_mode": {
124 | "name": "ipython",
125 | "version": 3
126 | },
127 | "file_extension": ".py",
128 | "mimetype": "text/x-python",
129 | "name": "python",
130 | "nbconvert_exporter": "python",
131 | "pygments_lexer": "ipython3",
132 | "version": "3.11.8"
133 | }
134 | },
135 | "nbformat": 4,
136 | "nbformat_minor": 1
137 | }
138 |
--------------------------------------------------------------------------------