├── .gitignore
├── LICENSE
├── Lecture01
    └── README.md
├── Lecture02
    └── README.md
├── Lecture03
    ├── 03_exercise.ipynb
    ├── 03_solution.ipynb
    └── README.md
├── Lecture04
    ├── 04_exercise.ipynb
    ├── 04_solution.ipynb
    ├── 04_warm_up.ipynb
    ├── README.md
    ├── reaction_yields.txt
    └── reaction_yields_with_errors.txt
├── Lecture05
    ├── 05_exercises.ipynb
    ├── 05_solution.ipynb
    ├── README.md
    └── molecular_data.txt
├── Lecture06
    ├── 06_exercise.ipynb
    ├── 06_solution.ipynb
    ├── README.md
    └── chembl_drugs.csv
├── Lecture07
    ├── 07_exercises.ipynb
    ├── 07_solutions.ipynb
    └── README.md
├── Lecture08
    ├── Part1
    │   ├── 08_drfp_partA.ipynb
    │   ├── 08_drfp_partB.ipynb
    │   └── README.md
    ├── Part2
    │   ├── 08_from_script_to_package.ipynb
    │   ├── README.md
    │   ├── amremover
    │   │   ├── pyproject.toml
    │   │   └── src
    │   │   │   └── amremover
    │   │   │       └── utils.py
    │   ├── amremover_script.py
    │   ├── solution_advanced
    │   │   └── amremover
    │   │   │   ├── pyproject.toml
    │   │   │   └── src
    │   │   │       └── amremover
    │   │   │           ├── __init__.py
    │   │   │           ├── cli.py
    │   │   │           └── utils.py
    │   └── solution_standard
    │   │   └── amremover
    │   │       ├── pyproject.toml
    │   │       └── src
    │   │           └── amremover
    │   │               ├── __init__.py
    │   │               └── utils.py
    └── README.md
├── Lecture09
    ├── 09_exercise.ipynb
    ├── 09_solution.ipynb
    └── README.md
├── Lecture10
    ├── Part-1
    │   ├── README.md
    │   └── solution_code.ipynb
    ├── Part-2
    │   └── README.md
    └── README.md
├── Lecture11
    ├── 11_exercise.ipynb
    ├── 11_solutions.ipynb
    └── README.md
├── Lecture12
    ├── 12_exercises.ipynb
    ├── README.md
    ├── app.py
    └── solutions_app.py
├── README.md
└── assets
    ├── Lecture05
        ├── 1.png
        ├── 2.png
        ├── 3.png
        ├── 4.png
        ├── 5.png
        ├── 6.png
        ├── 7.png
        └── 8.png
    ├── Lecture08
        └── clone_drfp.jpg
    ├── PAT.png
    ├── git_bash_download.png
    ├── git_workflow.jpeg
    ├── ipython.png
    ├── kernel_env.png
    ├── kernel_select.png
    ├── python_env.png
    ├── vscode_extensions.png
    ├── vscode_install_ipython.png
    ├── vscode_install_ipython.png.html
    ├── vscode_python_env.png
    ├── vscode_python_env.png.html
    ├── vscode_select_env.png
    ├── vscode_select_env.png.html
    ├── vscode_select_kernel.png
    ├── vscode_select_kernel.png.html
    └── windows_conda_path.png


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # UV
 98 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #uv.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | 
110 | # pdm
111 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | #   in version control.
115 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 | 
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 | 
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 | 
127 | # SageMath parsed files
128 | *.sage.py
129 | 
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 | 
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 | 
143 | # Rope project settings
144 | .ropeproject
145 | 
146 | # mkdocs documentation
147 | /site
148 | 
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 | 
154 | # Pyre type checker
155 | .pyre/
156 | 
157 | # pytype static type analyzer
158 | .pytype/
159 | 
160 | # Cython debug symbols
161 | cython_debug/
162 | 
163 | # PyCharm
164 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
167 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 | 
170 | # PyPI configuration file
171 | .pypirc
172 | 
173 | 2024-practical-programming-in-chemistry-exercises
174 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 schwallergroup
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Lecture01/README.md:
--------------------------------------------------------------------------------
  1 | # Lecture 01: Setting Up Your Computer
  2 | 
  3 | This lecture focuses on setting up the essential tools you'll need throughout the course. We'll walk through installing the necessary software and getting comfortable with the command line.
  4 | 
  5 | ## Required Software Installation
  6 | 
  7 | ### 1. Terminal Setup
  8 | - **Windows Users**: Install Git Bash
  9 |   1. Download Git Bash from the [official website](https://git-scm.com/download/win)
 10 |      ![Downloading git bash](/assets/git_bash_download.png)
 11 |   2. Run the installer, accepting default settings
 12 |   3. Verify installation by opening Git Bash and typing `git --version`
 13 | 
 14 | - **macOS Users**: Use the built-in Terminal
 15 |   1. Press `Cmd + Space`, type "Terminal", and press Enter
 16 |   2. Update your command line tools by running: `xcode-select --install`
 17 |   3. Check that Git is installed by typing `git --version` in your terminal.
 18 | 
 19 | ### 2. Visual Studio Code
 20 | 1. Download VS Code from the [official website](https://code.visualstudio.com/)
 21 | 2. Open VS Code and install the following extensions:
 22 |    - Python (by Microsoft)
 23 |    - Jupyter
 24 |    ![Install Extensions in VS Code](/assets/vscode_extensions.png)
 25 | 
 26 | More about VS Code extensions from the [official documentation](https://code.visualstudio.com/docs/editor/extension-marketplace).
 27 | 
 28 | ### 3. Anaconda Installation
 29 | 
 30 | **macOS users**
 31 | 1. Download Anaconda from the [official website](https://www.anaconda.com/download/success)
 32 |    - [Check your processor type](https://support.apple.com/en-us/HT211814) (Intel or Apple Silicon: Apple menu > `About This Mac` > check the `Chip` line)
 33 | 2. Run the installer
 34 | 3. Verify installation by opening your terminal (`Cmd + Space`, type "Terminal") and running:
 35 |    ```bash
 36 |    conda --version
 37 |    ```
 38 | **Windows users**
 39 | 1. Download Anaconda from the [official website](https://www.anaconda.com/download/success)
 40 |    - Choose the appropriate installer (64-bit recommended)
 41 | 2. Run the installer
 42 |    - ⚠️ Folder destination: Do **not** accept the default path (`C:\Users\<your-username>\Anaconda3\`) **if** your path contains spaces (e.g. `C:\Name Surname`) or characters outside the 7-bit ASCII character set like accents (e.g. `C:\Joëlle-Françoise L’Héritier`). Instead, **save it here**: `C:\anaconda`
 43 |    - ⚠️ When prompted, check "Add Anaconda to my PATH environment variable"
 44 |       ![Add conda to PATH](../assets/windows_conda_path.png)
 45 | 3. Verify installation by opening Git Bash (you might have to reopen it for the changes to take effect) and running:
 46 |    ```bash
 47 |    conda --version
 48 |    ```
 49 | 
 50 | <details>
 51 | <summary>Some help for Windows installation</summary>
 52 |   
 53 |   These are mostly for the TAs to help you
 54 |   
 55 | - [Detailed Anaconda installation instructions](https://docs.anaconda.com/working-with-conda/reference/faq/#installing-anaconda)
 56 | - [conda: command not found on Windows](https://stackoverflow.com/questions/54501167/anaconda-and-git-bash-in-windows-conda-command-not-found)
 57 |       
 58 | </details>
 59 | 
 60 | ## GitHub Setup
 61 | 
 62 | ### 1. Create a GitHub Account
 63 | 1. Go to [GitHub](https://github.com) and click "Sign Up"
 64 | 2. Use your academic email for potential benefits
 65 | 3. Choose a professional username
 66 | 4. Complete the verification process
 67 | 
 68 | <details>
 69 | <summary>Detailed steps</summary>
 70 | 
 71 | 1. Open your web browser and navigate to https://github.com/.
 72 | 2. Click on the `Sign Up` button located in the top right corner of GitHub’s homepage.
 73 | 3. On the next page, provide the required details including a new `Username`, a `valid Email Address` (EPFL address recommended for step 8.), and a `Password`. Make sure to verify that the password is at least 15 characters long or at least 8 characters long with a combination of letters, numbers, and symbols.
 74 | 4. Review GitHub’s Terms of Service and Privacy Statement, and if you agree, click on `Create an account`.
 75 | 5. Next, you might be guided through a few survey questions. You can answer them or directly click on `Complete Setup`.
 76 | 6. You’ll be sent an email to the address you provided. In that email, click `Verify email address`.
 77 | 7. That’s it! You should now have a GitHub account.
 78 | 8. (Optional) The GitHub Student Developer Pack is a free offer from GitHub specially for students. It provides access to a variety of premium development tools and services free of charge for as long as you’re a student. [GitHub Student Developer Pack](https://education.github.com/pack)
 79 | </details>
 80 | 
 81 | ### 2. Configure GitHub locally (on your computer)
 82 | 1. Configure your username and email address by typing the following commands in your terminal/Git Bash:
 83 | ```bash
 84 | git config --global user.name "Your Name" # Replace with your GitHub username
 85 | git config --global user.email "user@epfl.ch" # Replace with the associated email address
 86 | ```
 87 | 2. Check that your configuration was successful by typing (leave file by pressing `q`):
 88 | ```bash
 89 | git config --global --list
 90 | ```
 91 | 
 92 | ### 3. Log into GitHub on VS Code
 93 | 
 94 | 1. Open VS Code
 95 | 2. Install the required extension `GitHub Pull Requests and Issues`
 96 | 3. Sign in by clicking on the GitHub tab on the left (cat-like icon below the extensions tab)
 97 | 
 98 | ## Terminal Commands Overview
 99 | 
100 | Here are the essential terminal commands you'll need for navigating your computer and working with files:
101 | 
102 | | Function | macOS/Linux Command/Windows Git Bash |
103 | |----------|------------------------|
104 | | Navigate to home directory | `cd ~` | 
105 | | Change directory | `cd directoryName` | 
106 | | Go up one folder | `cd ..` | 
107 | | Navigate to directory with spaces | `cd "directory Name"` |
108 | | Show current directory | `pwd` | 
109 | | List files in current directory | `ls` | 
110 | | Make a new directory in the current directory | `mkdir directoryName` | 
111 | | Remove a file | `rm fileName` | 
112 | | Remove a directory | `rm -r directoryName` |
113 | | Copy a file with path `source` to path `destination` | `cp source destination` | 
114 | | Move or rename a file with name `source` to new name `destination` | `mv source destination` | 
115 | 
116 | ## Terminal Exercises
117 | 
118 | ### Exercise 1: Basic Navigation
119 | 1. Open your terminal (Git Bash for Windows)
120 | 2. Navigate to your home directory using `cd ~`
121 | 3. Check your current location using `pwd`
122 | 4. List all files in your current directory using `ls`
123 | 5. Create a new directory called `python-course` using `mkdir`
124 | 6. Navigate into that directory
125 | 7. Verify you're in the correct directory using `pwd`
126 | 
127 | ### Exercise 2: Working with Files and Directories
128 | 1. Inside `python-course`, create three directories (using `mkdir`):
129 |    - `exercises`
130 |    - `notes`
131 |    - `projects`
132 | 2. List the contents of `python-course` to verify the directories were created (using `ls`)
133 | 3. Navigate into `exercises`
134 | 4. Create a file called `week1.txt` (you can use `touch week1.txt` on Mac/Unix/Git Bash)
135 | 5. Move back up to the `python-course` directory (using `cd ..`)
136 | 6. Create another file called `todo.txt` in the `notes` directory
137 | 7. Copy `todo.txt` from `notes` to `projects` (using `cp`)
138 | 
139 | ### Exercise 3: Advanced File Operations
140 | 1. Create a directory called `temp` in `python-course`
141 | 2. Create files called `file1.txt`, `file2.txt`, and `file3.txt` in `temp`
142 | 3. Create a new directory called `backup`
143 | 4. Copy all files from `temp` to `backup`
144 | 5. List the contents of both directories to verify the files were copied
145 | 6. Remove the `temp` directory and its contents
146 | 7. Verify the files still exist in `backup`
147 | 
148 | ### Exercise 4: Play the bashcrawl game (optional)
149 | This is a fun way to learn a lot more bash commands, play as long as you want (we don't expect you to finish it).
150 | 
151 | You can either download the game locally or play it through Binder online.
152 | 
153 | **Local game**
154 | 1. Download [BashCrawl game](https://gitlab.com/slackermedia/bashcrawl/-/archive/stable-2024.02.09/bashcrawl-stable-2024.02.09.zip).
155 | 2. Unzip it. 
156 | 3. In your terminal/Git bash using `cd`, navigate to the folder `bashcrawl-stable-2024.02.09`
157 | 4. Run `cd entrance`
158 | 5. `cat scroll` to read the instructions
159 | 
160 | **Through Binder (online)**
161 | 1. Open [https://mybinder.org/v2/gl/nthiery%2Fbashcrawl/HEAD](https://mybinder.org/v2/gl/nthiery%2Fbashcrawl/HEAD).
162 | 2. Wait for a minute or two
163 | 3. Once the interface has loaded, click on Terminal (bottom left)
164 | 4. Run `cat scroll` to read the instructions
165 | 
166 | 
167 | ## Common Issues and Solutions
168 | 
169 | 1. **Git Bash not recognized in Windows**
170 |    - Restart your computer after installation
171 |    - Verify PATH variables in System Environment Variables
172 | 
173 | 2. **Anaconda not found in terminal**
174 |    - Restart your terminal
175 |    - Check if PATH was properly set during installation
176 | 
177 | 3. **GitHub authentication fails**
178 |    - Make sure you're using your PAT as the password, not your GitHub password
179 |    - Regenerate PAT if necessary
180 | 
181 | 


--------------------------------------------------------------------------------
/Lecture02/README.md:
--------------------------------------------------------------------------------
  1 | # Lecture 02: Setting up GitHub and creating first repositories
  2 | 
  3 | ## What is GitHub?
  4 | 
  5 | GitHub is a web-based platform that allows you to store and manage your code. It is widely used by developers and data scientists to collaborate on projects and share their work with others. In this course, we will be using GitHub to store and share our code, both for the exercises as well as the final assignment. Below, you can see how a local environment (laptop) interacts with the remote environment (GitHub browser) by first downloading (`clone`) a code-based project (`repository`) and then updating the remote version with changes done locally on your computer (arrows going up) or updating the local version with changes being done by colleagues (arrows going down).
  6 | 
  7 | ![Git Workflow](../assets/git_workflow.jpeg)
  8 | 
  9 | *Figure: Scheme explaining the Git Workflow taken from [this blogpost](https://medium.com/@itsmepankaj/git-workflow-add-commit-push-pull-69adf44cf812), which has more detailed information on it.*
 10 | 
 11 | This Git workflow ensures that changes are tracked, saved, and shared in a structured way, preventing data loss and enabling collaboration. Staging (`add`) selects changes, committing (`commit`) saves them with a message, and pushing (`push`) syncs them with a remote repository for others to access.
 12 | 
 13 | Now, you will create your first repository, a profile README that will appear on your user page. 
 14 | 
 15 | <details>
 16 | <summary>What is the difference between Git and GitHub?</summary>
 17 | 
 18 | Git is a distributed version control system that allows developers to track changes, manage branches, and collaborate on code efficiently, while GitHub is a cloud-based platform that provides hosting for Git repositories along with additional collaboration features like issue tracking, pull requests, and web-based interfaces to facilitate teamwork and open-source contributions.
 19 | 
 20 | In this course, we will use `Git` commands for version control, but will be using GitHub as the remote storage for our repositories.
 21 | </details>
 22 | 
 23 | ## 1. Create a Profile README
 24 | A profile README is a special repository that is automatically displayed on your GitHub profile. It is a great way to introduce yourself and showcase your work. Take your time to create such a README on the GitHub website.
 25 | 
 26 | <details>
 27 | <summary>Detailed steps</summary>
 28 | 
 29 | 1. On GitHub, in the upper-right corner of any page, click on the `+` and then click `New repository`.
 30 | 2. Name the repository with your GitHub username (must match exactly!).
 31 | 3. Select the `Public` option.
 32 | 4. Check the box to `Initialize this repository with a README`.
 33 | 5. Click `Create repository`.
 34 | 6. Above the right sidebar, click on `Edit README` and start editing the file.
 35 | 7. You can use the [GitHub Flavored Markdown](https://guides.github.com/features/mastering-markdown/) to format your README.
 36 | 8. Once you are done, click on `Commit changes`.
 37 | 
 38 | </details>
 39 | 
 40 | You can take some inspiration from your TAs ([@jwa7](https://github.com/jwa7), [@sarina-kopf](https://github.com/Sarina-kopf) or [@rneeser](https://github.com/rneeser)) or get some tips for creative profiles from [this blogpost](https://dev.to/kshyun28/how-to-make-your-awesome-github-profile-hog).
 41 | 
 42 | 
 43 | ## 2. GitHub Basics: Create a new repository
 44 | Finally, we will create our first repository and update it via the command line. Please make sure to create a public repository (so the TAs can see it) and to add a README file.
 45 | 
 46 | ### Creating a new repository
 47 | 
 48 | 1. Go to the GitHub website and click on the `+` in the top right corner and then `New repository`.
 49 | 2. Name the repository `ppchem` and select the `Public` option. Also check the box to `Initialize this repository with a README`.
 50 | 
 51 | ### Create a `Personal Access Token` (PAT)
 52 | 
 53 | Git will ask for your password when you clone a repository (next step). In order to increase security, GitHub requires one to add a password that is different from your login password. For that, we can create a PAT like so:
 54 | 
 55 | ⚠️ This is crucial for connecting to GitHub from your computer!
 56 | 
 57 | 1. Go to the GitHub website and click on your profile in the upper right corner and then `⚙️ Settings`
 58 | 2. Scroll to the end of the option bar on the left and click on `<> Developer settings` > `🔑 Personal access tokens` > `Tokens (classic)`
 59 | 3. Next, in the upper right, click on `Generate new token` and choose the classic version.
 60 | 4. Set token name (e.g., "Python-Course-Access").
 61 | 5. Select scopes:
 62 |    - `repo`
 63 |    - `workflow`
 64 |    - `write:packages`
 65 |    - `delete:packages`
 66 |      
 67 |    ![PAT](../assets/PAT.png)
 68 | 6. Generate token
 69 | 7. ⚠️ **IMPORTANT**: Copy and save your token somewhere safe! You'll need it when Git asks for your password. It will start with `ghp_` followed by a bunch of numbers and letters. **After it is generated and shown once, it will not be accessible again via the GitHub website. Store it safely!**
 70 | 
 71 | ### Cloning the repository
 72 | 
 73 | 8. Open your terminal and **navigate (`cd`) to the directory where you want to store the repository** (replace `~/git` with that folder). This is often a folder called `git` in your home directory (`~`). You have to create the folder with e.g. `mkdir git` if it does not exist yet.
 74 |    ```bash
 75 |    cd ~/git
 76 |    ```
 77 | 9. Type the following command to clone (download) the repository to your local machine (don't forget to replace `username` with your username):
 78 |    ```bash
 79 |    git clone https://github.com/username/ppchem.git
 80 |    ```
 81 | 10. When prompted type in your GitHub username and PAT for the password.
 82 |     - :bulb: *Note*: You will not see any characters appearing in the password prompt when typing/copying your PAT. This is for security reasons. You can just `enter` when you are done typing.
 83 | 11. Navigate into the repository by typing `cd ppchem`.
 84 | 
 85 | In order to clone any repository, you need to have the URL of the repository. You can find the URL by clicking on the green `Code` button on the repository's page.
 86 | 
 87 | ### Making changes and committing them
 88 | 
 89 | Whenever you make changes to your repository, you need to commit them to save the changes to the repository's history. We will add an image to the repository and practice the git workflow.
 90 | 
 91 | 12. Download an image of your favorite molecule from the internet. Your TAs recommend `Caffeine` :coffee:.
 92 | 13. Add this image to your `repository` (the project folder). You can do this in one of these ways:
 93 |     - drag and drop the image on your file system (this can be done in a VSCode project with the relevant directories open)
 94 |     - use the `mv` command shown in the [terminal command table of Lecture 01](../Lecture01/README.md#terminal-commands-overview). The `source` is the downloaded image and `destination` is your repository
 95 | 14. Check if the file is in the directory by typing `ls`
 96 | 15. Type the following command to stage the changes and **replace `<caffeine.png>` with your filename**:
 97 |       ```bash
 98 |       git add <caffeine.png> # stages only the added file - replace with the actual file name
 99 |       ```
100 | 16. Check if the changes have been staged by typing `git status`.
101 | 17. Type the following command to commit the changes:
102 |       ```bash
103 |       git commit -m "Some message" # Replace with a meaningful message
104 |       ```
105 | 18. Use `git status` again to see if the changes are now committed.
106 | 
107 | ### Pushing the changes to GitHub
108 | 
109 | For now, we only made changes to the local repository and committed them (prepared them for upload). We need to push the changes to GitHub to make them available to others.
110 | 
111 | 19. Type the following command to push the changes to GitHub:
112 |       ```bash
113 |       git push origin main # Replace with the branch name if you are not on the main branch
114 |       ```
115 | 20. Check in the browser (you might have to refresh the website) if your changes have been updated online.
116 | 
117 | You can check the status of your repository at any time by typing `git status` in your terminal in the folder of the repository. This will also show you the changes you made and the files you staged as well as the branch you are currently on.
118 | 
119 | 
120 | ### Working with branches
121 | 
122 | 21. Type the following command to create a new branch and switch to it:
123 |       ```bash
124 |       git checkout -b new-branch-name
125 |       ```
126 | 22. Publish the branch to GitHub by typing:
127 |       ```bash
128 |       git push -u origin new-branch-name
129 |       ```
130 | 
131 | Changing branches locally will change the version that you have access to locally. So if you have a specific file only in the new branch you cannot find it in `main` and therefore also not push changes from there. If you simply want to switch branch, type:
132 |    ```bash
133 |    git checkout <branch-name> # replace <branch-name> with the actual name
134 |    ```
135 | 
136 | ### Pulling changes from GitHub
137 | 
138 | If you are working on a repository with others, you might want to pull the changes they made to your local repository. You can do this by typing `git pull` in your terminal in the folder of the repository.
139 | 
140 | If working collaboratively on a project, where each project member is working on local copies of the same remote repository, and especially if working on the same file, extra care needs to be taken when pushing and pulling changes to the remote repository.
141 | 
142 | In these cases, best practice is often to work in a branch of the repo that is yours alone, and merge changes with others periodically. This will be covered later in the course.
143 | 
144 | ## Summary of important commands
145 | 
146 | | Command | Description |
147 | |---------|-------------|
148 | | `git config --global user.name "name"` | Set your name in git configuration (only once)|
149 | | `git config --global user.email "email"` | Set your email in git configuration (only once)|
150 | | `git clone https://github.com/username/repository.git` | Clone a GitHub repository to your local machine |
151 | | `git branch branch-name` | Create a new branch |
152 | | `git checkout branch-name` | Switch to another branch |
153 | | `git add .` | Stage all changes in the current directory |
154 | | `git commit -m "commit message"` | Commit your staged changes |
155 | | `git push origin branch-name` | Push your local commits to GitHub |
156 | | `git pull origin branch-name` | Fetch the newest updates from the remote branch |
157 | | `git status` | Check the status of your local repository |
158 | | `git diff` | Show unstaged changes (differences between your working directory and the staging area) |
159 | | `git diff --staged` | Show changes between your staging area and the last commit |
160 | 
161 | Above are the basic Git commands you'll frequently use which cover most of the general use cases from cloning repositories to making changes and updating your remote branches. Each command is a powerful tool in Git, and they are designed to work together to create a seamless workflow.
162 | 
163 | Remember, the best way to become comfortable with Git is practice. Try using these commands to manage a test project and experiment until you're confident with the functionality of each one.
164 | 
165 | 


--------------------------------------------------------------------------------
/Lecture03/03_exercise.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# 1. Python Data Types"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "markdown",
  12 |    "metadata": {},
  13 |    "source": [
  14 |     "## Recap on data types\n",
  15 |     "\n",
  16 |     "Python has several built-in data types, which are categorized into:\n",
  17 |     "- Numeric: `int`, `float`, `complex`\n",
  18 |     "- Sequence: `str`, `list`, `tuple`\n",
  19 |     "- Mapping: `dict`\n",
  20 |     "- Set: `set`\n",
  21 |     "- Boolean: `bool`"
  22 |    ]
  23 |   },
  24 |   {
  25 |    "cell_type": "markdown",
  26 |    "metadata": {},
  27 |    "source": [
  28 |     "### Numeric Data Types\n",
  29 |     "- `int`: Integer, e.g. 1, 2, 3\n",
  30 |     "- `float`: Floating point number, e.g. 1.0, -2.5, 3.14\n",
  31 |     "- `complex`: Complex number, e.g. 1 + 2j, 3 - 4j"
  32 |    ]
  33 |   },
  34 |   {
  35 |    "cell_type": "code",
  36 |    "execution_count": null,
  37 |    "metadata": {},
  38 |    "outputs": [],
  39 |    "source": [
  40 |     "# Integer\n",
  41 |     "int_var = 10\n",
  42 |     "print(type(int_var))  # <class 'int'>\n",
  43 |     "\n",
  44 |     "# Float\n",
  45 |     "float_var = 10.5\n",
  46 |     "print(type(float_var))  # <class 'float'>\n",
  47 |     "\n",
  48 |     "# Complex\n",
  49 |     "complex_var = 10 + 5j\n",
  50 |     "print(type(complex_var))  # <class 'complex'>"
  51 |    ]
  52 |   },
  53 |   {
  54 |    "cell_type": "markdown",
  55 |    "metadata": {},
  56 |    "source": [
  57 |     "### Sequence Data Types\n",
  58 |     "Sequence data types are **ordered** collections of similar or different data types. The elements in a sequence can be accessed using **indexing**.\n",
  59 |     "- `str`: String, e.g. \"hello\", 'world'\n",
  60 |     "- `list`: List, e.g. [1, 2, 3], ['a', 'b', 'c']\n",
  61 |     "- `tuple`: Tuple, e.g. (1, 2, 3), ('a', 'b', 'c')"
  62 |    ]
  63 |   },
  64 |   {
  65 |    "cell_type": "code",
  66 |    "execution_count": null,
  67 |    "metadata": {},
  68 |    "outputs": [],
  69 |    "source": [
  70 |     "# String\n",
  71 |     "str_var = \"Hello, Python!\"\n",
  72 |     "print(type(str_var))  # <class 'str'>\n",
  73 |     "\n",
  74 |     "# List\n",
  75 |     "list_var = [1, 2, 3, 4, 5]\n",
  76 |     "print(type(list_var))  # <class 'list'>\n",
  77 |     "\n",
  78 |     "# Tuple\n",
  79 |     "tuple_var = (1, 2, 3, 4, 5)\n",
  80 |     "print(type(tuple_var))  # <class 'tuple'>"
  81 |    ]
  82 |   },
  83 |   {
  84 |    "cell_type": "markdown",
  85 |    "metadata": {},
  86 |    "source": [
  87 |     "You can include anything you want in lists, from other lists to strings to tuples. Although this behaviour is allowed, in practice it should be avoided, as it can lead to code which behaves unpredictably and is tricky for others (and future you) to debug."
  88 |    ]
  89 |   },
  90 |   {
  91 |    "cell_type": "code",
  92 |    "execution_count": null,
  93 |    "metadata": {},
  94 |    "outputs": [],
  95 |    "source": [
  96 |     "# You can put anything you want in a list, including other lists\n",
  97 |     "\n",
  98 |     "elements = [[\"Hydrogen\", \"Helium\", \"Lithium\"], [\"Beryllium\", \"Boron\", \"Carbon\"], [\"Nitrogen\", \"Oxygen\", \"Fluorine\"]]\n",
  99 |     "\n",
 100 |     "# You can also declare whacky lists like this\n",
 101 |     "\n",
 102 |     "whacky_list = [1, 'dog', 3.14, [4, 5, 6]]"
 103 |    ]
 104 |   },
 105 |   {
 106 |    "cell_type": "markdown",
 107 |    "metadata": {},
 108 |    "source": [
 109 |     "### List Operations\n",
 110 |     "\n",
 111 |     "You can add elements to the list using the `.append` method."
 112 |    ]
 113 |   },
 114 |   {
 115 |    "cell_type": "code",
 116 |    "execution_count": null,
 117 |    "metadata": {},
 118 |    "outputs": [],
 119 |    "source": [
 120 |     "element_list = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\"]\n",
 121 |     "element_list.append(\"Lead\")\n",
 122 |     "print(element_list)"
 123 |    ]
 124 |   },
 125 |   {
 126 |    "cell_type": "markdown",
 127 |    "metadata": {},
 128 |    "source": [
 129 |     "To add an element to a list at a specific index, you can use the insert method. Write code to add the missing element to the list."
 130 |    ]
 131 |   },
 132 |   {
 133 |    "cell_type": "code",
 134 |    "execution_count": null,
 135 |    "metadata": {},
 136 |    "outputs": [],
 137 |    "source": [
 138 |     "element_list = [\"Hydrogen\", \"Lithium\", \"Beryllium\", \"Boron\"]\n",
 139 |     "element_list.insert(1, 'Helium')\n",
 140 |     "print(element_list)"
 141 |    ]
 142 |   },
 143 |   {
 144 |    "cell_type": "markdown",
 145 |    "metadata": {},
 146 |    "source": [
 147 |     "To remove an element from a list, you can use the remove method. When removing an element from a list, you must specify the value of the element you want to remove. Write code to remove the first element from the list"
 148 |    ]
 149 |   },
 150 |   {
 151 |    "cell_type": "code",
 152 |    "execution_count": null,
 153 |    "metadata": {},
 154 |    "outputs": [],
 155 |    "source": [
 156 |     "element_list.remove(\"Hydrogen\")\n",
 157 |     "print(element_list)"
 158 |    ]
 159 |   },
 160 |   {
 161 |    "cell_type": "markdown",
 162 |    "metadata": {},
 163 |    "source": [
 164 |     "The `remove` method simply deletes the element; what if you want to retrieve the element and then delete it? You can use the `pop` method for this purpose. The `pop` method takes one optional argument, the index of the element you want to remove. When you don't specify an index, it removes the last element from the list by default. Write code to remove the last element from the list, and then remove the second element. You can access lists in the reverse direction using negative indices, where `-1` refers to the last element, `-2` refers to the second-last element, and so on."
 165 |    ]
 166 |   },
 167 |   {
 168 |    "cell_type": "code",
 169 |    "execution_count": null,
 170 |    "metadata": {},
 171 |    "outputs": [],
 172 |    "source": [
 173 |     "element_list = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\"]\n",
 174 |     "\n",
 175 |     "a = element_list.pop(-1)\n",
 176 |     "print(a)\n",
 177 |     "\n",
 178 |     "b = element_list.pop(1)\n",
 179 |     "print(b)\n",
 180 |     "\n",
 181 |     "print(element_list)"
 182 |    ]
 183 |   },
 184 |   {
 185 |    "cell_type": "markdown",
 186 |    "metadata": {},
 187 |    "source": [
 188 |     "You can delete all the elements using the clear method"
 189 |    ]
 190 |   },
 191 |   {
 192 |    "cell_type": "code",
 193 |    "execution_count": null,
 194 |    "metadata": {},
 195 |    "outputs": [],
 196 |    "source": [
 197 |     "element_list.clear()\n",
 198 |     "print(element_list)"
 199 |    ]
 200 |   },
 201 |   {
 202 |    "cell_type": "markdown",
 203 |    "metadata": {},
 204 |    "source": [
 205 |     "Now how about if we have two lists, and we want to combine them into a single list. For this we can just add them using the '+' operator"
 206 |    ]
 207 |   },
 208 |   {
 209 |    "cell_type": "code",
 210 |    "execution_count": null,
 211 |    "metadata": {},
 212 |    "outputs": [],
 213 |    "source": [
 214 |     "element_list_1 = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\"]\n",
 215 |     "element_list_2= [\"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 216 |     "element_list_3 = element_list_1 + element_list_2\n",
 217 |     "print(element_list_3)\n",
 218 |     "\n",
 219 |     "print(len(element_list_3))"
 220 |    ]
 221 |   },
 222 |   {
 223 |    "cell_type": "markdown",
 224 |    "metadata": {},
 225 |    "source": [
 226 |     "#### Indexing\n",
 227 |     "- Indexing in Python starts from 0.\n",
 228 |     "- Negative indexing is also possible, where -1 refers to the last element, -2 refers to the second last element, and so on.\n",
 229 |     "- Slicing can be used to access a range of elements in a sequence.\n",
 230 |     "    - The syntax for slicing is `sequence[start:stop:step]`."
 231 |    ]
 232 |   },
 233 |   {
 234 |    "cell_type": "code",
 235 |    "execution_count": null,
 236 |    "metadata": {},
 237 |    "outputs": [],
 238 |    "source": [
 239 |     "# These types are ordered and can be indexed\n",
 240 |     "print(str_var[0])\n",
 241 |     "print(list_var[3])\n",
 242 |     "print(tuple_var[-1])"
 243 |    ]
 244 |   },
 245 |   {
 246 |    "cell_type": "code",
 247 |    "execution_count": null,
 248 |    "metadata": {},
 249 |    "outputs": [],
 250 |    "source": [
 251 |     "print(str_var[0:5])"
 252 |    ]
 253 |   },
 254 |   {
 255 |    "cell_type": "markdown",
 256 |    "metadata": {},
 257 |    "source": [
 258 |     "#### Difference between `list` and `tuple`\n",
 259 |     "- `list` is mutable, i.e. the elements in a list can be changed or modified.\n",
 260 |     "- `tuple` is immutable, i.e. the elements in a tuple cannot be changed or modified."
 261 |    ]
 262 |   },
 263 |   {
 264 |    "cell_type": "code",
 265 |    "execution_count": null,
 266 |    "metadata": {},
 267 |    "outputs": [],
 268 |    "source": [
 269 |     "list_var[0] = 10\n",
 270 |     "print(list_var)  # [10, 2, 3, 4, 5]"
 271 |    ]
 272 |   },
 273 |   {
 274 |    "cell_type": "code",
 275 |    "execution_count": null,
 276 |    "metadata": {},
 277 |    "outputs": [],
 278 |    "source": [
 279 |     "tuple_var[0] = 10  # TypeError: 'tuple' object does not support item assignment"
 280 |    ]
 281 |   },
 282 |   {
 283 |    "cell_type": "markdown",
 284 |    "metadata": {},
 285 |    "source": [
 286 |     "#### String Methods\n",
 287 |     "- `str` has several built-in methods, such as `upper()`, `lower()`, `strip()`, `split()`, `join()`, `find()` etc.\n",
 288 |     "- `str` is immutable, i.e. the elements in a string cannot be changed or modified.\n",
 289 |     "- String concatenation can be done using the `+` operator.\n",
 290 |     "- String formatting can be done using f-strings."
 291 |    ]
 292 |   },
 293 |   {
 294 |    "cell_type": "code",
 295 |    "execution_count": null,
 296 |    "metadata": {},
 297 |    "outputs": [],
 298 |    "source": [
 299 |     "# built in string methods\n",
 300 |     "print(str_var.lower())\n",
 301 |     "print(str_var.upper())\n",
 302 |     "print(str_var.split(\",\"))\n",
 303 |     "print(str_var.replace(\"Hello\", \"Hi\"))\n",
 304 |     "print(str_var.find(\"Python\"))"
 305 |    ]
 306 |   },
 307 |   {
 308 |    "cell_type": "code",
 309 |    "execution_count": null,
 310 |    "metadata": {},
 311 |    "outputs": [],
 312 |    "source": [
 313 |     "# f string\n",
 314 |     "molecules = 'hydrogen oxide'\n",
 315 |     "atoms = 3\n",
 316 |     "print(f'Water is composed of mostly {molecules} and it has {atoms} atoms')"
 317 |    ]
 318 |   },
 319 |   {
 320 |    "cell_type": "markdown",
 321 |    "metadata": {},
 322 |    "source": [
 323 |     "### Set\n",
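 324 |     "A set is an unordered collection of **unique** elements. It is written as comma-separated values inside curly braces, e.g. `{1, 2, 3}`. Note that empty braces `{}` create an empty `dict`; use `set()` to create an empty set.\n",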
 325 |     "- `set`: Set, e.g. {1, 2, 3}, {'a', 'b', 'c'}"
 326 |    ]
 327 |   },
 328 |   {
 329 |    "cell_type": "code",
 330 |    "execution_count": null,
 331 |    "metadata": {},
 332 |    "outputs": [],
 333 |    "source": [
 334 |     "set_var = {1, 2, 3, 4, 5}\n",
 335 |     "print(type(set_var))  # <class 'set'>"
 336 |    ]
 337 |   },
 338 |   {
 339 |    "cell_type": "code",
 340 |    "execution_count": null,
 341 |    "metadata": {},
 342 |    "outputs": [],
 343 |    "source": [
 344 |     "# type is unordered and unindexed\n",
 345 |     "print(set_var[0])  # TypeError: 'set' object is not subscriptable"
 346 |    ]
 347 |   },
 348 |   {
 349 |    "cell_type": "code",
 350 |    "execution_count": null,
 351 |    "metadata": {},
 352 |    "outputs": [],
 353 |    "source": [
 354 |     "# showcase that unique elements are stored in set\n",
 355 |     "set_var = {1, 2, 3, 4, 5, 5, 5, 5, 5}\n",
 356 |     "print(set_var)"
 357 |    ]
 358 |   },
 359 |   {
 360 |    "cell_type": "markdown",
 361 |    "metadata": {},
 362 |    "source": [
 363 |     "#### Usage of Sets\n",
 364 |     "- To eliminate duplicate elements from a list. (*See above*)\n",
 365 |     "- To perform mathematical set operations like union, intersection, difference, etc."
 366 |    ]
 367 |   },
 368 |   {
 369 |    "cell_type": "code",
 370 |    "execution_count": null,
 371 |    "metadata": {},
 372 |    "outputs": [],
 373 |    "source": [
 374 |     "# show use cases for sets\n",
 375 |     "set_var1 = {1, 2, 3, 4, 5}\n",
 376 |     "set_var2 = {4, 5, 6, 7, 8}\n",
 377 |     "\n",
 378 |     "print(set_var1.union(set_var2))  # {1, 2, 3, 4, 5, 6, 7, 8}\n",
 379 |     "print(set_var1.intersection(set_var2))  # {4, 5}\n",
 380 |     "print(set_var1.difference(set_var2))  # {1, 2, 3}\n",
 381 |     "print(set_var1.symmetric_difference(set_var2))  # {1, 2, 3, 6, 7, 8}"
 382 |    ]
 383 |   },
 384 |   {
 385 |    "cell_type": "markdown",
 386 |    "metadata": {},
 387 |    "source": [
 388 |     "### Mapping Data Types\n",
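 389 |     "A dictionary is a changeable collection of key-value pairs in which each value is looked up by its key. Since Python 3.7, dictionaries also preserve the order in which items were inserted. In Python, dictionaries are written with curly brackets, and they have keys and values.\n",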
 390 |     "- `dict`: Dictionary, e.g. {'amino acid': ['alanine', 'valine'], 'nucleotide': ['adenine', 'thymine']}"
 391 |    ]
 392 |   },
 393 |   {
 394 |    "cell_type": "code",
 395 |    "execution_count": null,
 396 |    "metadata": {},
 397 |    "outputs": [],
 398 |    "source": [
 399 |     "dict_var = {\"halogen\": \"fluorine\", \"noble_gas\": \"helium\", \"alkali_metal\": \"lithium\"}\n",
 400 |     "print(type(dict_var))  # <class 'dict'>\n",
 401 |     "print('keys:', dict_var.keys())\n",
 402 |     "print('values:', dict_var.values())"
 403 |    ]
 404 |   },
 405 |   {
 406 |    "cell_type": "markdown",
 407 |    "metadata": {},
 408 |    "source": [
 409 |     "## Exercises"
 410 |    ]
 411 |   },
 412 |   {
 413 |    "cell_type": "markdown",
 414 |    "metadata": {},
 415 |    "source": [
 416 |     "### Exercise 1.1:\n",
 417 |     "What does this code return?\n",
 418 |     "\n",
 419 |     "```python\n",
 420 |     "my_string = \"2cfo6njs[pwi2r3adcvy\"\n",
 421 |     "my_string[0:10:2]\n",
 422 |     "```\n",
 423 |     "What could the 2 mean in that context?"
 424 |    ]
 425 |   },
 426 |   {
 427 |    "cell_type": "code",
 428 |    "execution_count": 1,
 429 |    "metadata": {},
 430 |    "outputs": [
 431 |     {
 432 |      "data": {
 433 |       "text/plain": [
 434 |        "'2f6j['"
 435 |       ]
 436 |      },
 437 |      "execution_count": 1,
 438 |      "metadata": {},
 439 |      "output_type": "execute_result"
 440 |     }
 441 |    ],
 442 |    "source": [
 443 |     "my_string = \"2cfo6njs[pwi2r3adcvy\"\n",
 444 |     "my_string[0:10:2]"
 445 |    ]
 446 |   },
 447 |   {
 448 |    "cell_type": "markdown",
 449 |    "metadata": {},
 450 |    "source": [
 451 |     "As a final exercise on slicing, you will write code for finding the middle index of a list and then use list slicing to split the list into two sublists. Put it inside the function `split_list` and test it using the `test_split` function. When dividing a list in two, think about the edge cases you must consider. Will your code work for lists of both even and odd length? What about empty lists? Remember that lists in Python have indexes which start from `0`, so the `7th` element has index `6`. Not correctly accounting for this is an extremely common problem in programming and can be tricky to debug.\n",
 452 |     "\n",
 453 |     "First, implement the simplest case, where the list length is an even number, and write code for this. Your output should be the middle index and the two equal-length halves of the list. Make sure to calculate the list slices using simple mathematical operations in the code."
 454 |    ]
 455 |   },
 456 |   {
 457 |    "cell_type": "code",
 458 |    "execution_count": null,
 459 |    "metadata": {},
 460 |    "outputs": [],
 461 |    "source": [
 462 |     "test_even = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\", \"Sodium\"]\n",
 463 |     "test_odd = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 464 |     "\n",
 465 |     "\n",
 466 |     "middle_index = ...\n",
 467 |     "first_half = ...\n",
 468 |     "second_half = ...\n"
 469 |    ]
 470 |   },
 471 |   {
 472 |    "cell_type": "markdown",
 473 |    "metadata": {},
 474 |    "source": [
 475 |     "Now extend it to work with lists of odd length. It is good practice, when splitting a list into two parts of unequal length, to have the longer segment be the lower (first) segment; this is indicated in the test case."
 476 |    ]
 477 |   },
 478 |   {
 479 |    "cell_type": "code",
 480 |    "execution_count": null,
 481 |    "metadata": {},
 482 |    "outputs": [],
 483 |    "source": [
 484 |     "test_even = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\", \"Sodium\"]\n",
 485 |     "test_odd = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 486 |     "\n",
 487 |     "\n",
 488 |     "\n",
 489 |     "middle_index = ...\n",
 490 |     "first_half = ...\n",
 491 |     "second_half = ...\n",
 492 |     "\n"
 493 |    ]
 494 |   },
 495 |   {
 496 |    "cell_type": "markdown",
 497 |    "metadata": {},
 498 |    "source": [
 499 |     "### Exercise 1.2:\n",
 500 |     "\n",
 501 |     "How can you make this calculation work?\n",
 502 |     "\n",
 503 |     "```python\n",
 504 |     "a = 5\n",
 505 |     "b = \"6\"\n",
 506 |     "a + b\n",
 507 |     "```"
 508 |    ]
 509 |   },
 510 |   {
 511 |    "cell_type": "code",
 512 |    "execution_count": null,
 513 |    "metadata": {},
 514 |    "outputs": [],
 515 |    "source": [
 516 |     "a = 5\n",
 517 |     "b = \"6\"\n",
 518 |     "# correct here\n"
 519 |    ]
 520 |   },
 521 |   {
 522 |    "cell_type": "markdown",
 523 |    "metadata": {},
 524 |    "source": [
 525 |     "### Exercise 1.3:\n",
 526 |     "\n",
 527 |     "Now you have seen that Python has interoperability of certain variable types. \n",
 528 |     "\n",
 529 |     "Next we will look at boolean variables. These hold a single value, True or False. You can perform operations on them: 'AND', 'OR' and 'NOT'\n",
 530 |     "\n",
 531 |     "First, evaluate the expressions below by hand, then check your answers with some Python code.\n",
 532 |     "\n",
 533 |     "a = False\n",
 534 |     "b = True\n",
 535 |     "c = False\n",
 536 |     "\n",
 537 |     "1. a and b        = ...\n",
 538 |     "2. a or b         = ...\n",
 539 |     "3. not a          = ...\n",
 540 |     "4. not b and c    = ...\n",
 541 |     "5. (a and b) or c = ..."
 542 |    ]
 543 |   },
 544 |   {
 545 |    "cell_type": "code",
 546 |    "execution_count": null,
 547 |    "metadata": {},
 548 |    "outputs": [],
 549 |    "source": [
 550 |     "### in python, you can simply represent the boolean operators by their english language name\n",
 551 |     "\n",
 552 |     "### Your code here"
 553 |    ]
 554 |   },
 555 |   {
 556 |    "cell_type": "markdown",
 557 |    "metadata": {},
 558 |    "source": [
 559 |     "Now that we have a concept of booleans, we can think about conditional statements. These are useful if you want to be able to execute separate branches of code, depending on your input. An `if` statement evaluates a boolean expression and, if the expression is `True`, allows the code to enter the execution block. Blocks are marked by indents.\n",
 560 |     "\n",
 561 |     "First, WITHOUT running the code below, determine its output by hand. It is an important skill to be able to understand what a piece of code does without running it."
 562 |    ]
 563 |   },
 564 |   {
 565 |    "cell_type": "code",
 566 |    "execution_count": null,
 567 |    "metadata": {},
 568 |    "outputs": [],
 569 |    "source": [
 570 |     "a = True\n",
 571 |     "b = False\n",
 572 |     "c = True\n",
 573 |     "\n",
 574 |     "if a:\n",
 575 |     "    if not c:\n",
 576 |     "        print('Answer 1')\n",
 577 |     "    elif c and b:\n",
 578 |     "        print('Answer 2')\n",
 579 |     "    print('Answer 3')\n",
 580 |     "else:\n",
 581 |     "    print('Answer 4')"
 582 |    ]
 583 |   },
 584 |   {
 585 |    "cell_type": "markdown",
 586 |    "metadata": {},
 587 |    "source": [
 588 |     "### Exercise 1.4\n",
 589 |     "\n",
 590 |     "Sometimes we won't have the option of using booleans in our code; for example, we might want to check whether a string or an integer evaluates as True or False. For this case, Python allows the evaluation of conditional statements on non-boolean inputs. Try out various combinations of the variables below with the goal of finding out which values of the string and integer data types evaluate to True or False."
 591 |    ]
 592 |   },
 593 |   {
 594 |    "cell_type": "code",
 595 |    "execution_count": null,
 596 |    "metadata": {},
 597 |    "outputs": [],
 598 |    "source": [
 599 |     "a = 'Hydrogen'\n",
 600 |     "b = 'oxygen'\n",
 601 |     "c = 1\n",
 602 |     "d = 0\n",
 603 |     "e = ''\n",
 604 |     "f = -3\n",
 605 |     "d = None\n",
 606 |     "\n",
 607 |     "### Your code here"
 608 |    ]
 609 |   },
 610 |   {
 611 |    "cell_type": "markdown",
 612 |    "metadata": {},
 613 |    "source": [
 614 |     "### Exercise 1.5:\n",
 615 |     "Print out the first letter of every word in the string.\n",
 616 |     "\n",
 617 |     "```python\n",
 618 |     "sentence = 'Sober Physicists Don’t Find Giraffes Hiding In Kitchens'\n",
 619 |     "```\n",
 620 |     "What do you observe?"
 621 |    ]
 622 |   },
 623 |   {
 624 |    "cell_type": "code",
 625 |    "execution_count": null,
 626 |    "metadata": {},
 627 |    "outputs": [],
 628 |    "source": [
 629 |     "sentence = 'Sober Physicists Don’t Find Giraffes Hiding In Kitchens'\n",
 630 |     "\n",
 631 |     "# print solution here"
 632 |    ]
 633 |   },
 634 |   {
 635 |    "cell_type": "markdown",
 636 |    "metadata": {},
 637 |    "source": [
 638 |     "### Exercise 1.6:\n",
 639 |     "\n",
 640 |     "1. Create a dictionary that represents the following table:\n",
 641 |     " \n",
 642 |     "| Base | Acid |\n",
 643 |     "|------|------|\n",
 644 |     "| 'NaOH' | 'HCl' |\n",
 645 |     "| 'KOH' | 'H2SO4' |\n",
 646 |     "| 'Ca(OH)2' | 'HNO3' |\n",
 647 |     "\n",
 648 |     "2. Add a new base to the dictionary: `NH4OH`.\n",
 649 |     "3. Print out the categories and chemicals. "
 650 |    ]
 651 |   },
 652 |   {
 653 |    "cell_type": "code",
 654 |    "execution_count": null,
 655 |    "metadata": {},
 656 |    "outputs": [],
 657 |    "source": [
 658 |     "# 1.\n",
 659 |     "\n",
 660 |     "# 2.\n",
 661 |     "\n",
 662 |     "# 3."
 663 |    ]
 664 |   },
 665 |   {
 666 |    "cell_type": "markdown",
 667 |    "metadata": {},
 668 |    "source": [
 669 |     "# 2. Control Structures - Loops\n",
 670 |     "\n",
 671 |     "Now we will have a look at control flow in code. If you have a collection of elements like a list, you might want to iterate over each element and perform an action. First, let's look at the `while` loop. This loop checks a condition and, if the condition evaluates to `True`, executes a block of code. After the code block is executed, it returns to the condition and checks it again."
 672 |    ]
 673 |   },
 674 |   {
 675 |    "cell_type": "code",
 676 |    "execution_count": null,
 677 |    "metadata": {},
 678 |    "outputs": [],
 679 |    "source": [
 680 |     "pH = 2  # Assume we start the pH at 2 (which is acidic)\n",
 681 |     "\n",
 682 |     "while pH != 7:  # while the pH is not neutral\n",
 683 |     "    print(f\"Current pH: {pH}\")\n",
 684 |     "    if pH < 7:  # if the environment is acidic\n",
 685 |     "        print(\"Solution is too acidic. Adding a base to increase pH.\")\n",
 686 |     "        pH += 1  # add a base to increase the pH\n",
 687 |     "    elif pH > 7:  # if the environment is basic\n",
 688 |     "        print(\"Solution is too basic. Adding an acid to decrease pH.\")\n",
 689 |     "        pH -= 1  # add an acid to reduce the pH\n",
 690 |     "        \n",
 691 |     "print(\"Solution is now neutral.\")"
 692 |    ]
 693 |   },
 694 |   {
 695 |    "cell_type": "markdown",
 696 |    "metadata": {},
 697 |    "source": [
 698 |     "We can also use `while` loops to iterate over a sequence of numbers."
 699 |    ]
 700 |   },
 701 |   {
 702 |    "cell_type": "code",
 703 |    "execution_count": null,
 704 |    "metadata": {},
 705 |    "outputs": [],
 706 |    "source": [
 707 |     "counter = 0\n",
 708 |     "max_count = 9\n",
 709 |     "\n",
 710 |     "# Here is the list of the first nine chemical elements:\n",
 711 |     "elements = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 712 |     "\n",
 713 |     "while counter < max_count:\n",
 714 |     "    # Here we print the element at the current index\n",
 715 |     "    # Note the adjustment for 0-based indexing\n",
 716 |     "    print(f\"Element {counter + 1}: {elements[counter]}\")\n",
 717 |     "    counter += 1"
 718 |    ]
 719 |   },
 720 |   {
 721 |    "cell_type": "markdown",
 722 |    "metadata": {},
 723 |    "source": [
 724 |     "We can use two additional control flow statements in iterations: `break` immediately terminates the loop, and `continue` skips the rest of the current iteration, but the loop continues to run.\n",
 725 |     "\n",
 726 |     "Given this information, what will be the output of the program below?"
 727 |    ]
 728 |   },
 729 |   {
 730 |    "cell_type": "code",
 731 |    "execution_count": null,
 732 |    "metadata": {},
 733 |    "outputs": [],
 734 |    "source": [
 735 |     "elements = [\"Iron\", \"Copper\", \"Zinc\", \"Gold\", \"Silver\", \"Platinum\"]\n",
 736 |     "\n",
 737 |     "for element in elements:\n",
 738 |     "    if element == \"Copper\":\n",
 739 |     "        continue\n",
 740 |     "    if element == \"Gold\":\n",
 741 |     "        break\n",
 742 |     "    print(element)"
 743 |    ]
 744 |   },
 745 |   {
 746 |    "cell_type": "markdown",
 747 |    "metadata": {},
 748 |    "source": [
 749 |     "### For Loops\n",
 750 |     "\n",
 751 |     "A for loop in Python is a way to repeat code for each item in a sequence. The basic syntax looks like this:"
 752 |    ]
 753 |   },
 754 |   {
 755 |    "cell_type": "code",
 756 |    "execution_count": null,
 757 |    "metadata": {},
 758 |    "outputs": [],
 759 |    "source": [
 760 |     "for item in iterable:\n",
 761 |     "    # do something with item"
 762 |    ]
 763 |   },
 764 |   {
 765 |    "cell_type": "markdown",
 766 |    "metadata": {},
 767 |    "source": [
 768 |     "Iterables are objects in Python that contain a sequence of elements - they can be \"iterated over\" one element at a time. Common iterables include:"
 769 |    ]
 770 |   },
 771 |   {
 772 |    "cell_type": "code",
 773 |    "execution_count": null,
 774 |    "metadata": {},
 775 |    "outputs": [],
 776 |    "source": [
 777 |     "noble_gases = [\"Hydrogen\", \"Neon\", \"Argon\"]\n",
 778 |     "for gas in noble_gases:\n",
 779 |     "    print(gas)\n",
 780 |     "\n",
 781 |     "# We can also iterate in reverse\n",
 782 |     "for gas in reversed(noble_gases):\n",
 783 |     "    print(gas)\n",
 784 |     "\n",
 785 |     "# Strings (iterate over each character)\n",
 786 |     "name = \"Lithium\"\n",
 787 |     "for letter in name:\n",
 788 |     "    print(letter)\n",
 789 |     "\n",
 790 |     "# Range (generates a sequence of numbers)\n",
 791 |     "for number in range(3):\n",
 792 |     "    print(number)  # Prints 0, 1, 2"
 793 |    ]
 794 |   },
 795 |   {
 796 |    "cell_type": "markdown",
 797 |    "metadata": {},
 798 |    "source": [
 799 |     "The beauty of for loops is their simplicity - you don't need to manage indexes or worry about when to stop. Python automatically handles iterating through all elements and stops when it reaches the end."
 800 |    ]
 801 |   },
 802 |   {
 803 |    "cell_type": "markdown",
 804 |    "metadata": {},
 805 |    "source": [
 806 |     "However, if you want to iterate over a list via its index using a for loop, you can do it in one of the following two ways."
 807 |    ]
 808 |   },
 809 |   {
 810 |    "cell_type": "code",
 811 |    "execution_count": null,
 812 |    "metadata": {},
 813 |    "outputs": [],
 814 |    "source": [
 815 |     "elements = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 816 |     "\n",
 817 |     "for idx in range(len(elements)):\n",
 818 |     "    print(f\"{idx}, {elements[idx]}\")\n",
 819 |     "\n",
 820 |     "for idx, element in enumerate(elements):\n",
 821 |     "    print(f\"{idx}, {element}\")"
 822 |    ]
 823 |   },
 824 |   {
 825 |    "cell_type": "markdown",
 826 |    "metadata": {},
 827 |    "source": [
 828 |     "It's important to know that reassigning the loop variable `element` that the for loop produces does not alter the original list."
 829 |    ]
 830 |   },
 831 |   {
 832 |    "cell_type": "code",
 833 |    "execution_count": null,
 834 |    "metadata": {},
 835 |    "outputs": [],
 836 |    "source": [
 837 |     "elements = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 838 |     "\n",
 839 |     "for idx, element in enumerate(reversed(elements)):\n",
 840 |     "    element = element.lower() + ' : ' + str(idx + 1)\n",
 841 |     "\n",
 842 |     "print(elements)"
 843 |    ]
 844 |   },
 845 |   {
 846 |    "cell_type": "markdown",
 847 |    "metadata": {},
 848 |    "source": [
 849 |     "If we wish to modify the original list, we can try the naive approach below. The code is trying to reverse the list and add atomic numbers. Before running the code, can you see what will go wrong?"
 850 |    ]
 851 |   },
 852 |   {
 853 |    "cell_type": "code",
 854 |    "execution_count": null,
 855 |    "metadata": {},
 856 |    "outputs": [],
 857 |    "source": [
 858 |     "elements = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 859 |     "\n",
 860 |     "for idx, element in enumerate(reversed(elements)):\n",
 861 |     "    elements[idx] = element.lower() + ' : ' + str(len(elements) - idx)"
 862 |    ]
 863 |   },
 864 |   {
 865 |    "cell_type": "markdown",
 866 |    "metadata": {},
 867 |    "source": [
 868 |     "### Exercise 2.1:\n",
 869 |     "\n",
 870 |     "Implement a method to reverse a list and add the corresponding atomic numbers. As a hint, consider creating a new list."
 871 |    ]
 872 |   },
 873 |   {
 874 |    "cell_type": "code",
 875 |    "execution_count": null,
 876 |    "metadata": {},
 877 |    "outputs": [],
 878 |    "source": [
 879 |     "elements = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 880 |     "new_elements = []\n",
 881 |     "\n",
 882 |     "... # Your code here\n",
 883 |     "\n",
 884 |     "print(elements)"
 885 |    ]
 886 |   },
 887 |   {
 888 |    "cell_type": "markdown",
 889 |    "metadata": {},
 890 |    "source": [
 891 |     "### Exercise 2.2:\n",
 892 |     "\n",
 893 |     "Can you think of a way to reverse the list *in-place*, i.e. without creating an entirely new list?"
 894 |    ]
 895 |   },
 896 |   {
 897 |    "cell_type": "code",
 898 |    "execution_count": null,
 899 |    "metadata": {},
 900 |    "outputs": [],
 901 |    "source": [
 902 |     "elements = [\"Hydrogen\", \"Helium\", \"Lithium\", \"Beryllium\", \"Boron\", \"Carbon\", \"Nitrogen\", \"Oxygen\", \"Fluorine\"]\n",
 903 |     "new_elements = []\n",
 904 |     "\n",
 905 |     "... # Your code here\n",
 906 |     "\n",
 907 |     "print(elements)"
 908 |    ]
 909 |   },
 910 |   {
 911 |    "cell_type": "markdown",
 912 |    "metadata": {},
 913 |    "source": [
 914 |     "### Exercise 2.3:\n",
 915 |     "\n",
 916 |     "For the following group of problems, your task is to work out what the output of the code will be without running it. Check your answer by executing the program. If you get them wrong, try to go through the code step by step and double check your assumptions about how each line of code works."
 917 |    ]
 918 |   },
 919 |   {
 920 |    "cell_type": "code",
 921 |    "execution_count": null,
 922 |    "metadata": {},
 923 |    "outputs": [],
 924 |    "source": [
 925 |     "numa = 11\n",
 926 |     "while numa > 2.5:\n",
 927 |     "    numa = numa - 1\n",
 928 |     "    print(numa)"
 929 |    ]
 930 |   },
 931 |   {
 932 |    "cell_type": "code",
 933 |    "execution_count": null,
 934 |    "metadata": {},
 935 |    "outputs": [],
 936 |    "source": [
 937 |     "numb = 2.5\n",
 938 |     "for i in range(0, 10, 2):\n",
 939 |     "    pass\n",
 940 |     "    print(i/numb)\n"
 941 |    ]
 942 |   },
 943 |   {
 944 |    "cell_type": "code",
 945 |    "execution_count": null,
 946 |    "metadata": {},
 947 |    "outputs": [],
 948 |    "source": [
 949 |     "numc = 10.2 \n",
 950 |     "while True:\n",
 951 |     "    if numc < 6.2:\n",
 952 |     "        break\n",
 953 |     "    print(numc)\n",
 954 |     "    numc -= 1"
 955 |    ]
 956 |   },
 957 |   {
 958 |    "cell_type": "code",
 959 |    "execution_count": null,
 960 |    "metadata": {},
 961 |    "outputs": [],
 962 |    "source": [
 963 |     "collected_strings = []\n",
 964 |     "\n",
 965 |     "for i in range(1, 5):\n",
 966 |     "    if i % 2 == 0:  \n",
 967 |     "        for j in range(5):\n",
 968 |     "            if j == 3:\n",
 969 |     "                break\n",
 970 |     "                collected_strings.append(str(j))\n",
 971 |     "        collected_strings.append(str('F'))\n",
 972 |     "    else:  \n",
 973 |     "        for j in range(5):\n",
 974 |     "            if j == 3:\n",
 975 |     "                continue\n",
 976 |     "            elif j == 4:\n",
 977 |     "                pass\n",
 978 |     "            collected_strings.append(str(j))\n",
 979 |     "\n",
 980 |     "for i in range(3):\n",
 981 |     "    if i == 1:\n",
 982 |     "        collected_strings.append(\"!\")\n",
 983 |     "        continue\n",
 984 |     "    collected_strings.append(\"?\")\n",
 985 |     "\n",
 986 |     "collect_str = \"\".join(collected_strings)\n",
 987 |     "print(f'Collected string is {collect_str}')"
 988 |    ]
 989 |   },
 990 |   {
 991 |    "cell_type": "markdown",
 992 |    "metadata": {},
 993 |    "source": [
 994 |     "The code provided in this question is buggy. Do not execute it. What do you think the programmer intended this code to do? Jot down a table that shows the value of the variables at each iteration. This should give you a clear understanding of why the code is buggy. Once you have done so, modify the code such that it is no longer buggy. Note that, due to the lack of comments indicating what the code is attempting to do, there are several possible answers for this."
 995 |    ]
 996 |   },
 997 |   {
 998 |    "cell_type": "code",
 999 |    "execution_count": null,
1000 |    "metadata": {},
1001 |    "outputs": [],
1002 |    "source": [
1003 |     "n = 10 \n",
1004 |     "i = 10\n",
1005 |     "while i > 0:\n",
1006 |     "    if i % 2 == 0:\n",
1007 |     "        i=i/2\n",
1008 |     "    else: \n",
1009 |     "        i=i+1"
1010 |    ]
1011 |   },
1012 |   {
1013 |    "cell_type": "markdown",
1014 |    "metadata": {},
1015 |    "source": [
1016 |     "# 3. Setting paths\n",
1017 |     "\n",
1018 |     "Setting paths when coding is important. It is a good practice to set the paths to folders/data in a way that is reproducible and especially shareable. This is important when sharing code with others, or when you are working on a project that requires data from different sources. Paths also look different on different operating systems (Windows, Mac, Linux), so it is important to set paths in a way that is compatible with all operating systems. Luckily, there are libraries like `os` and `pathlib` that can help us with that. We will look into `pathlib` in this notebook."
1019 |    ]
1020 |   },
1021 |   {
1022 |    "cell_type": "markdown",
1023 |    "metadata": {},
1024 |    "source": [
1025 |     "## Introduction to Pathlib\n",
1026 |     "An introduction to the pathlib module, which provides a way to handle filesystem paths."
1027 |    ]
1028 |   },
1029 |   {
1030 |    "cell_type": "code",
1031 |    "execution_count": null,
1032 |    "metadata": {},
1033 |    "outputs": [],
1034 |    "source": [
1035 |     "# Introduction to Pathlib\n",
1036 |     "\n",
1037 |     "# Importing the pathlib module\n",
1038 |     "from pathlib import Path\n",
1039 |     "\n",
1040 |     "# Creating a Path object\n",
1041 |     "p = Path('.')\n",
1042 |     "\n",
1043 |     "# Displaying the current directory\n",
1044 |     "print(p.resolve())\n",
1045 |     "\n",
1046 |     "# Listing all files in the current directory\n",
1047 |     "for file in p.iterdir():\n",
1048 |     "    print(file)\n",
1049 |     "\n",
1050 |     "# Creating a new directory\n",
1051 |     "new_dir = p / 'new_directory'\n",
1052 |     "new_dir.mkdir(exist_ok=True)\n",
1053 |     "\n",
1054 |     "# Checking if the new directory exists\n",
1055 |     "print(new_dir.exists())\n",
1056 |     "\n",
1057 |     "# Creating a new file in the new directory\n",
1058 |     "new_file = new_dir / 'new_file.txt'\n",
1059 |     "new_file.touch()\n",
1060 |     "\n",
1061 |     "# Checking if the new file exists\n",
1062 |     "print(new_file.exists())\n",
1063 |     "\n",
1064 |     "# Deleting the new file\n",
1065 |     "new_file.unlink()\n",
1066 |     "\n",
1067 |     "# Checking if the new file exists\n",
1068 |     "print(new_file.exists())\n",
1069 |     "\n",
1070 |     "# Deleting the new directory\n",
1071 |     "new_dir.rmdir()\n",
1072 |     "\n",
1073 |     "# Checking if the new directory exists\n",
1074 |     "print(new_dir.exists())"
1075 |    ]
1076 |   },
1077 |   {
1078 |    "cell_type": "markdown",
1079 |    "metadata": {},
1080 |    "source": [
1081 |     "## Exercises"
1082 |    ]
1083 |   },
1084 |   {
1085 |    "cell_type": "markdown",
1086 |    "metadata": {},
1087 |    "source": [
1088 |     "### Exercise 2.1:\n",
1089 |     "\n",
1090 |     "1. Create a directory called `ex_folder` in the current working directory. \n",
1091 |     "2. Check after creation if the directory exists.\n",
1092 |     "3. Create a file called `ex_file.txt` in the `ex_folder` directory."
1093 |    ]
1094 |   },
1095 |   {
1096 |    "cell_type": "code",
1097 |    "execution_count": null,
1098 |    "metadata": {},
1099 |    "outputs": [],
1100 |    "source": [
1101 |     "# 1."
1102 |    ]
1103 |   },
1104 |   {
1105 |    "cell_type": "code",
1106 |    "execution_count": null,
1107 |    "metadata": {},
1108 |    "outputs": [],
1109 |    "source": [
1110 |     "# 2."
1111 |    ]
1112 |   },
1113 |   {
1114 |    "cell_type": "code",
1115 |    "execution_count": null,
1116 |    "metadata": {},
1117 |    "outputs": [],
1118 |    "source": [
1119 |     "# 3."
1120 |    ]
1121 |   },
1122 |   {
1123 |    "cell_type": "markdown",
1124 |    "metadata": {},
1125 |    "source": [
1126 |     "### Exercise 2.2:\n",
1127 |     "\n",
1128 |     "Correct these paths so that they work on all operating systems, if possible. \n",
1129 |     "```python\n",
1130 |     "path1 = 'C:\\Path\\to\\your\\working\\dir\\ex_file.txt'\n",
1131 |     "path2 = 'Path/to/your/working/dir/ex_file.txt'\n",
1132 |     "path3 = '/Users/neeser/Documents/teaching/CH-200_PracticalProgrammingChem/practical-programming-in-chemistry-exercises/week_01/ex_folder/ex_file.txt'\n",
1133 |     "```\n",
1134 |     "\n",
1135 |     "What are the issues with these paths?"
1136 |    ]
1137 |   },
1138 |   {
1139 |    "cell_type": "code",
1140 |    "execution_count": null,
1141 |    "metadata": {},
1142 |    "outputs": [],
1143 |    "source": [
1144 |     "path1 = 'C:\\Path\\to\\your\\working\\dir\\ex_file.txt'\n",
1145 |     "path2 = 'Path/to/your/working/dir/ex_file.txt'\n",
1146 |     "path3 = Path('/Users/neeser/Documents/teaching/CH-200_PracticalProgrammingChem/practical-programming-in-chemistry-exercises/week_01/ex_folder/ex_file.txt')\n",
1147 |     "# correct here\n",
1148 |     "\n",
1149 |     "print(path1.exists())\n",
1150 |     "print(path2.exists())\n",
1151 |     "print(path3.exists())"
1152 |    ]
1153 |   },
1154 |   {
1155 |    "cell_type": "markdown",
1156 |    "metadata": {},
1157 |    "source": [
1158 |     "### Exercise 2.3:\n",
1159 |     "\n",
1160 |     "Delete the `ex_folder` directory and its contents. Check if the directory exists after deletion."
1161 |    ]
1162 |   },
1163 |   {
1164 |    "cell_type": "code",
1165 |    "execution_count": null,
1166 |    "metadata": {},
1167 |    "outputs": [],
1168 |    "source": [
1169 |     "# delete the directory and its contents\n"
1170 |    ]
1171 |   },
1172 |   {
1173 |    "cell_type": "code",
1174 |    "execution_count": null,
1175 |    "metadata": {},
1176 |    "outputs": [],
1177 |    "source": [
1178 |     "# check if ex_folder exists\n"
1179 |    ]
1180 |   }
1181 |  ],
1182 |  "metadata": {
1183 |   "kernelspec": {
1184 |    "display_name": "Python 3",
1185 |    "language": "python",
1186 |    "name": "python3"
1187 |   },
1188 |   "language_info": {
1189 |    "codemirror_mode": {
1190 |     "name": "ipython",
1191 |     "version": 3
1192 |    },
1193 |    "file_extension": ".py",
1194 |    "mimetype": "text/x-python",
1195 |    "name": "python",
1196 |    "nbconvert_exporter": "python",
1197 |    "pygments_lexer": "ipython3",
1198 |    "version": "3.11.5"
1199 |   },
1200 |   "orig_nbformat": 4
1201 |  },
1202 |  "nbformat": 4,
1203 |  "nbformat_minor": 2
1204 | }
1205 | 


--------------------------------------------------------------------------------
/Lecture03/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Lecture 03: Conda and Jupyter notebooks
  3 | 
  4 | ## Conda
  5 | 
  6 | ### 1. Create a new environment
  7 | 
  8 | Having set up `conda`, we will now create a new environment for this course. We will use this environment to install the necessary packages for the exercises and the final assignment. Please make a new environment with the following properties:
  9 | 
 10 | - Name: `ppchem`
 11 | - Python version: `3.10`
 12 | - Packages:
 13 |     - `pandas`
 14 |     - `rdkit` (version `2022.09.5`)
 15 |     - `jupyter`
 16 | 
 17 | You can find all necessary information on what commands to use in the [conda documentation](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html).
 18 | 
 19 | <details>
 20 | <summary>Solution: Commands</summary>
 21 | In a terminal, type the following commands:
 22 | 
 23 | ```bash
 24 | conda create -n ppchem python=3.10
 25 | conda activate ppchem
 26 | pip install pandas  # installs numpy as dependency
 27 | pip install rdkit==2022.09.5
 28 | pip install jupyter
 29 | ```
 30 | 
 31 | </details>
 32 | 
 33 | ### 2. Export the environment to a file
 34 | 
 35 | Once you have installed all necessary packages, you should export the environment to a file. This file can be used by others to recreate the same environment on their computer. Please use the following command to export the environment to a file called `env.yml`. Save the environment file in the repository you created in the GitHub section and push it to GitHub.
 36 | 
 37 | <details>
 38 | <summary>Solution: Commands</summary>
 39 | 
 40 | Don't forget to replace `path/to/ppchem` with your path!
 41 | 
 42 | ```bash
 43 | cd <path/to/ppchem>
 44 | git checkout main # Make sure to be on the main branch
 45 | conda activate ppchem
 46 | conda env export > env.yml
 47 | git add env.yml
 48 | git commit -m "Add environment file"
 49 | git push origin main
 50 | ```
 51 | 
 52 | </details>
 53 | 
 54 | ### Some useful commands
 55 | 
 56 | | Command | Description |
 57 | |---------|-------------|
 58 | | `conda env list` | List all environments on your computer with their respective paths.|
 59 | | `conda activate env-name` | activate an environment. |
 60 | | `conda list` | List all packages installed in the activated environment. |
 61 | | `conda env remove -n env-name` | Remove an environment from your computer. |
 62 | 
 63 | ## Jupyter notebooks in VS Code
 64 | 
 65 | A Jupyter notebook is a Python interface useful for testing and debugging small segments of code. Instead of running everything at once, it enables you to set break points and run individual lines of code one at a time.
 66 | First install the Jupyter extension in VSCode
 67 | 
 68 | <img width="764" alt="Screenshot 2025-02-24 at 16 03 43" src="https://github.com/user-attachments/assets/2b111201-8a3a-4ec5-956d-a6a2cd4bd948" />
 69 | 
 70 | Open your python-course folder on VSCode and open a terminal in this directory.
 71 | 
 72 | <img width="570" alt="Screenshot 2025-02-24 at 16 56 51" src="https://github.com/user-attachments/assets/f8e33518-a9b2-4f55-ae07-ee135b16c41f" />
 73 | 
 74 | Clone the code using the following command:
 75 | ```bash
 76 | git clone https://github.com/schwallergroup/practical-programming-in-chemistry-exercises.git
 77 | ```
 78 | 
 79 | Then double click on the Lecture 3 exercise notebook to open it. Select a kernel by clicking
 80 | Select Kernel -> Python Environments -> ppchem
 81 | ![Kernel selection](../assets/kernel_select.png)
 82 | 
 83 | ![Kernel environment selection](../assets/kernel_env.png)
 84 | 
 85 | ![Python environment selection](../assets/python_env.png)
 86 | Then install IPython if prompted. 
 87 | ![IPython installation prompt](../assets/ipython.png)
 88 | # Python Basics Tutorial Notebook
 89 | 
 90 | This notebook provides an introduction to fundamental Python programming concepts, focusing on data types, control structures, and file path handling.
 91 | Work through the exercises notebook and check your answers against the solutions notebook at the end of the class.
 92 | 
 93 | ## Contents
 94 | 
 95 | 1. **Python Data Types**
 96 |    - Numeric types (int, float, complex)
 97 |    - Sequence types (str, list, tuple)
 98 |    - Mapping types (dict)
 99 |    - Sets and Boolean types
100 |    - Comprehensive examples and exercises
101 |    - List operations and methods
102 | 
103 | 2. **Control Structures - Loops**
104 |    - While loops with practical examples
105 |    - For loops and iteration
106 |    - Loop control (break, continue)
107 |    - Index-based iteration
108 |    - List modification exercises
109 | 
110 | 3. **Path Handling**
111 |    - Introduction to pathlib
112 |    - Cross-platform path management
113 |    - Directory and file operations
114 |    - Path manipulation exercises
115 | 
116 | ## Exercises
117 | 
118 | The notebook includes practical exercises covering:
119 | - List manipulation and slicing
120 | - Boolean logic and conditional statements
121 | - Loop implementation and control flow
122 | - File system operations
123 | - Type conversion and data handling
124 | 
125 | Each section contains both guided examples and challenge exercises with real-world applications in scientific contexts.
126 | 
127 | ## Usage
128 | 
129 | Work through the notebook sequentially, as concepts build upon each other. Complete all exercises to reinforce learning. Code examples can be run independently to experiment with different scenarios.
130 | 
131 | ## Notes
132 | 
133 | - The notebook uses chemistry-related examples to demonstrate programming concepts
134 | - Pay special attention to the path handling section for cross-platform compatibility
135 | - Some exercises are designed to identify and fix common programming mistakes
136 | 


--------------------------------------------------------------------------------
/Lecture04/04_warm_up.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Recap!"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Python Data Types and Paths"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "### Recap on Data Types\n",
 22 |     "\n",
 23 |     "* Python has several built-in data types:\n",
 24 |     "* Numeric: `int`, `float`\n",
 25 |     "* Sequence: `str`, `list`, `tuple`\n",
 26 |     "* Mapping: `dict`\n",
 27 |     "* Set: `set`\n",
 28 |     "* Boolean: `bool`"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "### Numeric Data Types"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": null,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "# Integers and floats\n",
 45 |     "int_example = 3\n",
 46 |     "float_example = 3.14\n",
 47 |     "\n",
 48 |     "## Sequence Data Types\n",
 49 |     "# Strings, lists, and tuples\n",
 50 |     "str_example = \"Hello, Python!\"\n",
 51 |     "list_example = [1, 2, 3]\n",
 52 |     "tuple_example = (\"a\", \"b\", \"c\")\n",
 53 |     "\n",
 54 |     "# Indexing and slicing\n",
 55 |     "print(list_example[0])  # First element of list\n",
 56 |     "print(str_example[-1])  # Last character of string\n",
 57 |     "print(tuple_example[1:])  # Slicing tuple from second element to end\n"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "## Lists, Dictionaries, Loops, and Iterations"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "### Lists and Dictionaries"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "# Adding and accessing elements\n",
 81 |     "list_example.append(4)  # Adding an element to a list\n",
 82 |     "dict_example = {\"key1\": \"value1\", \"key2\": \"value2\"}\n",
 83 |     "print(dict_example[\"key1\"])  # Accessing a value from a dictionary\n",
 84 |     "\n",
 85 |     "## Loops and Iterations\n",
 86 |     "# For loop with a list\n",
 87 |     "for item in list_example:\n",
 88 |     "    print(item)\n"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "# While loop example\n",
 98 |     "print(\"While loop...\")\n",
 99 |     "i = 0\n",
100 |     "while i < len(list_example):\n",
101 |     "    print(list_example[i])\n",
102 |     "    i += 1\n",
103 |     "\n",
104 |     "# Or equivalently in a for loop\n",
105 |     "print(\"For loop...\")\n",
106 |     "for element in list_example:\n",
107 |     "    print(element)"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "metadata": {},
114 |    "outputs": [],
115 |    "source": [
116 |     "# Boolean variables and conditional statements\n",
117 |     "a = True\n",
118 |     "b = False\n",
119 |     "if a and b:\n",
120 |     "    print(\"Both are true\")\n",
121 |     "elif a or b:\n",
122 |     "    print(\"At least one is true\")  # this will be printed!\n",
123 |     "else:\n",
124 |     "    print(\"Neither is true\")"
125 |    ]
126 |   }
127 |  ],
128 |  "metadata": {
129 |   "kernelspec": {
130 |    "display_name": "ppchem",
131 |    "language": "python",
132 |    "name": "python3"
133 |   },
134 |   "language_info": {
135 |    "codemirror_mode": {
136 |     "name": "ipython",
137 |     "version": 3
138 |    },
139 |    "file_extension": ".py",
140 |    "mimetype": "text/x-python",
141 |    "name": "python",
142 |    "nbconvert_exporter": "python",
143 |    "pygments_lexer": "ipython3",
144 |    "version": "3.12.0"
145 |   }
146 |  },
147 |  "nbformat": 4,
148 |  "nbformat_minor": 2
149 | }
150 | 


--------------------------------------------------------------------------------
/Lecture04/README.md:
--------------------------------------------------------------------------------
 1 | # Lecture 04: Advanced Python - file I/O, functions, error handling, and classes
 2 | 
 3 | This lecture focuses on developing some more advanced skills in Python. In this exercise, you will learn how to:
 4 | 
 5 | 1. **write and read files** using what we learned previously about paths and file objects,
 6 | 1. **use functions** as a re-usable way to perform specific tasks,
 7 | 1. perform **error handling** to build more robust code and deal with errors more descriptively,
 8 | 1. and finally **build classes** as a way to demonstrate object-oriented programming,
 9 | 
10 | and as always, with some chemical themes!
11 | 
12 | 
13 | ## Setup
14 | 
15 | There are no particular package dependencies required for this week's exercises. However, you still need to pull the repo updates from GitHub as you have done before. 
16 | 
17 | **Important**: ensure you have saved your previous week's exercises in a folder **different from your local copy of the practical programming repository**. You only need to do this if you made changes to (for example) exercise 3 from within the repository. In this case, copy the notebook into a different folder and double check that it is backed up. Then revert the changes inside the repository, and pull this week's exercises.
18 | 
19 | 
20 | Copy Exercise 4 to a different folder before starting to work on it, opening the Jupyter notebooks directly in VSCode.
21 | 
22 | ## Warm-up
23 | 
24 | Make sure you have completed Exercise 3, and check out the [warm-up exercise](04_warm_up.ipynb) for a short recap on some python basics.
25 | 
26 | 
27 | ## Exercise
28 | 
29 | Now you're ready to give the [exercises](04_exercise.ipynb) a go!
30 | 
31 | 
32 | ## Extra reading
33 | 
34 | Being an effective computational scientist means writing code that (as a minimum) works, but very importantly is understandable and maintainable by human developers, ideally efficient, and useful. 
35 | 
36 | It is more and more common that computational science publications are published with well-documented and open-source code, both for the peer-review process and for community access post-publication. 
37 | 
38 | Even before this, following good coding practices can help you work and collaborate more effectively. If you're interested in learning more, have a read of this paper ([arxiv.org/pdf/1210.0530](https://arxiv.org/pdf/1210.0530)) that outlines some general best practices for computational scientists.
39 | 
40 | As a bonus, you can check out the F.A.I.R. principles for data management (at [this link](https://www.go-fair.org/fair-principles/), but also explained in [this video](https://www.youtube.com/watch?v=5OeCrQE3HhE)) that serve as good guidelines when compiling data for collaborators or publications. Knowing all the fine details is not necessary at this point, but it's good to be aware that they exist and keep them in mind when working on projects.
41 | 


--------------------------------------------------------------------------------
/Lecture04/reaction_yields.txt:
--------------------------------------------------------------------------------
1 | # Scientist Name | Theoretical Yield (g) | Actual Yield (g)
2 | Philippe 5.0 2.78
3 | Rebecca 4.0 2.0
4 | Sarina 0.1 0.09
5 | Joe 1000.0 1.0
6 | Daniel 3.14 2.0


--------------------------------------------------------------------------------
/Lecture04/reaction_yields_with_errors.txt:
--------------------------------------------------------------------------------
1 | # Scientist Name | Theoretical Yield (g) | Actual Yield (g)
2 | Philippe 5.0 2.78
3 | Rebecca is myname
4 | Sarina 0.1 0.09
5 | Joe 1000.0 -1.0
6 | Daniel 3.14 3.15


--------------------------------------------------------------------------------
/Lecture05/README.md:
--------------------------------------------------------------------------------
  1 | # Week 5: Numerical operations, data handling, data visualization
  2 | 
  3 | 
  4 | # Setup
  5 | 
  6 | ## Checking our conda environment
  7 | 
  8 | Welcome to week 5! Before running the exercise, we need to do a bit of set up. This week
  9 | we'll be learning about the Python packages `numpy`, `pandas`, and `matplotlib`.
 10 | 
 11 | Think back to exercise session 3 when we set up a conda environment. You'll remember that
 12 | we installed certain packages. More specifically, in the file
 13 | [Lecture03/README.md](../Lecture03/README.md), we ran the following commands:
 14 | 
 15 | ```
 16 | conda create -n ppchem python=3.10
 17 | conda activate ppchem
 18 | pip install pandas  # installs numpy as dependency
 19 | pip install rdkit==2022.09.5
 20 | ```
 21 | 
 22 | You can see that by running these, in our environment `ppchem`, we should already have
 23 | `numpy` and `pandas` installed. 
 24 | 
 25 | Open your terminal, activate your environment with `conda activate ppchem` and run the command `conda list`. The output should look something like this (output shortened):
 26 | 
 27 | ```
 28 | # packages in environment at /opt/miniforge3/envs/ppchem:
 29 | #
 30 | # Name                    Version                   Build  Channel
 31 | anyio                     4.2.0                    pypi_0    pypi
 32 | appnope                   0.1.4                    pypi_0    pypi
 33 | ...
 34 | numpy                     1.26.4                   pypi_0    pypi
 35 | ...
 36 | pandas                    2.2.0                    pypi_0    pypi
 37 | pandocfilters             1.5.1                    pypi_0    pypi
 38 | parso                     0.8.3                    pypi_0    pypi
 39 | pexpect                   4.9.0                    pypi_0    pypi
 40 | pillow                    10.2.0                   pypi_0    pypi
 41 | pip                       24.0               pyhd8ed1ab_0    conda-forge
 42 | ...
 43 | websocket-client          1.7.0                    pypi_0    pypi
 44 | wheel                     0.42.0             pyhd8ed1ab_0    conda-forge
 45 | xz                        5.2.6                h57fd34a_0    conda-forge
 46 | ```
 47 | 
 48 | and you should be able to see `numpy` and `pandas` there. A better way to do this, instead of manually searching this list, would be to use the `grep` command.
 49 | 
 50 | For instance:
 51 | 
 52 | ```bash
 53 | conda list | grep numpy
 54 | ```
 55 | will output something like: `numpy    2.2.3  pypi_0    pypi`
 56 | 
 57 | while
 58 | ```bash
 59 | conda list | grep pandas
 60 | ```
 61 | will output something like: `pandas   2.2.3  pypi_0    pypi`
 62 | 
 63 | <details>
 64 | <summary>Explanation</summary>
 65 | 
 66 | * `grep` is used to search for text patterns. Here we are searching for the text pattern "numpy" or "pandas" in the output of the command `conda list`
 67 | * The pipe symbol `|` connects two commands, sending the output of the first command as input to the second command.
 68 | * When you run `conda list | grep numpy`, you're taking all your installed packages (from conda list) and filtering to only show lines containing "numpy".
 69 | * This approach is much faster than scanning through a long list manually, as it instantly shows you if and which versions of numpy or pandas are installed in your environment.
 70 | 
 71 | </details>
 72 | 
 73 | ## Installing new packages and updating the environment file
 74 | 
 75 | **Note**: this section assumes that you have pushed your exported environment file to your personal
 76 | `ppchem` repository, as in the section "Export the environment to a file" in exercise 3. If this is not the case,
 77 | make sure you have followed all of the instructions before proceeding.
 78 | 
 79 | Next, we will install a new package in our environment and re-export the environment file. We will then push the updated `env.yml` to your personal `ppchem` repository.
 80 | 
 81 | In this step you will learn how to create a development branch in your repository, and merge your changes to the main branch via a *pull request*.
 82 | 
 83 | This can be done as follows:
 84 | 
 85 | 1. In your terminal, navigate to your personal `ppchem` repository on your computer. This will be in the folder you created in a previous exercise, for example at `~/git/ppchem/`.
 86 | 1. Make sure you have activated your conda environment (which is probably also called "ppchem"!): `conda activate ppchem`
 87 | 1. Install matplotlib as follows: `pip install matplotlib`
 88 | 1. Create a new branch: `git checkout -b update-env`
 89 | 1. Export the environment file: `conda env export > env.yml`
 90 | 1. Inspect the changes to the environment file compared to the last commit. This can be done by running `git diff env.yml`. Use your arrow keys to scroll. There may be a few changes, but most importantly you should see a line like: `+ - matplotlib==3.8.3`. This tells us that, relative to the last commit, matplotlib has been installed in the environment, at version number `3.8.3`. Press `q` to quit the git diff viewer.
 91 | 1. Add the changes: `git add env.yml`
 92 | 1. Commit them with a meaningful message: `git commit -m "Updated environment to include matplotlib"`
 93 | 1. Push to your repository. As the remote doesn't yet know that we have created the branch `update-env` locally, we need to push with: `git push --set-upstream origin update-env`
 94 |    
 95 | Navigate to your repository on Github, at URL:
 96 | `https://github.com/<username>/ppchem`. You should
 97 | see a page like this:
 98 | 
 99 | ![Pull Request 1](../assets/Lecture05/1.png)
100 | 
101 | Click the branch drop down menu where it says "main" to select a branch, and select the
102 | branch "update-env":
103 | 
104 | ![Pull Request 2](../assets/Lecture05/2.png)
105 | 
106 | You should see that your branch `update-env` is 1 commit ahead of main. We want to
107 | create a pull request for this branch, so will click on the "contribute" button, and
108 | select "Open pull request":
109 | 
110 | ![Pull Request 3](../assets/Lecture05/3.png)
111 | 
112 | this will open a new page for opening a pull request:
113 | 
114 | ![Pull Request 4](../assets/Lecture05/4.png)
115 | 
116 | Make sure you add a title and a short description of your pull request - i.e. the changes
117 | you have made and want to merge. Then, select "Create pull request". This will take you
118 | to the pull request page.
119 | 
120 | This page is typically where code reviews will be posted. Usually, if you are
121 | contributing to an open source package, and want to merge some of your changes into the
122 | main branch of the code, someone will review your work, request changes and leave
123 | comments. This all happens on this page. 
124 | 
125 | As this is just your personal repository and the changes to the code weren't
126 | significant, for now we will not do any review and just merge into main. Select "Merge
127 | pull request":
128 | 
129 | ![Pull Request 5](../assets/Lecture05/5.png)
130 | 
131 | and "Confirm merge":
132 | 
133 | 
134 | ![Pull Request 6](../assets/Lecture05/6.png)
135 | 
136 | then your pull request is merged! You can safely delete the branch associated with the
137 | PR, as all the changes are now in main:
138 | 
139 | ![Pull Request 7](../assets/Lecture05/7.png)
140 | 
141 | The pull request is accessible in the "Pull Requests" tab of the main repository page,
142 | but will be in the 'closed' section.
143 | 
144 | Navigate back to your main repository landing page, i.e.
145 | `https://github.com/<username>/ppchem` and check that the changes are there:
146 | 
147 | ![Pull Request 8](../assets/Lecture05/8.png)
148 | 
149 | Good job! Now onto the exercises...
150 | 


--------------------------------------------------------------------------------
/Lecture05/molecular_data.txt:
--------------------------------------------------------------------------------
 1 | # Compound Name,Molecular Weight (g/mol),Boiling Point (°C)
 2 | Water,18.015,100.0
 3 | Carbon Dioxide,44.01,-78.5
 4 | Methane,16.04,-161.5
 5 | Ethanol,46.07,78.3
 6 | Oxygen,32.00,-183.0
 7 | Nitrogen,28.01,-196.0
 8 | Hydrogen Peroxide,34.01,150.2
 9 | Ammonia,17.03,-33.3
10 | Sulfuric Acid,98.09,337.0
11 | Acetic Acid,60.05,118.1
12 | Methanol,32.04,64.7
13 | Benzene,78.11,80.1
14 | Ethylene,28.05,-103.7
15 | Propane,44.10,-42.1
16 | Butane,58.12,-0.5
17 | Pentane,72.15,36.1
18 | Hexane,86.18,68.7
19 | Heptane,100.20,98.4
20 | Octane,114.22,125.7
21 | Nonane,128.25,150.8
22 | Decane,142.28,174.1
23 | Caffeine,194.19,178.0
24 | Ethane,30.07,-88.6


--------------------------------------------------------------------------------
/Lecture06/README.md:
--------------------------------------------------------------------------------
 1 | # Lecture 06: Introduction to RDKit
 2 | 
 3 | RDKit is a powerful open-source cheminformatics toolkit designed to simplify chemical data processing. Importantly, it has specialized classes to deal with molecular structures, allowing us to easily represent, modify, and visualize molecules. RDKit is a useful tool both in research and in industry (and some of your TAs proudly consider themselves RDKit hackers :sunglasses:).
 4 | 
 5 | Today we will go through some of RDKit's most useful functionalities:
 6 | 1. **Reading, writing, and visualizing molecular data**
 7 | 2. **Calculating descriptors**
 8 | 3. **Representing molecules with "fingerprints".** 
 9 | 
10 | 
11 | ## Setup
12 | 
13 | Get your exercises by running `git pull` from your `practical-programming-in-chemistry-exercises` folder. Copy `Lecture06` to your `ppchem` folder (or whatever folder you're using to complete your exercises), and complete the exercises from there. 
14 | 
15 | This week, we need one extra package: `py3Dmol`. Install it with `pip` into your `ppchem` environment. Update the `env.yml` file just like you did yesterday. Try to see what you remember from yesterday, but if you need help, walk through the instructions in Lecture05 with `py3Dmol` instead of `matplotlib`. 
16 | 
17 | <details>
18 | <summary>Solution: Commands</summary>
19 | 
20 | Don't forget to replace `path/to/ppchem` with your path!
21 | 
22 | ```bash
23 | cd <path/to/ppchem>
24 | conda activate ppchem
25 | pip install py3Dmol
26 | conda env export > env.yml
27 | git add env.yml
28 | git commit -m "Add environment file"
29 | git push origin main
30 | ```
31 | 
32 | </details>
33 | 
34 | 
35 | 
36 | 
37 | ## Exercise
38 | 
39 | Now you're ready to give the [exercises](06_exercise.ipynb) a go!
40 | 


--------------------------------------------------------------------------------
/Lecture07/README.md:
--------------------------------------------------------------------------------
 1 | # Lecture 7: Advanced RDKit
 2 | 
 3 | ## Introduction
 4 | 
 5 | In this lecture, we will dive deeper into RDKit and explore some of its more advanced functionalities. We will cover:
 6 | 
 7 | 1. **Substructure matching**: how to search for specific molecular substructures.
 8 | 2. **Maximum common substructure (MCS)**: how to find the largest common substructure between two molecules.
 9 | 3. **Conformer generation**: how to generate 3D conformers of a molecule.
10 | 
11 | ## Setup
12 | 
13 | As always, we first need to update our code. This can be done by following these steps:
14 | 
15 | 1. Open VSCode and open the folder where you cloned the `practical-programming-in-chemistry-exercises` repository.
16 | 2. Go to the Source Control tab (the one that looks like a branch) and click on the three dots.
17 | 3. Click on `Pull, Push`.
18 | 4. Click on `Pull` to update your local repository.
19 | 5. Copy the exercise notebook to your folder/repository to work on it.
20 | 
21 | If you cannot pull because you have made changes, copy the changed files into your own repository, revert the changes (arrow pointing left), and then pull.
22 | 
23 | Aaaand that's it! You're ready to start with this lecture's [exercises](07_exercises.ipynb).
24 | 
25 | **Happy coding** :star_struck:
26 | 


--------------------------------------------------------------------------------
/Lecture08/Part1/08_drfp_partA.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "6ea1be65-e1e1-4259-bb87-c2d7a526341f",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# DRFP - Reaction fingerprint\n",
  9 |     "\n",
 10 |     "DRFP is a reaction fingerprint developed by Probst et al. \n",
 11 |     "\n",
 12 |     "Here we will just perform a quick test that it is correctly installed to show you the different ways of installing it.\n",
 13 |     "\n",
 14 |     "To display the path to the activated conda environment, we can run:"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "id": "72d03e42-596e-4bf7-ad5f-ae5e53c470a9",
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import sys \n",
 25 |     "print(sys.prefix)"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "id": "bb117f3b-7ff4-495a-a9e6-9914efc54a04",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "This should end with `drfp_env`, if you followed the tutorial in the README.\n",
 34 |     "\n",
 35 |     "We can also check where we installed the `drfp` package."
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": null,
 41 |    "id": "a2082807-2463-4a35-a2f1-5f6bfab16970",
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "import drfp\n",
 46 |     "print(\"DRFP installed in:\", drfp.__file__)"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "id": "a4ed33f4-6dd3-4366-a830-b93b8bd6de73",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "Those two checks were mainly to show you how the python package installation is working.\n",
 55 |     "\n",
 56 |     "Now, we need to import the reaction encoder (`DrfpEncoder`)\n",
 57 |     "\n",
 58 |     "### Testing DRFP"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": null,
 64 |    "id": "1c42f92a-3450-4f0e-822e-3fefd9798ead",
 65 |    "metadata": {},
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "from drfp import DrfpEncoder"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "id": "67615523-ad08-4616-9041-5d48568a1194",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "Let's define a few reaction SMILES. If you want to see what they look like, you could use [CDK depict](https://www.simolecule.com/cdkdepict/depict.html)."
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "id": "5514ae5b-70ea-4493-8343-462d6e29bdc4",
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "rxn_smiles = [\n",
 87 |     "    \"CO.O[C@@H]1CCNC1.[C-]#[N+]CC(=O)OC>>[C-]#[N+]CC(=O)N1CC[C@@H](O)C1\",\n",
 88 |     "    \"CCOC(=O)C(CC)c1cccnc1.Cl.O>>CCC(C(=O)O)c1cccnc1\",\n",
 89 |     "]"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "id": "dea655f5-8ec0-4681-bca8-4f4e26466a98",
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "fps = DrfpEncoder.encode(rxn_smiles)\n",
100 |     "fps"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "id": "c4bb0788-da88-494c-a464-555d64556938",
106 |    "metadata": {},
107 |    "source": [
108 |     "Now, go back to the README, and complete the installation from source in another environment."
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "id": "85750285-550f-467a-a174-8cc776c7bcac",
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": []
118 |   }
119 |  ],
120 |  "metadata": {
121 |   "kernelspec": {
122 |    "display_name": "drfp_pypi",
123 |    "language": "python",
124 |    "name": "python3"
125 |   },
126 |   "language_info": {
127 |    "codemirror_mode": {
128 |     "name": "ipython",
129 |     "version": 3
130 |    },
131 |    "file_extension": ".py",
132 |    "mimetype": "text/x-python",
133 |    "name": "python",
134 |    "nbconvert_exporter": "python",
135 |    "pygments_lexer": "ipython3",
136 |    "version": "3.10.16"
137 |   }
138 |  },
139 |  "nbformat": 4,
140 |  "nbformat_minor": 5
141 | }
142 | 


--------------------------------------------------------------------------------
/Lecture08/Part1/08_drfp_partB.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "6ea1be65-e1e1-4259-bb87-c2d7a526341f",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "# DRFP - Reaction fingerprint\n",
  9 |     "\n",
 10 |     "DRFP is a reaction fingerprint developed by Probst et al. \n",
 11 |     "\n",
 12 |     "Here we will just perform a quick test that it is correctly installed to show you the different ways of installing it.\n",
 13 |     "\n",
 14 |     "To display the path to the activated conda environment, we can run:"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": null,
 20 |    "id": "72d03e42-596e-4bf7-ad5f-ae5e53c470a9",
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import sys \n",
 25 |     "print(sys.prefix)"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "id": "bb117f3b-7ff4-495a-a9e6-9914efc54a04",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "This should end with `drfp_source`, if you followed the tutorial in the README.\n",
 34 |     "\n",
 35 |     "We can also check where we installed the `drfp` package."
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": null,
 41 |    "id": "a2082807-2463-4a35-a2f1-5f6bfab16970",
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "import drfp\n",
 46 |     "print(\"DRFP installed in:\", drfp.__file__)"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "id": "a4ed33f4-6dd3-4366-a830-b93b8bd6de73",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "Those two checks were mainly to show you how the python package installation is working.\n",
 55 |     "\n",
 56 |     "Compare the `path` with what you had in the output in `partA`. Because you installed it with `pip install -e .` (`-e` is the \"editable\" flag), the `__file__` path should point to a subfolder of where you downloaded `drfp`. \n",
 57 |     "\n",
 58 |     "Hence, all the changes you make to the code in that folder will be immediately reflected in your installation. \n",
 59 |     "\n",
 60 |     "Now, we need to import the reaction encoder (`DrfpEncoder`)\n",
 61 |     "\n",
 62 |     "### Testing DRFP"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "id": "1c42f92a-3450-4f0e-822e-3fefd9798ead",
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "from drfp import DrfpEncoder"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "id": "67615523-ad08-4616-9041-5d48568a1194",
 78 |    "metadata": {},
 79 |    "source": [
 80 |     "Let's define a few reaction SMILES. If you want to see what they look like, you could use [CDK depict](https://www.simolecule.com/cdkdepict/depict.html)."
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": null,
 86 |    "id": "5514ae5b-70ea-4493-8343-462d6e29bdc4",
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "rxn_smiles = [\n",
 91 |     "    \"CO.O[C@@H]1CCNC1.[C-]#[N+]CC(=O)OC>>[C-]#[N+]CC(=O)N1CC[C@@H](O)C1\",\n",
 92 |     "    \"CCOC(=O)C(CC)c1cccnc1.Cl.O>>CCC(C(=O)O)c1cccnc1\",\n",
 93 |     "]"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "id": "dea655f5-8ec0-4681-bca8-4f4e26466a98",
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "fps = DrfpEncoder.encode(rxn_smiles)\n",
104 |     "fps"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "markdown",
109 |    "id": "ab12b471-3d69-4402-8bdc-80adb43a28d3",
110 |    "metadata": {},
111 |    "source": [
112 |     "### Visualize the bits\n",
113 |     "\n",
114 |     "We could visualize the bits with `matplotlib`, so let's try to do that."
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": null,
120 |    "id": "89282acf-3b8f-4b51-9116-816fcb65c03d",
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "import matplotlib.pyplot as plt\n",
125 |     "import numpy as np"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "id": "add89cd9-6b1f-45b4-9916-f7e299d4ffe4",
131 |    "metadata": {},
132 |    "source": [
133 |     "As you have created a new conda environment and not yet installed `matplotlib`, this should result in a:\n",
134 |     "\n",
135 |     "```\n",
136 |     "---------------------------------------------------------------------------\n",
137 |     "ModuleNotFoundError                       Traceback (most recent call last)\n",
138 |     "Cell In[6], line 1\n",
139 |     "----> 1 import matplotlib.pyplot as plt\n",
140 |     "      2 import numpy as np\n",
141 |     "      4 # Sample arrays, replace these with your actual data\n",
142 |     "\n",
143 |     "ModuleNotFoundError: No module named 'matplotlib'\n",
144 |     "```\n",
145 |     "\n",
146 |     "Conveniently, we can access the command line through a Jupyter notebook with `!`, and install `matplotlib` in the active conda environment. "
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": null,
152 |    "id": "7174516b-d36d-4a7d-be49-2e5841047934",
153 |    "metadata": {},
154 |    "outputs": [],
155 |    "source": [
156 |     "!pip install matplotlib"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "id": "c0a39fe0-2d8a-4868-a279-ffc5f1ebaacc",
162 |    "metadata": {},
163 |    "source": [
164 |     "Now, we are able to import `matplotlib`. The reason we do not have to do the same with `numpy` is that it is a dependency of `drfp`, and therefore, was automatically installed together with `drfp`."
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": null,
170 |    "id": "4b1e9f9d-3d0a-44df-9fd3-ad42b3c86d62",
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "import matplotlib.pyplot as plt\n",
175 |     "import numpy as np"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "id": "11c21da2-9f83-47c2-bdbe-8c385473e26e",
182 |    "metadata": {},
183 |    "outputs": [],
184 |    "source": [
185 |     "# To make the visualisation a bit more appealing, we will not use the 2048 dimensions but only 128.\n",
186 |     "fps_128 = DrfpEncoder.encode(rxn_smiles, n_folded_length=128)\n",
187 |     "fps_128"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": null,
193 |    "id": "d1865aad-cdb5-4f0a-9909-721ea9349b3d",
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": [
197 |     "# Convert list of fingerprints into a 2D NumPy array\n",
198 |     "arrays_2d = np.array(fps_128)\n",
199 |     "\n",
200 |     "# Visualize the arrays\n",
201 |     "plt.figure(figsize=(20, 2))  # Adjust the figsize as necessary\n",
202 |     "plt.imshow(arrays_2d, aspect='auto', cmap='viridis')\n",
203 |     "plt.colorbar(label='Value')\n",
204 |     "plt.yticks(ticks=np.arange(len(fps_128)), labels=[f'RXN {i+1}' for i in range(len(fps_128))])\n",
205 |     "plt.xlabel('Index')\n",
206 |     "plt.show()"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": null,
212 |    "id": "6a2c0c7a",
213 |    "metadata": {},
214 |    "outputs": [],
215 |    "source": []
216 |   }
217 |  ],
218 |  "metadata": {
219 |   "kernelspec": {
220 |    "display_name": "drfp_source",
221 |    "language": "python",
222 |    "name": "python3"
223 |   },
224 |   "language_info": {
225 |    "codemirror_mode": {
226 |     "name": "ipython",
227 |     "version": 3
228 |    },
229 |    "file_extension": ".py",
230 |    "mimetype": "text/x-python",
231 |    "name": "python",
232 |    "nbconvert_exporter": "python",
233 |    "pygments_lexer": "ipython3",
234 |    "version": "3.10.16"
235 |   }
236 |  },
237 |  "nbformat": 4,
238 |  "nbformat_minor": 5
239 | }
240 | 


--------------------------------------------------------------------------------
/Lecture08/Part1/README.md:
--------------------------------------------------------------------------------
1 | ## Exercises
2 | 
3 | Please refer to the main [README](../README.md) for instructions!


--------------------------------------------------------------------------------
/Lecture08/Part2/08_from_script_to_package.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Lecture 8: turning a Python script into a Python package"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Introduction\n",
 15 |     "\n",
 16 |     "In this exercise, you'll learn how to convert a Python script into a Python package. Packaging your code is an essential skill in software development as it promotes code reusability, maintainability, and distribution.\n",
 17 |     "\n",
 18 |     "The goal of the exercise is to take the existing (and very simple) `amremover_script.py` file (aka a Python *module*) and restructure it into a Python package called `amremover`.\n",
 19 |     "\n",
 20 |     "Within the context of digital chemistry, and more specifically computational reaction prediction or database building, there are situations where you would like to have your SMILES without atom-mapping, which may be included by default via (for example) RDKit. Although you could remove them by hand for a few reactions, it is convenient to automate atom-mapping removal with a Python tool. \n",
 21 |     "\n",
 22 |     "The `amremover_script.py` module contains python functions for removing atom mapping numbers from SMILES strings and canonicalizing them. The functionality is basic, but useful, and is a good playground for illustrating the point of this week's exercise."
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "The file contains the following code structure:\n",
 30 |     "\n",
 31 |     "```python\n",
 32 |     "import re\n",
 33 |     "from rdkit import Chem\n",
 34 |     "\n",
 35 |     "def remove_atom_mapping(smiles: str) -> str:\n",
 36 |     "    ...\n",
 37 |     "\n",
 38 |     "def canonicalize_smiles(smiles: str) -> str:\n",
 39 |     "    ...\n",
 40 |     "\n",
 41 |     "def remove_atom_mapping_and_canonicalize_rxn_smiles(smiles: str) -> str:\n",
 42 |     "    ...\n",
 43 |     "\n",
 44 |     "rxn_smiles_with_atom_mapping = '[CH3:17][S:14](=[O:15])(=[O:16])[N:11]1[CH2:10][CH2:9][N:8](Cc2ccccc2)[CH2:13][CH2:12]1.C1CCCCC1>[OH-].[OH-].[Pd+2].CCO>[CH3:17][S:14](=[O:15])(=[O:16])[N:11]1[CH2:10][CH2:9][NH:8][CH2:13][CH2:12]1'\n",
 45 |     "\n",
 46 |     "print(f\"RXN SMILES with atom mapping: {rxn_smiles_with_atom_mapping}\")\n",
 47 |     "print(\"*** Remove atom mapping ***\")\n",
 48 |     "rxn_smiles_without_atom_mapping = remove_atom_mapping_and_canonicalize_rxn_smiles(rxn_smiles_with_atom_mapping)\n",
 49 |     "print(f\"RXN SMILES without atom mapping: {rxn_smiles_without_atom_mapping}\")\n",
 50 |     "```\n",
 51 |     "\n",
 52 |     "You are nevertheless encouraged to open the file and read the code to understand it better.\n",
 53 |     "\n",
 54 |     "It is important to notice that the module, as currently written, contains both the useful functions **and** an example usage! This is not what we'd expect (or want) from a re-usable python module that exists in a package. Part of today's exercise will be improving this!"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "We can run this script even from a Jupyter notebook, using the `!` character to execute code in the shell. "
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "!python amremover_script.py"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "Or import the function and execute it here:"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": null,
 83 |    "metadata": {},
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "from amremover_script import remove_atom_mapping_and_canonicalize_rxn_smiles\n",
 87 |     "\n",
 88 |     "rxn_smiles_with_atom_mapping = '[CH3:17][S:14](=[O:15])(=[O:16])[N:11]1[CH2:10][CH2:9][N:8](Cc2ccccc2)[CH2:13][CH2:12]1.C1CCCCC1>[OH-].[OH-].[Pd+2].CCO>[CH3:17][S:14](=[O:15])(=[O:16])[N:11]1[CH2:10][CH2:9][NH:8][CH2:13][CH2:12]1'\n",
 89 |     "\n",
 90 |     "print(f\"RXN SMILES with atom mapping: {rxn_smiles_with_atom_mapping}\")\n",
 91 |     "print(\"*** Remove atom mapping ***\")\n",
 92 |     "rxn_smiles_without_atom_mapping = remove_atom_mapping_and_canonicalize_rxn_smiles(rxn_smiles_with_atom_mapping)\n",
 93 |     "print(f\"RXN SMILES without atom mapping: {rxn_smiles_without_atom_mapping}\")"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "#### To discuss\n",
101 |     "\n",
102 |     "With the structure of the module in mind, think back to why there is a double printing of outputs here when the module is loaded. \n",
103 |     "\n",
104 |     "Hint: what happens when you import a python module?\n",
105 |     "\n",
106 |     "See what happens when you run the above cell a second time. Discuss with the person next to you to see if you can figure out why!"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "## Your task\n",
114 |     "\n",
115 |     "While this script works as intended, it lacks the structure and organization that a Python package provides. By converting it into a package, you'll learn how to organize your code into modules, set up package metadata, and create a distributable version of your software.\n",
116 |     "\n",
117 |     "To get started, you'll be provided with a minimal template for the `amremover` folder. Locate this in the current directory, and inspect the contents.\n",
118 |     "\n",
119 |     "**Your task is to**:\n",
120 |     "\n",
121 |     "1. Inspect the existing code in `amremover_script.py`.\n",
122 |     "2. Determine the appropriate file structure and module organization for the package.\n",
123 |     "3. Copy the **relevant** code from `amremover_script.py` into the appropriate file within the `amremover` package.\n",
124 |     "4. Make sure to specify `rdkit` and `numpy` (with the correct version!) as dependencies. Refer back to the README to remind yourself what you did to install these packages in your environment.\n",
125 |     "5. Ensure that the package is properly configured and can be installed using `pip install -e .`. This involves modifying the `pyproject.toml` file; for help, refer to https://packaging.python.org/en/latest/guides/writing-pyproject-toml/.\n",
126 |     "\n",
127 |     "Throughout the exercise, you'll learn about essential package components like `pyproject.toml`, `__init__.py`, and module organization. By the end, you'll have a better understanding of how to structure and distribute your Python code as a reusable package.\n",
128 |     "\n",
129 |     "While you develop your package, regularly come back here and check whether you have managed to create the package successfully. The following cells should run without error, and print the desired output. This cell will also give you some hints as to how importing the module should work:"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": null,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "!pip install ./amremover"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": [
147 |     "from amremover.utils import remove_atom_mapping_and_canonicalize_rxn_smiles\n",
148 |     "\n",
149 |     "rxn_smiles_with_atom_mapping = 'CCN(C(C)C)C(C)C.[O:16]=[C:15]([O:17][CH2:18][c:19]1[cH:20][cH:21][cH:22][cH:23][cH:24]1)[N:9]1[CH2:10][CH2:11][NH:12][CH2:13][CH2:14]1.[O:8]=[c:4]1[cH:3][c:2](Cl)[n:7][cH:6][nH:5]1>CCC(C)O>[O:16]=[C:15]([O:17][CH2:18][c:19]1[cH:20][cH:21][cH:22][cH:23][cH:24]1)[N:9]1[CH2:10][CH2:11][N:12]([c:2]2[cH:3][c:4](=[O:8])[nH:5][cH:6][n:7]2)[CH2:13][CH2:14]1'\n",
150 |     "\n",
151 |     "rxn_smiles_without_atom_mapping = remove_atom_mapping_and_canonicalize_rxn_smiles(rxn_smiles_with_atom_mapping)\n",
152 |     "\n",
153 |     "print(rxn_smiles_without_atom_mapping)\n",
154 |     "\n",
155 |     "assert rxn_smiles_without_atom_mapping == 'CCN(C(C)C)C(C)C.O=C(OCc1ccccc1)N1CCNCC1.O=c1cc(Cl)nc[nH]1>CCC(C)O>O=C(OCc1ccccc1)N1CCN(c2cc(=O)[nH]cn2)CC1'"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "metadata": {},
161 |    "source": [
162 |     "# Solutions\n",
163 |     "\n",
164 |     "There are two packages present that offer solutions to the main and advanced (below) exercises. Try as best you can to build your package without looking at the solutions, discussing with your peers and the TAs as you go. The solutions are there as a last resort!"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "## Advanced exercise: creating a command-line interface\n",
172 |     "\n",
173 |     "Explore [Typer](https://typer.tiangolo.com) to add a command line interface to the `amremover` package. \n",
174 |     "\n",
175 |     "The goal of this part would be to be able to run:"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": null,
181 |    "metadata": {},
182 |    "outputs": [],
183 |    "source": [
184 |     "!amremover \"CCN(C(C)C)C(C)C.[O:16]=[C:15]([O:17][CH2:18][c:19]1[cH:20][cH:21][cH:22][cH:23][cH:24]1)[N:9]1[CH2:10][CH2:11][NH:12][CH2:13][CH2:14]1.[O:8]=[c:4]1[cH:3][c:2](Cl)[n:7][cH:6][nH:5]1>CCC(C)O>[O:16]=[C:15]([O:17][CH2:18][c:19]1[cH:20][cH:21][cH:22][cH:23][cH:24]1)[N:9]1[CH2:10][CH2:11][N:12]([c:2]2[cH:3][c:4](=[O:8])[nH:5][cH:6][n:7]2)[CH2:13][CH2:14]1\" "
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {},
190 |    "source": [
191 |     "and it should print the canonicalized reaction SMILES without atom mapping: \n",
192 |     "```\n",
193 |     "CCN(C(C)C)C(C)C.O=C(OCc1ccccc1)N1CCNCC1.O=c1cc(Cl)nc[nH]1>CCC(C)O>O=C(OCc1ccccc1)N1CCN(c2cc(=O)[nH]cn2)CC1\n",
194 |     "```"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "markdown",
199 |    "metadata": {},
200 |    "source": []
201 |   }
202 |  ],
203 |  "metadata": {
204 |   "kernelspec": {
205 |    "display_name": "reactions",
206 |    "language": "python",
207 |    "name": "python3"
208 |   },
209 |   "language_info": {
210 |    "codemirror_mode": {
211 |     "name": "ipython",
212 |     "version": 3
213 |    },
214 |    "file_extension": ".py",
215 |    "mimetype": "text/x-python",
216 |    "name": "python",
217 |    "nbconvert_exporter": "python",
218 |    "pygments_lexer": "ipython3",
219 |    "version": "3.10.16"
220 |   }
221 |  },
222 |  "nbformat": 4,
223 |  "nbformat_minor": 4
224 | }
225 | 


--------------------------------------------------------------------------------
/Lecture08/Part2/README.md:
--------------------------------------------------------------------------------
1 | ## Exercises
2 | 
3 | Please refer to the main [README](../README.md) for preliminary instructions. Then, the exercise for Part 2 can be followed in the main notebook [08_from_script_to_package.ipynb](08_from_script_to_package.ipynb). You will be creating your package in the [amremover](amremover) directory. 
4 | 
5 | 
6 | Solutions to the standard and advanced exercises are given in [solution_standard](solution_standard) and [solution_advanced](solution_advanced), respectively. These both contain completed `amremover` packages, which can be installed from source using `pip install -e .` (provided you are in the desired `.../amremover/` directory) and used.
7 | 
8 | Happy coding!


--------------------------------------------------------------------------------
/Lecture08/Part2/amremover/pyproject.toml:
--------------------------------------------------------------------------------
 1 | # https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
 2 | # Here you find a lot more information of what metadata you could
 3 | # specify in the pyproject.toml
 4 | 
 5 | [build-system] # let's go with the default build system (instead of setuptools)
 6 | requires = ["hatchling"]
 7 | build-backend = "hatchling.build"
 8 | 
 9 | # TODO: fill in the project specification below.
10 | [project]
11 | # define name = ..., 
12 | # version = "0.0.1" (TOML string values must be quoted),
13 | # dependencies = [...]
14 | 
15 | 
16 | # We need to define where the main package is. You can leave this bit as is.
17 | [tool.hatch.build.targets.wheel]
18 | packages = ["src/amremover"]
19 | 


--------------------------------------------------------------------------------
/Lecture08/Part2/amremover/src/amremover/utils.py:
--------------------------------------------------------------------------------
1 | # TODO: copy in the relevant functions


--------------------------------------------------------------------------------
/Lecture08/Part2/amremover_script.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from rdkit import Chem
 3 | 
 4 | def remove_atom_mapping(smiles: str) -> str:
 5 |     # from: https://github.com/rxn4chemistry/rxn-chemutils/blob/main/src/rxn/chemutils/utils.py, MIT
 6 |     """
 7 |     Remove the atom-mapping numbers (e.g. the ":17" in "[CH3:17]") from a SMILES string.
 8 | 
 9 |     The resulting SMILES strings will still contain brackets and it may be
10 |     advisable to canonicalize them or clean them up as a postprocessing step.
11 | 
12 |     Args:
13 |         smiles: SMILES or reaction SMILES string potentially containing mapping information.
14 | 
15 |     Returns:
16 |         The input string with each ":<digits>" directly before a "]" removed (except after "*").
17 |     """
18 |     # We look for ":" followed by digits before a "]" not coming after an "*"
19 |     # The lookbehind rejects a ":" that directly follows "*", so e.g. "[*:1]" is left unchanged.
20 |     return re.sub(r"(?<=[^\*])(:\d+)]", "]", smiles)
21 | 
22 | def canonicalize_smiles(smiles: str) -> str:
23 |     """
24 |     Canonicalize a SMILES string by round-tripping it through an RDKit molecule.
25 | 
26 |     Args:
27 |         smiles: SMILES string, potentially not canonical.
28 | 
29 |     Returns:
30 |         The SMILES written by `Chem.MolToSmiles`, or '' if `Chem.MolFromSmiles` returned None.
31 |     """
32 |     mol = Chem.MolFromSmiles(smiles)
33 | 
34 |     if mol is not None:
35 |         return Chem.MolToSmiles(mol)
36 |     # MolFromSmiles returned None: report that as an empty string instead of raising.
37 |     return ''
38 | 
39 | 
40 | def remove_atom_mapping_and_canonicalize_rxn_smiles(smiles: str) -> str:
41 |     """
42 |     Remove atom mapping from a reaction SMILES and canonicalize each of its parts.
43 | 
44 |     Args:
45 |         smiles: reaction SMILES string (parts separated by '>'), possibly atom-mapped and not canonical.
46 | 
47 |     Returns:
48 |         The canonicalized parts re-joined with '>', without atom mapping.
49 |     """
50 |     # Strip the mapping numbers first, on the whole reaction string at once.
51 |     smiles_without_atom_mapping = remove_atom_mapping(smiles)
52 |     # Split on '>' so that every part between the separators is handled on its own.
53 |     rxn_parts = smiles_without_atom_mapping.split('>')
54 |     # Each part is canonicalized as one SMILES string; canonicalize_smiles yields '' if parsing fails.
55 |     can_rxn_parts = [canonicalize_smiles(smiles) for smiles in rxn_parts]
56 |     # Re-assemble the parts in their original order.
57 |     return '>'.join(can_rxn_parts)
58 | # Example usage. This is module-level code: it is executed when the file is run and also when it is imported.
59 | rxn_smiles_with_atom_mapping = '[CH3:17][S:14](=[O:15])(=[O:16])[N:11]1[CH2:10][CH2:9][N:8](Cc2ccccc2)[CH2:13][CH2:12]1.C1CCCCC1>[OH-].[OH-].[Pd+2].CCO>[CH3:17][S:14](=[O:15])(=[O:16])[N:11]1[CH2:10][CH2:9][NH:8][CH2:13][CH2:12]1'
60 | 
61 | print(f"RXN SMILES with atom mapping: {rxn_smiles_with_atom_mapping}")
62 | print("*** Remove atom mapping ***")
63 | rxn_smiles_without_atom_mapping = remove_atom_mapping_and_canonicalize_rxn_smiles(rxn_smiles_with_atom_mapping)
64 | print(f"RXN SMILES without atom mapping: {rxn_smiles_without_atom_mapping}")
65 | 


--------------------------------------------------------------------------------
/Lecture08/Part2/solution_advanced/amremover/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system] 
 2 | requires = ["hatchling"]
 3 | build-backend = "hatchling.build"
 4 | 
 5 | [project]
 6 | name = "amremover" # name of the installed package
 7 | version = "0.0.1" # version is required
 8 | dependencies = [ # specify the required package dependencies
 9 |     "rdkit",
10 |     "numpy == 1.25",
11 |     "typer",
12 | ]
13 | 
14 | [tool.hatch.build.targets.wheel]
15 | packages = ["src/amremover"]
16 | 
17 | # https://hatch.pypa.io/1.9/config/metadata/#cli
18 | [project.scripts]
19 | amremover = "amremover.cli:run"


--------------------------------------------------------------------------------
/Lecture08/Part2/solution_advanced/amremover/src/amremover/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/Lecture08/Part2/solution_advanced/amremover/src/amremover/__init__.py


--------------------------------------------------------------------------------
/Lecture08/Part2/solution_advanced/amremover/src/amremover/cli.py:
--------------------------------------------------------------------------------
 1 | # sample usage: `amremover '[CH3:17][S:14](=[O:15])(=[O:16])[N:11]1[CH2:10][CH2:9][N:8](Cc2ccccc2)[CH2:13][CH2:12]1.C1CCCCC1>[OH-].[OH-].[Pd+2].CCO>[CH3:17][S:14](=[O:15])(=[O:16])[N:11]1[CH2:10][CH2:9][NH:8][CH2:13][CH2:12]1'`
 2 | 
 3 | import typer
 4 | 
 5 | from .utils import remove_atom_mapping_and_canonicalize_rxn_smiles
 6 | 
 7 | def run():  # target of the `amremover` console script declared in pyproject.toml
 8 |     typer.run(main)  # hand `main` to Typer, which exposes it as the command
 9 | 
10 | 
11 | def main(
12 |     rxn_smiles: str = typer.Argument(..., help="Reaction SMILES with atom-mapping"),
13 |     verbose: bool = typer.Option(False, "--verbose", help="Print additional verbose output.."),
14 |     ):
15 |     """Removes atom-mapping from reaction SMILES."""
16 |     if verbose:
17 |         print(f"RXN SMILES with atom mapping: {rxn_smiles}")
18 |         print("*** Remove atom mapping ***")
19 | 
20 |     rxn_smiles_without_atom_mapping = remove_atom_mapping_and_canonicalize_rxn_smiles(rxn_smiles)
21 | 
22 |     print(rxn_smiles_without_atom_mapping)
23 | 
24 | if __name__ == "__main__":  # direct execution; the installed `amremover` command calls run() itself
25 |     run()


--------------------------------------------------------------------------------
/Lecture08/Part2/solution_advanced/amremover/src/amremover/utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from rdkit import Chem
 3 | 
 4 | def remove_atom_mapping(smiles: str) -> str:
 5 |     # from: https://github.com/rxn4chemistry/rxn-chemutils/blob/main/src/rxn/chemutils/utils.py, MIT
 6 |     """
 7 |     Remove the atom mapping of a reaction SMILES.
 8 | 
 9 |     The resulting SMILES strings will still contain brackets and it may be
10 |     advisable to canonicalize them or clean them up as a postprocessing step.
11 |     A map number written directly after "*" is kept: "[*:1]" is unchanged.
12 | 
13 |     Args:
14 |         smiles: SMILES string potentially containing mapping information.
15 | 
16 |     Returns:
17 |         A SMILES string without atom mapping information.
18 |     """
19 |     # We look for ":" followed by digits before a "]" not coming after an "*"
20 |     return re.sub(r"(?<=[^\*])(:\d+)]", "]", smiles)
21 | 
22 | def canonicalize_smiles(smiles: str) -> str:
23 |     """
24 |     Canonicalize SMILES.
25 | 
26 |     Args:
27 |         smiles: SMILES string, potentially not canonical 
28 | 
29 |     Returns:
30 |         A canonical SMILES string.
31 |     """
32 |     mol = Chem.MolFromSmiles(smiles)
33 | 
34 |     if mol is not None:
35 |         return Chem.MolToSmiles(mol)
36 | 
37 |     return ''
38 | 
39 | 
40 | def remove_atom_mapping_and_canonicalize_rxn_smiles(smiles: str) -> str:
41 |     """
42 |     Remove atom mapping and canonicalize reaction SMILES.
43 | 
44 |     Args:
45 |         smiles: reaction SMILES string, potentially not canonical with atom mapping. 
46 | 
47 |     Returns:
48 |         A canonical reaction SMILES string without atom mapping.
49 |     """
50 | 
51 |     smiles_without_atom_mapping = remove_atom_mapping(smiles)
52 |     
53 |     rxn_parts = smiles_without_atom_mapping.split('>')
54 | 
55 |     can_rxn_parts = [canonicalize_smiles(smiles) for smiles in rxn_parts]
56 | 
57 |     return '>'.join(can_rxn_parts)
58 | 


--------------------------------------------------------------------------------
/Lecture08/Part2/solution_standard/amremover/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system] 
 2 | requires = ["hatchling"]
 3 | build-backend = "hatchling.build"
 4 | 
 5 | [project]
 6 | name = "amremover" # name of the installed package
 7 | version = "0.0.1" # version is required
 8 | dependencies = [ # specify the required package dependencies
 9 |     "rdkit",
10 |     "numpy == 1.25",
11 | ]
12 | 
13 | [tool.hatch.build.targets.wheel]
14 | packages = ["src/amremover"]
15 | 


--------------------------------------------------------------------------------
/Lecture08/Part2/solution_standard/amremover/src/amremover/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/Lecture08/Part2/solution_standard/amremover/src/amremover/__init__.py


--------------------------------------------------------------------------------
/Lecture08/Part2/solution_standard/amremover/src/amremover/utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from rdkit import Chem
 3 | 
 4 | def remove_atom_mapping(smiles: str) -> str:
 5 |     # from: https://github.com/rxn4chemistry/rxn-chemutils/blob/main/src/rxn/chemutils/utils.py, MIT
 6 |     """
 7 |     Remove the atom mapping of a reaction SMILES.
 8 | 
 9 |     The resulting SMILES strings will still contain brackets and it may be
10 |     advisable to canonicalize them or clean them up as a postprocessing step.
11 |     A map number written directly after "*" is kept: "[*:1]" is unchanged.
12 | 
13 |     Args:
14 |         smiles: SMILES string potentially containing mapping information.
15 | 
16 |     Returns:
17 |         A SMILES string without atom mapping information.
18 |     """
19 |     # We look for ":" followed by digits before a "]" not coming after an "*"
20 |     return re.sub(r"(?<=[^\*])(:\d+)]", "]", smiles)
21 | 
22 | def canonicalize_smiles(smiles: str) -> str:
23 |     """
24 |     Canonicalize SMILES.
25 | 
26 |     Args:
27 |         smiles: SMILES string, potentially not canonical 
28 | 
29 |     Returns:
30 |         A canonical SMILES string.
31 |     """
32 |     mol = Chem.MolFromSmiles(smiles)
33 | 
34 |     if mol is not None:
35 |         return Chem.MolToSmiles(mol)
36 | 
37 |     return ''
38 | 
39 | 
40 | def remove_atom_mapping_and_canonicalize_rxn_smiles(smiles: str) -> str:
41 |     """
42 |     Remove atom mapping and canonicalize reaction SMILES.
43 | 
44 |     Args:
45 |         smiles: reaction SMILES string, potentially not canonical with atom mapping. 
46 | 
47 |     Returns:
48 |         A canonical reaction SMILES string without atom mapping.
49 |     """
50 | 
51 |     smiles_without_atom_mapping = remove_atom_mapping(smiles)
52 |     
53 |     rxn_parts = smiles_without_atom_mapping.split('>')
54 | 
55 |     can_rxn_parts = [canonicalize_smiles(smiles) for smiles in rxn_parts]
56 | 
57 |     return '>'.join(can_rxn_parts)
58 | 


--------------------------------------------------------------------------------
/Lecture08/README.md:
--------------------------------------------------------------------------------
 1 | # Lecture 8: Python Packaging
 2 | 
 3 | ## Setup
 4 | 
 5 | As always, we first need to update our code. This can be done by following these steps:
 6 | 
 7 | 1. Open VSCode and open the folder where you cloned the `practical-programming-in-chemistry-exercises` repository.
 8 | 2. Go to the Source Control tab (the one that looks like a branch) and click on the three dots.
 9 | 3. Click on `Pull, Push`.
10 | 4. Click on `Pull` to update your local repository.
11 | 5. Copy the exercise notebook to your folder/repository to work on it.
12 | 
13 | If you cannot pull because you have made changes, copy the changed files into your own repository, revert the changes (arrow pointing left), and then pull.
14 | 
15 | Aaaand that's it! You're ready to start with this lecture's exercises! **Happy coding** :star_struck:
16 | 
17 | # Part 1: Python Package installation
18 | 
19 | In this tutorial, we will explore different ways to install a Python package. We will specifically focus on installing the `drfp` (Differential Reaction Fingerprint) package, which was discussed in the lecture. Before we proceed, let's set up a **new** conda environment with Python 3.10 to ensure compatibility and isolation.
20 | 
21 | We will briefly discuss two methods of installing a Python package: a) from PyPI via `pip` and b) from source. To do this, we will set up a new, separate environment for each.
22 | 
23 | ## a) Installing from PyPI
24 | 
25 | Open your terminal and create a new conda environment named `drfp_pypi` with Python 3.10, then activate it:
26 | 
27 | ```
28 | conda create -n drfp_pypi python=3.10
29 | conda activate drfp_pypi
30 | ```
31 | 
32 | Now you can install the `drfp` package from PyPI. You will also need to downgrade `numpy` to avoid an error coming from version conflicts.
33 | ```
34 | pip install drfp
35 | pip install numpy==1.25  # to avoid version conflicts with rdkit
36 | ```
37 | Open the notebook `Part1/08_drfp_partA.ipynb` and select the kernel for the environment `drfp_pypi`. You will have to install `iPython` again, just as you did in the exercises for Lecture 3. See that [README](../Lecture03/README.md) for help.
38 | 
39 | Now run the cell in the notebook. This is just a quick test to make sure that `drfp` was properly installed.
40 | 
41 | ## b) Installing from Source
42 | 
43 | In the second part, you can install the `drfp` package directly from the source code hosted on GitHub. This method is useful if you want to work with the latest development version or contribute to the project.
44 | 
45 | It's a good practice to create a separate conda environment when installing from source to keep things organized and avoid conflicts with other environments. 
46 | 
47 | In your terminal, deactivate your current environment with `conda deactivate`, then follow the steps above to **create a new environment**, this time called `drfp_source` and activate it.
48 | 
49 | Next, we will clone the source code and install the package from source:
50 | 
51 | 1. Clone the GitHub repository using the address you find on [https://github.com/reymond-group/drfp/](https://github.com/reymond-group/drfp/) under `Code`. One possible location for the clone is `~/git`, but you can also put it into another folder. The folder has to be created before you can navigate there with `cd`, and it should be outside your exercises folder.
52 | 
53 | ![Clone address](../assets/Lecture08/clone_drfp.jpg)
54 | 
55 | Navigate on your terminal to the folder where you keep your git repositories, for example `~/git`. In here, **make sure you are not inside another repository**. You can do this by using `git status`, which should return an error `fatal: not a git repository` if you are **not** inside another repository. 
56 | 
57 | Clone the `drfp` package and navigate to it:
58 | 
59 | ```
60 | git clone https://github.com/reymond-group/drfp.git
61 | cd drfp
62 | ```
63 | 
64 | Install the package using pip in development (or "editable") mode:
65 | ```
66 | pip install -e .
67 | ```
68 | 
69 | The `-e` flag stands for "editable" mode, which allows you to make changes to the source code and have them immediately reflected in your installation without having to reinstall the package. This is not necessary in this case, but typically useful if you want to make changes to the code. You could have also run `pip install .`. 
70 | 
71 | By installing from source in development mode, you have the flexibility to modify the code or stay up-to-date with the latest changes made to the package that haven't been released on PyPI. Care should be taken though, as releases on PyPI are typically more stable and robust than the latest version on GitHub.
72 | 
73 | Remember, it's generally a good practice to create a new conda environment for each project to avoid potential conflicts and maintain a clean working environment.
74 | 
75 | Now you can open and execute the code cells in `Part1/08_drfp_partB.ipynb`. 
76 | 
77 | 
78 | # Part 2: Turn a script into a Python package
79 | 
80 | 
81 | As we're simulating the process of building a new project, let's create another conda environment and activate it:
82 | ```
83 | conda deactivate
84 | conda create -n reactions python=3.10
85 | conda activate reactions
86 | ```
87 | and install some necessary packages:
88 | ```
89 | pip install rdkit
90 | pip install numpy==1.25  # to avoid version conflicts with rdkit
91 | ```
92 | 
93 | Next follow the instructions in `Part2/08_from_script_to_package.ipynb` to learn more about how to build your first package using `pyproject.toml`. Remember that the build tool will only recognize subfolders containing an `__init__.py` file (even if empty) as part of the package. 
94 | 
95 | Additional information on how to write a `pyproject.toml` file can be found at [https://packaging.python.org/en/latest/guides/writing-pyproject-toml/](https://packaging.python.org/en/latest/guides/writing-pyproject-toml/). Here, we will first just explore the minimal set of metadata. 
96 | 


--------------------------------------------------------------------------------
/Lecture09/09_exercise.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "In these exercises we are going to demonstrate how to extract raw data from the internet. This will be split into two sections. First we will consider a simple case where the data is already structured and stored in a file; we will show how to use Python code to download and manipulate this data directly. The second case deals with the automated extraction of unstructured data from a web page. This process is called scraping.\n"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Import the packages"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 3,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import requests\n",
 24 |     "from bs4 import BeautifulSoup"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "First lets look at the requests package and its functions. The most useful function of the requests package is 'get'. It performs a HTTP request to a specific webpage. \n",
 32 |     "\n",
 33 |     "**HTTP Request**\n",
 34 |     "\n",
 35 |     "HTTP stands for HyperText Transfer Protocol and is used for transferring data over the internet. A HTTP request is essentially a way for a client (like your web browser or a Python script) to communicate with a server (where a website's data is stored). When you type a URL into your web browser, it is actually sending a HTTP request to a server.\n",
 36 |     "\n",
 37 |     "These requests can be of different types, called _methods_, examples of which include GET, POST, PUT, DELETE, etc. These methods tell the server what kind of action the client wants to perform.\n",
 38 |     "\n",
 39 |     "**GET Request**\n",
 40 |     "\n",
 41 |     "The GET method is the most common HTTP request type. It is used to _retrieve_ information from a server. When you enter a URL into your browser, you're sending a GET request to the server asking it to send back some HTML content.\n",
 42 |     "\n",
 43 |     "\n",
 44 |     "In Python, we can use the `requests` library to send a HTTP GET request.\n",
 45 |     "\n",
 46 |     "\n",
 47 |     "As an introduction, say you are taking an organic chemistry course and there aren't enough exercises with solutions to practise on. You might find a webpage that contains lots of examples with individual files, it would be tedious to go through and download all of these manually so lets try and do it automatically. We will use the Sparr Group at Basel University as an example.\n",
 48 |     "\n",
 49 |     "https://sparr.chemie.unibas.ch/en/teaching/\n"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "metadata": {},
 55 |    "source": [
 56 |     "**Making a GET request:**\n",
 57 |     "\n",
 58 |     "This is how you make a GET request to a website:"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 4,
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "res = requests.get('https://sparr.chemie.unibas.ch/en/teaching/')"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "**Checking the status code:**\n",
 75 |     "\n",
 76 |     "First we need to check if our request was successful. We can find this and other information about the status of our request using the 'status code'\n",
 77 |     "\n",
 78 |     "The status code tells you about the status of your HTTP request. For example, a status code of 200 means that your request was successful, a status code of 404 means the resource was not found on the server.\n",
 79 |     "\n",
 80 |     "A full list of HTTP response codes can be found here.\n",
 81 |     "\n",
 82 |     "https://en.wikipedia.org/wiki/List_of_HTTP_status_codes\n"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 5,
 88 |    "metadata": {},
 89 |    "outputs": [
 90 |     {
 91 |      "name": "stdout",
 92 |      "output_type": "stream",
 93 |      "text": [
 94 |       "200\n"
 95 |      ]
 96 |     }
 97 |    ],
 98 |    "source": [
 99 |     "print(res.status_code)"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "The `get` method is used to send a GET request to a server. The URL of the desired resource is passed as an argument.\n",
107 |     "\n",
108 |     "**Accessing the response:**\n",
109 |     "\n",
110 |     "When we make a request to a web server, the server responds with data and some metadata like status code, content-type, etc. This data is stored in the response. \n",
111 |     "\n",
112 |     "* `.text` gives you the response from the server as a string:\n",
113 |     "* `.json()` gives you the response as a JSON object, if the response was in the format of JSON:"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 5,
119 |    "metadata": {},
120 |    "outputs": [],
121 |    "source": [
122 |     "print(res.text)"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {},
128 |    "source": [
129 |     "**HTTP headers:**\n",
130 |     "\n",
131 |     "HTTP headers allow the client and the server to pass additional information with the request or the response. Headers include information like content type of the response, date, status code, etc.\n"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 8,
137 |    "metadata": {},
138 |    "outputs": [
139 |     {
140 |      "name": "stdout",
141 |      "output_type": "stream",
142 |      "text": [
143 |       "{'Date': 'Wed, 17 Apr 2024 15:08:17 GMT', 'Content-Type': 'text/html; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Vary': 'Accept-Encoding', 'Content-Language': 'en', 'X-Frame-Options': 'SAMEORIGIN', 'X-Content-Type-Options': 'nosniff', 'X-XSS-Protection': '1; mode=block', 'Referrer-Policy': 'strict-origin-when-cross-origin', 'Content-Encoding': 'gzip', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}\n"
144 |      ]
145 |     }
146 |    ],
147 |    "source": [
148 |     "print(res.headers)"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "metadata": {},
154 |    "source": [
155 |     "It is extremely important when writing requests into a piece of software that the code segment is included in a 'try and except' block. \n",
156 |     "\n",
157 |     "In the context of making HTTP requests using the `requests` library, there are several types of exceptions that can occur due to various reasons, such as:\n",
158 |     "\n",
159 |     "1. `requests.exceptions.Timeout`: This exception is raised when a request times out.\n",
160 |     "\n",
161 |     "2. `requests.exceptions.TooManyRedirects`: This error occurs if a request exceeds the configured number of maximum redirections.\n",
162 |     "\n",
163 |     "3. `requests.exceptions.HTTPError`: This exception is raised for certain types of invalid HTTP responses, like a 404 not found or 500 internal error.\n",
164 |     "\n",
165 |     "4. `requests.exceptions.RequestException`: This is a base exception from which all the above exceptions inherit, and it's raised for all other types of exceptions.\n",
166 |     "\n",
167 |     "When making a request to a web server, it is always possible that the server might not respond, there could be a network problem, or we may not get the expected data back. In these situations, a Python script without error handling would simply crash and stop execution. To prevent this, we use `try` and `except` blocks when making HTTP requests. By doing this, we can catch these exceptions, handle them appropriately (possibly by just printing an error message), and continue with the rest of our code instead of having our entire application crash."
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 9,
173 |    "metadata": {},
174 |    "outputs": [
175 |     {
176 |      "name": "stdout",
177 |      "output_type": "stream",
178 |      "text": [
179 |       "{\n",
180 |       "  \"args\": {}, \n",
181 |       "  \"headers\": {\n",
182 |       "    \"Accept\": \"*/*\", \n",
183 |       "    \"Accept-Encoding\": \"gzip, deflate, br\", \n",
184 |       "    \"Host\": \"httpbin.org\", \n",
185 |       "    \"User-Agent\": \"python-requests/2.31.0\", \n",
186 |       "    \"X-Amzn-Trace-Id\": \"Root=1-661fe5f6-7baa181a0e93947e330baec7\"\n",
187 |       "  }, \n",
188 |       "  \"origin\": \"128.178.38.31\", \n",
189 |       "  \"url\": \"https://httpbin.org/get\"\n",
190 |       "}\n",
191 |       "\n"
192 |      ]
193 |     }
194 |    ],
195 |    "source": [
196 |     "from requests.exceptions import RequestException\n",
197 |     "\n",
198 |     "url = 'https://httpbin.org/get'\n",
199 |     "\n",
200 |     "try:\n",
201 |     "    response = requests.get(url)\n",
202 |     "    response.raise_for_status()\n",
203 |     "except RequestException as err:\n",
204 |     "    print(f\"An Error Occured: {err}\")\n",
205 |     "else:\n",
206 |     "    print(response.text)"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "markdown",
211 |    "metadata": {},
212 |    "source": [
213 |     "In the above code, the `raise_for_status()` function is used to raise an `HTTPError` if an error occurs (i.e., if the HTTP request returned an unsuccessful status code). If the request is successful, it will print the response text. If an error occurs during the request, it will be handled, and the error message will be printed out without crashing the script."
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "Now that we have access to the web page and its contents as a Python object, we need to be able to find specific elements and do things with them. For example, we might want to find all images and download them. Web pages are written in a language called HTML, which can be tricky to read and interpret, so we use a package called BeautifulSoup to convert this HTML into a machine-readable structure called a parse tree."
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "First we need to create a BeautifulSoup object which we do as follows. The first argument is the raw HTML content, the second argument 'html.parser' is the html parser to parse the page.\n"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": 7,
233 |    "metadata": {},
234 |    "outputs": [],
235 |    "source": [
236 |     "soup = BeautifulSoup(res.text, 'html.parser')"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "markdown",
241 |    "metadata": {},
242 |    "source": [
243 |     "You can navigate the BeautifulSoup parse tree using attributes like `.contents`, `.parent`, `.next_sibling`, `.previous_sibling`\n",
244 |     "\n",
245 |     "Here is an example of using `.contents` which returns a list of all children of a tag:"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": 11,
251 |    "metadata": {},
252 |    "outputs": [
253 |     {
254 |      "name": "stdout",
255 |      "output_type": "stream",
256 |      "text": [
257 |       "['How do I activate JavaScript in this web browser?']\n"
258 |      ]
259 |     }
260 |    ],
261 |    "source": [
262 |     "first_link = soup.a\n",
263 |     "first_link_contents = first_link.contents\n",
264 |     "print(first_link_contents)"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "markdown",
269 |    "metadata": {},
270 |    "source": [
271 |     "You can search the BeautifulSoup parse tree using methods like `.find()`, `.find_all()`, `.find_next()`, `.find_previous()`\n",
272 |     "\n",
273 |     "Here is an example of using `.find_all()` which returns all elements with a certain tag:"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 8,
279 |    "metadata": {},
280 |    "outputs": [],
281 |    "source": [
282 |     "all_links = soup.find_all('a')\n",
283 |     "print(all_links)"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "markdown",
288 |    "metadata": {},
289 |    "source": [
290 |     "**Accessing tag attributes:**\n",
291 |     "\n",
292 |     "You can access the attributes of a HTML tag using indexing:\n"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": 9,
298 |    "metadata": {},
299 |    "outputs": [
300 |     {
301 |      "name": "stdout",
302 |      "output_type": "stream",
303 |      "text": [
304 |       "https://www.enable-javascript.com/en/\n"
305 |      ]
306 |     }
307 |    ],
308 |    "source": [
309 |     "first_link = soup.a\n",
310 |     "href = first_link['href']\n",
311 |     "print(href)"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "markdown",
316 |    "metadata": {},
317 |    "source": [
318 |     "TASK **Extracting all the URLs found within a page’s `<a>` tags:**\n",
319 |     "The attribute 'href' specifies a link to a webpage that is embedded into a segment of text. For example, the HTML ```<a href=\"https://sparr.chemie.unibas.ch/en/teaching/\">Visit Sparr Group</a>``` would display the text 'Visit Sparr Group', which when clicked on would direct to the webpage. Here the 'a' represents a 'tag', which you can search for using `.find_all()`.\n",
320 |     "\n",
321 |     "Your task is to use the `requests` library to send a GET request to 'https://example.com'. Parse the response text with BeautifulSoup and print out all the URLs found within a page's `<a>` tags.\n"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "code",
326 |    "execution_count": null,
327 |    "metadata": {},
328 |    "outputs": [],
329 |    "source": [
330 |     "### Your Code Here ###"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "markdown",
335 |    "metadata": {},
336 |    "source": [
337 |     "When we want to extract specific data from a webpage, it is essential to know the nested structure that is present in the original HTML, as this structure will be reflected in the BeautifulSoup object. When inspecting a webpage's HTML we might find that all the files we want to download are contained within one particular kind of tag, say `<file>`. Then we could simply use `.find_all()` to locate all files of interest without complex navigation through the nested structure.\n",
338 |     "\n",
339 |     "First, open up a webpage in Google Chrome. Let's use \"https://google.com\" for instance. \n",
340 |     "\n",
341 |     "Next, move your cursor to the element you are interested in. Right-click on that element, and in the dropdown menu, click on the \"Inspect\" option. This would open up the Developer Tools on the right-hand side or at the bottom of your browser depending on your settings.\n",
342 |     "\n",
343 |     "The Developer Tools panel comprises many different tools, but for our current purpose, we'll focus primarily on the \"Elements\" pane. This pane shows us the HTML structure of the webpage.\n",
344 |     "\n",
345 |     "In the Elements pane, you'll see a lot of HTML, which is the code used to create the structure and contents of the web page. The part of the page you right-clicked to inspect would be highlighted in the HTML on the panel. You can browse through the HTML to find other elements as well.\n",
346 |     "\n",
347 |     "If you want to find a specific HTML element in the code, hover over the different parts of the HTML code in the \"Elements\" pane, corresponding sections of the web page will be highlighted. This can help you figure out which part of the HTML corresponds to the data you're interested in.\n",
348 |     "\n",
349 |     "This process only covers the basic functionality needed for web scraping. Developer Tools in browsers are very robust and have many other functionalities that can be very helpful when building or debugging websites.\n",
350 |     "\n",
351 |     "For inspecting web pages in other browsers like Firefox, Safari, or Internet Explorer, you can use similar steps but the exact navigation and layout of developer tools might vary slightly. If you're not sure, it's best to do a quick Google search for \"How to inspect webpage in [Your Browser]\". "
352 |    ]
353 |   },
354 |   {
355 |    "cell_type": "markdown",
356 |    "metadata": {},
357 |    "source": [
358 |     "Question 2: \n",
359 |     "\n",
360 |     "Open https://sparr.chemie.unibas.ch/en/teaching/ in a browser and inspect the page HTML, then use *requests* to fetch the same page from Python. Under Group Seminar, there is a list of Exercises and Solutions. Your task is to write BeautifulSoup code which finds the links to all of the exercises and solutions. You should end up with 465 links in total. Do this without referring to ChatGPT or a similar chatbot. You can use other resources such as the <a href=\"https://beautiful-soup-4.readthedocs.io/en/latest/\">Beautiful Soup documentation</a> or Stack Exchange. You could do this by using hard-coded navigation through the internal structure, but try to find a pattern that enables `.find_all()` to directly retrieve the links. You do not need to download the files; just verify that you have found the links."
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "code",
365 |    "execution_count": null,
366 |    "metadata": {},
367 |    "outputs": [],
368 |    "source": [
369 |     "### Your Code Here ###"
370 |    ]
371 |   },
372 |   {
373 |    "cell_type": "markdown",
374 |    "metadata": {},
375 |    "source": [
376 |     "Question 3: \n",
377 |     "Provide the HTML code to Claude or ChatGPT and ask it to find the links for you. Play around until you get the same answer as your manually implemented code."
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": null,
383 |    "metadata": {},
384 |    "outputs": [],
385 |    "source": [
386 |     "### Chatbot generated code here ###"
387 |    ]
388 |   },
389 |   {
390 |    "cell_type": "markdown",
391 |    "metadata": {},
392 |    "source": [
393 |     "Question 4\n",
394 |     "\n",
395 |     "A large number of machine learning applications use SMILES as a data representation format. This format encodes a molecule as a string. Although excellent for machine learning the SMILES format is very difficult for humans to interpret and understand. To get around this, there exist several software tools which convert SMILES strings into an image of a molecular structure.\n",
396 |     "\n",
397 |     "The most commonly used tool is CDK Depict. This is only available as a Java package, so to access it through python code we must use the web page.\n",
398 |     "\n",
399 |     "https://www.simolecule.com/cdkdepict/depict.html\n",
400 |     "\n",
401 |     "The webpage takes a SMILES string as input and then executes some JavaScript code to generate an image. Navigate to this webpage and click on one of the pre-provided images. Doing this will take you to a new webpage which contains only the image you clicked on. Closely inspect the link to this new page. \n",
402 |     "\n",
403 |     "We will provide a function called *urllib.parse.urlencode()* which converts a python dictionary into a query suitable for a URL format. Try it out below. Note how it converts '=' characters into '%3D'.\n",
404 |     "\n",
405 |     "Your task is to inspect the link for a molecule of your choice and construct a dictionary of parameters that can be converted to a string using *urllib.parse.urlencode()*. You should then build a string which represents the link to the image for a given SMILES. Finally, use what you have learned about the *requests* library to retrieve the image and display it. Use the code skeleton provided below."
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "code",
410 |    "execution_count": 15,
411 |    "metadata": {},
412 |    "outputs": [
413 |     {
414 |      "name": "stdout",
415 |      "output_type": "stream",
416 |      "text": [
417 |       "smiles=CN1C%3DNC2%3DC1C%28%3DO%29N%28C%28%3DO%29N2C%29C\n"
418 |      ]
419 |     }
420 |    ],
421 |    "source": [
422 |     "import urllib.parse\n",
423 |     "\n",
424 |     "params =    {\n",
425 |     "                'smiles': 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C',\n",
426 |     "            }\n",
427 |     "\n",
428 |     "print(urllib.parse.urlencode(params))\n"
429 |    ]
430 |   },
431 |   {
432 |    "cell_type": "code",
433 |    "execution_count": 17,
434 |    "metadata": {},
435 |    "outputs": [
436 |     {
437 |      "data": {
438 |       "text/plain": [
439 |        "<IPython.core.display.SVG object>"
440 |       ]
441 |      },
442 |      "metadata": {},
443 |      "output_type": "display_data"
444 |     }
445 |    ],
446 |    "source": [
447 |     "import requests\n",
448 |     "from IPython.display import SVG, display\n",
449 |     "\n",
450 |     "CDKDEPICTLINK = 'https://www.simolecule.com/cdkdepict/depict/bow'\n",
451 |     "\n",
452 |     "\n",
453 |     "def smiles_depict_url(smiles: str, format: str = 'svg') -> str:\n",
454 |     "    \"\"\"\n",
455 |     "    Generate the URL for the depiction of a SMILES string.\n",
456 |     "    Args:\n",
457 |     "        smiles: smiles string to depict\n",
458 |     "        format: 'svg', 'pdf', 'png', etc.\n",
459 |     "    Returns:\n",
460 |     "        URL string\n",
461 |     "    \"\"\"\n",
462 |     "   \n",
463 |     "    \"\"\"\n",
464 |     "    Complete the params dictionary and encode it into a URL string with the urllib.parse.urlencode function\n",
465 |     "    Remember it must be a valid link to a cdkdepict URL; you can test your URLs by just navigating to the link in your browser\n",
466 |     "    Return the URL string\n",
467 |     "    \"\"\"\n",
468 |     "\n",
469 |     "    params = {\n",
470 |     "        'smi': smiles,\n",
471 |     "        # Your code here\n",
472 |     "        \n",
473 |     "    }\n",
474 |     "    params_str = urllib.parse.urlencode(params)\n",
475 |     "\n",
476 |     "    # Your code here\n",
477 |     "\n",
478 |     "def display_svg(url: str) -> None:\n",
479 |     "    # Send a GET request to the link you construct. Remember to handle the cases where the response does not work as intended\n",
480 |     "    # Look at the response content to find the SVG data.\n",
481 |     "\n",
482 |     "    # Use the display function to display the SVG data\n",
483 |     "    display(SVG(svg_data))\n",
484 |     "\n",
485 |     "\n",
486 |     "smiles = 'CCOC(=O)C1=CC=CC=C1C(=O)OCC' \n",
487 |     "url = smiles_depict_url(smiles)\n",
488 |     "display_svg(url)"
489 |    ]
490 |   }
491 |  ],
492 |  "metadata": {
493 |   "kernelspec": {
494 |    "display_name": "Python 3",
495 |    "language": "python",
496 |    "name": "python3"
497 |   },
498 |   "language_info": {
499 |    "codemirror_mode": {
500 |     "name": "ipython",
501 |     "version": 3
502 |    },
503 |    "file_extension": ".py",
504 |    "mimetype": "text/x-python",
505 |    "name": "python",
506 |    "nbconvert_exporter": "python",
507 |    "pygments_lexer": "ipython3",
508 |    "version": "3.11.5"
509 |   },
510 |   "orig_nbformat": 4
511 |  },
512 |  "nbformat": 4,
513 |  "nbformat_minor": 2
514 | }
515 | 


--------------------------------------------------------------------------------
/Lecture09/README.md:
--------------------------------------------------------------------------------
 1 | # Web Data Extraction Exercise
 2 | 
 3 | This exercise covers techniques for extracting data from the internet using Python, focusing on two main approaches:
 4 | 1. Downloading structured data from files
 5 | 2. Scraping unstructured data from web pages
 6 | 
 7 | ## Prerequisites
 8 | 
 9 | ### Setting up the environment
10 | 
11 | 1. Activate the ppchem environment:
12 |    ```bash
13 |    conda activate ppchem
14 |    ```
15 | 
16 | 2. Install the required packages:
17 |    ```bash
18 |    pip install requests beautifulsoup4 IPython
19 |    ```
20 | 
21 | ## Exercise Overview
22 | 
23 | ### Part 1: Using the Requests Library
24 | - Learn how to make HTTP GET requests using the `requests` library
25 | - Handle HTTP responses and status codes
26 | - Implement error handling with try-except blocks
27 | 
28 | ### Part 2: Web Scraping with BeautifulSoup
29 | - Parse HTML content using BeautifulSoup
30 | - Navigate and search HTML parse trees
31 | - Extract specific elements from web pages
32 | 
33 | ### Part 3: Practical Tasks
34 | 1. Extract all URLs from a webpage
35 | 2. Find and extract exercise and solution links from a university chemistry teaching page
36 | 3. Compare your manual implementation with ChatGPT's solution
37 | 
38 | ### Part 4: Working with Chemical Data
39 | - Convert SMILES strings to molecular structure images
40 | - Construct URLs with parameters for the CDK Depict service
41 | - Display SVG images of molecular structures
42 | 
43 | ## Tips for Success
44 | - Pay attention to HTML structure when scraping web pages
45 | - Use browser developer tools to inspect webpage elements
46 | - Remember to handle potential errors in HTTP requests
47 | - Test your constructed URLs in a browser before implementing them in code
48 | 
49 | ## Learning Objectives
50 | - Understand HTTP request-response cycle
51 | - Develop skills in web scraping using Python libraries
52 | - Apply these techniques to chemistry-related data extraction
53 | - Compare manual implementation with AI-assisted solutions
54 | 
55 | Happy coding!


--------------------------------------------------------------------------------
/Lecture10/Part-1/README.md:
--------------------------------------------------------------------------------
  1 | # Lecture 10 - Copier project templates
  2 | 
  3 | In this lecture, we will look at using a project template tool, Copier, to turn the CDK-Depict code you wrote last week into a simple Python package.
  4 | 
  5 | **What is Copier?**
  6 |    - Copier is a utility tool created to facilitate the duplication and customization of project templates. It is written in Python and enables you to efficiently generate projects by copying a template and making specified alterations defined in an easy configuration file (often YAML).
  7 |    - This tool is designed to help reduce redundant setup tasks and ensures consistency across multiple projects by allowing users to create a single project template that can be reused and customized without starting from scratch each time.
  8 | 
  9 | Let's go through the steps required to make this package.
 10 | 
 11 | #### Step 1: Set Up Your Environment
 12 | 
 13 | ```bash
 14 | conda activate ppchem
 15 | ```
 16 | 
 17 | #### Step 2: Organise the code
 18 | 
 19 | Go through the Jupyter notebook from yesterday and ensure that your code is neatly organised into functions. Once this is done move the code into a new notebook which should contain only the code required to query the CDK-Depict website. 
 20 | Place this notebook in a new folder called 'CDK-Package'.
 21 | 
 22 | Remember you can use the following commands to create folders and move files between directories. For the *mv* command to work you must navigate your terminal to the directory in which the file you want to move is located. In your case it should be something like '/path/to/your/practical-programming-in-chemistry-exercises/week_08/your_new_notebook.ipynb'
 23 | 
 24 | ```bash
 25 | mkdir CDK-Package
 26 | cd CDK-Package
 27 | mv /path/to/your/practical-programming-in-chemistry-exercises/week_08/your_new_notebook.ipynb .
 28 | ```
 29 | 
 30 | #### Step 3: Convert Your Notebook
 31 | 
 32 | While Jupyter notebooks are excellent tools for quickly developing small snippets of code, they are not suited for use in a Python package. To make your code compatible with a Python package we need to copy the functions into a Python module, as you did in exercise 8. Create a module called `cdktest.py`.
 33 | 
 34 | #### Step 4: Using Copier to Create the Package Structure
 35 | 
 36 | Instead of manually creating directories and files as outlined previously, you can use the Copier template designed for consistent and quick setup of scientific projects/folders. Here’s how you can do it:
 37 | 
 38 | 1. **Install Copier**:
 39 |    First, ensure Copier is installed in your environment:
 40 | 
 41 |    ```bash
 42 |    pip install copier
 43 |    ```
 44 | 
 45 | 2. **Generate Project Structure**:
 46 |    Utilize Copier with the schwallergroup template to create your project structure. Make sure you're still within your `ppchem` environment:
 47 | 
 48 |    ```bash
 49 |    copier copy gh:schwallergroup/copier-liac-minimal /path/to/your/CDK-Package
 50 |    ```
 51 | 
 52 |    Enter the directory path where you want your new project to be initiated. Follow the on-screen prompts provided by Copier to customize the project (such as naming modules or defining author details).
 53 |    Enforce the code style as : strict (precommit, ruff, mypy)
 54 | 
 55 | 3. **Place Your Converted Module**:
 56 |    Move or copy the Python scripts (converted from your Jupyter notebooks in Step 3) into the appropriate directories within this newly created package structure. Typically you should place this in `src/package_name`.
 57 | 
 58 | 4. **Set Up the Code**
 59 |    Open the `__init__.py` file. Add a line of the following format to import the functions from the code we generated from your notebook
 60 | 
 61 |    ```python
 62 |    from .cdktest import smiles_depict_url, display_svg
 63 |    ```
 64 | 
 65 |    Remember to use the names you gave to your functions
 66 | 
 67 | 
 68 | #### Step 6: Initialize Git Repository
 69 | 1. **Create a Local Repository**
 70 | Initialize a git repository to start version control within the newly created directory structure:
 71 | 
 72 | ```bash
 73 | cd /path/to/your/CDK-Package
 74 | git init
 75 | git add .
 76 | git commit -m "Initial package setup with Copier"
 77 | ```
 78 | 
 79 | Next, you need to create a remote repository where your code will be stored online.
 80 | 
 81 | 2. **Create a New Upstream Repository**
 82 |    - Navigate to the Repositories tab on Github.com and click on the "New" button.
 83 |    - Name your repository (e.g., `CDK-Package`).
 84 |    - Choose if you want your repository to be public (anyone can see this repository) or private (you choose who can see and commit to this repository).
 85 |    - **Important**: Do not initialize the repository with a README, .gitignore, or License. Your local repository already contains these files if necessary.
 86 |    - Click the "Create repository" button.
 87 | 
 88 | 3. **Link Your Local Repository to the Remote Repository**
 89 |    - Once your repository is created, GitHub will display a page with a URL and some setup instructions. Copy the URL for the repository.
 90 |    - Go back to your terminal and link your local repository with the remote repository using the following command:
 91 |      ```bash
 92 |      git remote add origin YOUR_REMOTE_URL
 93 |      ```
 94 |      Replace `YOUR_REMOTE_URL` with the copied URL.
 95 | 
 96 | 4. **Push Your Local Repository to GitHub**
 97 |    - Now, push the changes from your local repository to GitHub with:
 98 |      ```bash
 99 |      git push -u origin main  # or git push -u origin master
100 |      ```
101 |    - The `-u` flag is used to set the upstream (tracking reference) for your local branch.
102 | 
103 | 5. **Verify Everything is Online**
104 | - Go back to your repository on GitHub and refresh the page. You should now see all the files you've added locally.
105 | 
106 | #### Step 7: Install Your Local Package
107 | Now that the code in our package is prepared, we must install it so that you and others can use it. Install it with:
108 | ```bash
109 | pip install -e .
110 | ```
111 | This uses the `pyproject.toml` file created by copier.
112 | 
113 | This installs the package in editable mode (symlink) so changes are reflected immediately.
114 | 
115 | 
116 | #### Step 8: Test Your Package
117 | 
118 | Ensure everything works by importing your package in Python. Try out your functions, do you get the same result as in the notebook?
119 | 
120 | #### Step 9: Create formal tests using pytest
121 | 
122 | 1. **Install pytest**:
123 |    If not already installed, you can install pytest using pip:
124 |    ```bash
125 |    pip install pytest
126 |    ```
127 | 
128 | 2. **Create a Test File**:
129 |    Inside your project structure, usually under a `tests` folder, create a test file named `test_depict_url.py` or similar.
130 | 
131 | 3. **Write Test Cases**:
132 |    In the `test_depict_url.py` file, add the following Python code to create test cases for the `smiles_depict_url` function.
133 | 
134 |    ```python
135 |    import pytest
136 |    from cdkpackage import smiles_depict_url
137 | 
138 |    def test_smiles_depict_url():
139 |        # test your code here
140 |    ```
141 | 
142 | 4. **Run the Tests**:
143 |    Navigate to your project's root directory in the terminal, and run:
144 |    ```bash
145 |    pytest
146 |    ```
147 |    This command will discover and run all the tests in the `tests` directory.
148 | 


--------------------------------------------------------------------------------
/Lecture10/Part-1/solution_code.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "data": {
 10 |       "image/svg+xml": [
 11 |        "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.2\" width=\"20.58mm\" height=\"17.48mm\" viewBox=\"0 0 20.58 17.48\">\n",
 12 |        "  <desc>Generated by the Chemistry Development Kit (http://github.com/cdk)</desc>\n",
 13 |        "  <g stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke=\"#000000\" stroke-width=\".23\" fill=\"#000000\">\n",
 14 |        "    <rect x=\".0\" y=\".0\" width=\"21.0\" height=\"18.0\" fill=\"#FFFFFF\" stroke=\"none\"/>\n",
 15 |        "    <g id=\"mol1\" class=\"mol\">\n",
 16 |        "      <line id=\"mol1bnd5\" class=\"bond\" x1=\"5.08\" y1=\"2.98\" x2=\"5.09\" y2=\"6.6\"/>\n",
 17 |        "      <g id=\"mol1bnd6\" class=\"bond\">\n",
 18 |        "        <line x1=\"5.09\" y1=\"6.6\" x2=\".72\" y2=\"9.14\"/>\n",
 19 |        "        <line x1=\"5.09\" y1=\"7.54\" x2=\"1.53\" y2=\"9.61\"/>\n",
 20 |        "      </g>\n",
 21 |        "      <line id=\"mol1bnd7\" class=\"bond\" x1=\".72\" y1=\"9.14\" x2=\".72\" y2=\"14.22\"/>\n",
 22 |        "      <g id=\"mol1bnd8\" class=\"bond\">\n",
 23 |        "        <line x1=\".72\" y1=\"14.22\" x2=\"5.09\" y2=\"16.76\"/>\n",
 24 |        "        <line x1=\"1.53\" y1=\"13.75\" x2=\"5.09\" y2=\"15.82\"/>\n",
 25 |        "      </g>\n",
 26 |        "      <line id=\"mol1bnd9\" class=\"bond\" x1=\"5.09\" y1=\"16.76\" x2=\"9.49\" y2=\"14.22\"/>\n",
 27 |        "      <g id=\"mol1bnd10\" class=\"bond\">\n",
 28 |        "        <line x1=\"9.49\" y1=\"14.22\" x2=\"9.49\" y2=\"9.14\"/>\n",
 29 |        "        <line x1=\"8.68\" y1=\"13.75\" x2=\"8.68\" y2=\"9.61\"/>\n",
 30 |        "      </g>\n",
 31 |        "      <line id=\"mol1bnd11\" class=\"bond\" x1=\"5.09\" y1=\"6.6\" x2=\"9.49\" y2=\"9.14\"/>\n",
 32 |        "      <line id=\"mol1bnd12\" class=\"bond\" x1=\"9.49\" y1=\"9.14\" x2=\"12.74\" y2=\"7.26\"/>\n",
 33 |        "      <g id=\"mol1atm4\" class=\"atom\">\n",
 34 |        "        <path d=\"M5.82 .74v.26q-.13 -.12 -.27 -.18q-.14 -.06 -.3 -.06q-.32 .0 -.49 .19q-.17 .19 -.17 .56q.0 .36 .17 .56q.17 .19 .49 .19q.16 .0 .3 -.06q.14 -.06 .27 -.17v.26q-.13 .09 -.28 .14q-.15 .04 -.31 .04q-.42 .0 -.66 -.26q-.24 -.26 -.24 -.7q.0 -.44 .24 -.7q.24 -.26 .66 -.26q.16 .0 .31 .05q.15 .04 .28 .13zM6.96 .76q-.27 .0 -.43 .2q-.16 .2 -.16 .55q.0 .35 .16 .55q.16 .2 .43 .2q.27 .0 .43 -.2q.16 -.2 .16 -.55q.0 -.35 -.16 -.55q-.16 -.2 -.43 -.2zM6.96 .56q.39 .0 .62 .26q.23 .26 .23 .7q.0 .44 -.23 .7q-.23 .26 -.62 .26q-.39 .0 -.62 -.26q-.23 -.26 -.23 -.7q.0 -.43 .23 -.7q.23 -.26 .62 -.26z\" stroke=\"none\"/>\n",
 35 |        "        <path d=\"M8.2 2.88h.52v.13h-.7v-.13q.09 -.09 .23 -.24q.15 -.15 .19 -.19q.07 -.08 .1 -.14q.03 -.06 .03 -.11q.0 -.09 -.06 -.14q-.06 -.05 -.16 -.05q-.07 .0 -.15 .02q-.08 .02 -.17 .07v-.15q.09 -.04 .17 -.05q.08 -.02 .14 -.02q.17 .0 .28 .09q.1 .09 .1 .23q.0 .07 -.03 .13q-.03 .06 -.09 .14q-.02 .02 -.12 .13q-.1 .1 -.28 .29z\" stroke=\"none\"/>\n",
 36 |        "        <path d=\"M8.93 .59h1.16v.21h-.92v.54h.88v.21h-.88v.67h.94v.21h-1.19v-1.85zM10.74 .66v.39h.47v.18h-.47v.75q.0 .17 .05 .22q.05 .05 .19 .05h.23v.19h-.23q-.26 .0 -.36 -.1q-.1 -.1 -.1 -.36v-.75h-.17v-.18h.17v-.39h.23z\" stroke=\"none\"/>\n",
 37 |        "      </g>\n",
 38 |        "      <g id=\"mol1atm12\" class=\"atom\">\n",
 39 |        "        <path d=\"M14.63 5.82v.26q-.13 -.12 -.27 -.18q-.14 -.06 -.3 -.06q-.32 .0 -.49 .19q-.17 .19 -.17 .56q.0 .36 .17 .56q.17 .19 .49 .19q.16 .0 .3 -.06q.14 -.06 .27 -.17v.26q-.13 .09 -.28 .14q-.15 .04 -.31 .04q-.42 .0 -.66 -.26q-.24 -.26 -.24 -.7q.0 -.44 .24 -.7q.24 -.26 .66 -.26q.16 .0 .31 .05q.15 .04 .28 .13zM15.77 5.84q-.27 .0 -.43 .2q-.16 .2 -.16 .55q.0 .35 .16 .55q.16 .2 .43 .2q.27 .0 .43 -.2q.16 -.2 .16 -.55q.0 -.35 -.16 -.55q-.16 -.2 -.43 -.2zM15.77 5.64q.39 .0 .62 .26q.23 .26 .23 .7q.0 .44 -.23 .7q-.23 .26 -.62 .26q-.39 .0 -.62 -.26q-.23 -.26 -.23 -.7q.0 -.43 .23 -.7q.23 -.26 .62 -.26z\" stroke=\"none\"/>\n",
 40 |        "        <path d=\"M17.01 7.96h.52v.13h-.7v-.13q.09 -.09 .23 -.24q.15 -.15 .19 -.19q.07 -.08 .1 -.14q.03 -.06 .03 -.11q.0 -.09 -.06 -.14q-.06 -.05 -.16 -.05q-.07 .0 -.15 .02q-.08 .02 -.17 .07v-.15q.09 -.04 .17 -.05q.08 -.02 .14 -.02q.17 .0 .28 .09q.1 .09 .1 .23q.0 .07 -.03 .13q-.03 .06 -.09 .14q-.02 .02 -.12 .13q-.1 .1 -.28 .29z\" stroke=\"none\"/>\n",
 41 |        "        <path d=\"M17.74 5.67h1.16v.21h-.92v.54h.88v.21h-.88v.67h.94v.21h-1.19v-1.85zM19.55 5.74v.39h.47v.18h-.47v.75q.0 .17 .05 .22q.05 .05 .19 .05h.23v.19h-.23q-.26 .0 -.36 -.1q-.1 -.1 -.1 -.36v-.75h-.17v-.18h.17v-.39h.23z\" stroke=\"none\"/>\n",
 42 |        "      </g>\n",
 43 |        "    </g>\n",
 44 |        "  </g>\n",
 45 |        "</svg>"
 46 |       ],
 47 |       "text/plain": [
 48 |        "<IPython.core.display.SVG object>"
 49 |       ]
 50 |      },
 51 |      "metadata": {},
 52 |      "output_type": "display_data"
 53 |     }
 54 |    ],
 55 |    "source": [
 56 |     "import urllib.parse\n",
 57 |     "import requests\n",
 58 |     "from IPython.display import SVG, display\n",
 59 |     "\n",
 60 |     "cdk_depict_link = 'https://www.simolecule.com/cdkdepict/depict/bow'\n",
 61 |     "\n",
 62 |     "\n",
 63 |     "def smiles_depict_url(smiles: str, format: str = 'svg') -> str:\n",
 64 |     "    \"\"\"\n",
 65 |     "    Generate the URL for the depiction of a SMILES string.\n",
 66 |     "    Args:\n",
 67 |     "        smiles: smiles string to depict\n",
 68 |     "        format: 'svg', 'pdf', 'png', etc.\n",
 71 |     "    Returns:\n",
 72 |     "        URL string\n",
 73 |     "    \"\"\"\n",
 74 |     "    rendering_service = cdk_depict_link\n",
 75 |     "    params = {\n",
 76 |     "        'smi': smiles,\n",
 77 |     "        'zoom': '1.0',\n",
 78 |     "        'abbr': 'on',\n",
 79 |     "        'hdisp': 'bridgehead',\n",
 80 |     "        'showtitle': 'false',\n",
 81 |     "        'annotate': 'none'\n",
 82 |     "    }\n",
 83 |     "    params_str = urllib.parse.urlencode(params)\n",
 84 |     "    return f'{rendering_service}/{format}?{params_str}'\n",
 85 |     "\n",
 86 |     "def display_svg(url: str) -> None:\n",
 87 |     "    response = requests.get(url)\n",
 88 |     "    if response.status_code == 200:\n",
 89 |     "        svg_data = response.text\n",
 90 |     "        display(SVG(svg_data))\n",
 91 |     "    else:\n",
 92 |     "        print(\"Failed to retrieve SVG: Status code\", response.status_code)\n",
 93 |     "\n",
 94 |     "smiles = 'CCOC(=O)C1=CC=CC=C1C(=O)OCC' \n",
 95 |     "url = smiles_depict_url(smiles)\n",
 96 |     "display_svg(url)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 1,
102 |    "metadata": {},
103 |    "outputs": [],
104 |    "source": [
105 |     "from cdktest import smiles_depict_url, display_svg"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 2,
111 |    "metadata": {},
112 |    "outputs": [
113 |     {
114 |      "data": {
115 |       "image/svg+xml": [
116 |        "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.2\" width=\"20.58mm\" height=\"17.48mm\" viewBox=\"0 0 20.58 17.48\">\n",
117 |        "  <desc>Generated by the Chemistry Development Kit (http://github.com/cdk)</desc>\n",
118 |        "  <g stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke=\"#000000\" stroke-width=\".23\" fill=\"#000000\">\n",
119 |        "    <rect x=\".0\" y=\".0\" width=\"21.0\" height=\"18.0\" fill=\"#FFFFFF\" stroke=\"none\"/>\n",
120 |        "    <g id=\"mol1\" class=\"mol\">\n",
121 |        "      <line id=\"mol1bnd5\" class=\"bond\" x1=\"5.08\" y1=\"2.98\" x2=\"5.09\" y2=\"6.6\"/>\n",
122 |        "      <g id=\"mol1bnd6\" class=\"bond\">\n",
123 |        "        <line x1=\"5.09\" y1=\"6.6\" x2=\".72\" y2=\"9.14\"/>\n",
124 |        "        <line x1=\"5.09\" y1=\"7.54\" x2=\"1.53\" y2=\"9.61\"/>\n",
125 |        "      </g>\n",
126 |        "      <line id=\"mol1bnd7\" class=\"bond\" x1=\".72\" y1=\"9.14\" x2=\".72\" y2=\"14.22\"/>\n",
127 |        "      <g id=\"mol1bnd8\" class=\"bond\">\n",
128 |        "        <line x1=\".72\" y1=\"14.22\" x2=\"5.09\" y2=\"16.76\"/>\n",
129 |        "        <line x1=\"1.53\" y1=\"13.75\" x2=\"5.09\" y2=\"15.82\"/>\n",
130 |        "      </g>\n",
131 |        "      <line id=\"mol1bnd9\" class=\"bond\" x1=\"5.09\" y1=\"16.76\" x2=\"9.49\" y2=\"14.22\"/>\n",
132 |        "      <g id=\"mol1bnd10\" class=\"bond\">\n",
133 |        "        <line x1=\"9.49\" y1=\"14.22\" x2=\"9.49\" y2=\"9.14\"/>\n",
134 |        "        <line x1=\"8.68\" y1=\"13.75\" x2=\"8.68\" y2=\"9.61\"/>\n",
135 |        "      </g>\n",
136 |        "      <line id=\"mol1bnd11\" class=\"bond\" x1=\"5.09\" y1=\"6.6\" x2=\"9.49\" y2=\"9.14\"/>\n",
137 |        "      <line id=\"mol1bnd12\" class=\"bond\" x1=\"9.49\" y1=\"9.14\" x2=\"12.74\" y2=\"7.26\"/>\n",
138 |        "      <g id=\"mol1atm4\" class=\"atom\">\n",
139 |        "        <path d=\"M5.82 .74v.26q-.13 -.12 -.27 -.18q-.14 -.06 -.3 -.06q-.32 .0 -.49 .19q-.17 .19 -.17 .56q.0 .36 .17 .56q.17 .19 .49 .19q.16 .0 .3 -.06q.14 -.06 .27 -.17v.26q-.13 .09 -.28 .14q-.15 .04 -.31 .04q-.42 .0 -.66 -.26q-.24 -.26 -.24 -.7q.0 -.44 .24 -.7q.24 -.26 .66 -.26q.16 .0 .31 .05q.15 .04 .28 .13zM6.96 .76q-.27 .0 -.43 .2q-.16 .2 -.16 .55q.0 .35 .16 .55q.16 .2 .43 .2q.27 .0 .43 -.2q.16 -.2 .16 -.55q.0 -.35 -.16 -.55q-.16 -.2 -.43 -.2zM6.96 .56q.39 .0 .62 .26q.23 .26 .23 .7q.0 .44 -.23 .7q-.23 .26 -.62 .26q-.39 .0 -.62 -.26q-.23 -.26 -.23 -.7q.0 -.43 .23 -.7q.23 -.26 .62 -.26z\" stroke=\"none\"/>\n",
140 |        "        <path d=\"M8.2 2.88h.52v.13h-.7v-.13q.09 -.09 .23 -.24q.15 -.15 .19 -.19q.07 -.08 .1 -.14q.03 -.06 .03 -.11q.0 -.09 -.06 -.14q-.06 -.05 -.16 -.05q-.07 .0 -.15 .02q-.08 .02 -.17 .07v-.15q.09 -.04 .17 -.05q.08 -.02 .14 -.02q.17 .0 .28 .09q.1 .09 .1 .23q.0 .07 -.03 .13q-.03 .06 -.09 .14q-.02 .02 -.12 .13q-.1 .1 -.28 .29z\" stroke=\"none\"/>\n",
141 |        "        <path d=\"M8.93 .59h1.16v.21h-.92v.54h.88v.21h-.88v.67h.94v.21h-1.19v-1.85zM10.74 .66v.39h.47v.18h-.47v.75q.0 .17 .05 .22q.05 .05 .19 .05h.23v.19h-.23q-.26 .0 -.36 -.1q-.1 -.1 -.1 -.36v-.75h-.17v-.18h.17v-.39h.23z\" stroke=\"none\"/>\n",
142 |        "      </g>\n",
143 |        "      <g id=\"mol1atm12\" class=\"atom\">\n",
144 |        "        <path d=\"M14.63 5.82v.26q-.13 -.12 -.27 -.18q-.14 -.06 -.3 -.06q-.32 .0 -.49 .19q-.17 .19 -.17 .56q.0 .36 .17 .56q.17 .19 .49 .19q.16 .0 .3 -.06q.14 -.06 .27 -.17v.26q-.13 .09 -.28 .14q-.15 .04 -.31 .04q-.42 .0 -.66 -.26q-.24 -.26 -.24 -.7q.0 -.44 .24 -.7q.24 -.26 .66 -.26q.16 .0 .31 .05q.15 .04 .28 .13zM15.77 5.84q-.27 .0 -.43 .2q-.16 .2 -.16 .55q.0 .35 .16 .55q.16 .2 .43 .2q.27 .0 .43 -.2q.16 -.2 .16 -.55q.0 -.35 -.16 -.55q-.16 -.2 -.43 -.2zM15.77 5.64q.39 .0 .62 .26q.23 .26 .23 .7q.0 .44 -.23 .7q-.23 .26 -.62 .26q-.39 .0 -.62 -.26q-.23 -.26 -.23 -.7q.0 -.43 .23 -.7q.23 -.26 .62 -.26z\" stroke=\"none\"/>\n",
145 |        "        <path d=\"M17.01 7.96h.52v.13h-.7v-.13q.09 -.09 .23 -.24q.15 -.15 .19 -.19q.07 -.08 .1 -.14q.03 -.06 .03 -.11q.0 -.09 -.06 -.14q-.06 -.05 -.16 -.05q-.07 .0 -.15 .02q-.08 .02 -.17 .07v-.15q.09 -.04 .17 -.05q.08 -.02 .14 -.02q.17 .0 .28 .09q.1 .09 .1 .23q.0 .07 -.03 .13q-.03 .06 -.09 .14q-.02 .02 -.12 .13q-.1 .1 -.28 .29z\" stroke=\"none\"/>\n",
146 |        "        <path d=\"M17.74 5.67h1.16v.21h-.92v.54h.88v.21h-.88v.67h.94v.21h-1.19v-1.85zM19.55 5.74v.39h.47v.18h-.47v.75q.0 .17 .05 .22q.05 .05 .19 .05h.23v.19h-.23q-.26 .0 -.36 -.1q-.1 -.1 -.1 -.36v-.75h-.17v-.18h.17v-.39h.23z\" stroke=\"none\"/>\n",
147 |        "      </g>\n",
148 |        "    </g>\n",
149 |        "  </g>\n",
150 |        "</svg>"
151 |       ],
152 |       "text/plain": [
153 |        "<IPython.core.display.SVG object>"
154 |       ]
155 |      },
156 |      "metadata": {},
157 |      "output_type": "display_data"
158 |     }
159 |    ],
160 |    "source": [
161 |     "smiles = 'CCOC(=O)C1=CC=CC=C1C(=O)OCC' \n",
162 |     "url = smiles_depict_url(smiles)\n",
163 |     "display_svg(url)"
164 |    ]
165 |   }
166 |  ],
167 |  "metadata": {
168 |   "kernelspec": {
169 |    "display_name": "Python 3",
170 |    "language": "python",
171 |    "name": "python3"
172 |   },
173 |   "language_info": {
174 |    "codemirror_mode": {
175 |     "name": "ipython",
176 |     "version": 3
177 |    },
178 |    "file_extension": ".py",
179 |    "mimetype": "text/x-python",
180 |    "name": "python",
181 |    "nbconvert_exporter": "python",
182 |    "pygments_lexer": "ipython3",
183 |    "version": "3.10.12"
184 |   },
185 |   "orig_nbformat": 4
186 |  },
187 |  "nbformat": 4,
188 |  "nbformat_minor": 2
189 | }
190 | 


--------------------------------------------------------------------------------
/Lecture10/Part-2/README.md:
--------------------------------------------------------------------------------
  1 | ## Setup
  2 | 
  3 | Go to https://github.com/mrodobbe/Rxn-INSIGHT and read the README. Follow the instructions to install **from source** (i.e. Option 2) in developer mode. This means that **after** cloning the repository (and entering the directory), you'll want to install with:
  4 | ```bash
  5 | pip install -e ".[test,doc]"
  6 | ```
  7 | 
  8 | Follow the installation instructions, making sure to install the optional dependencies
  9 | as these will be required to run the tests and style checks.
 10 | 
 11 | Make sure you are in the top directory, and in your newly-created conda environment
 12 | `rxn-insight`, then run the tests with the `tox` command.
 13 | 
 14 | 
 15 | ## Writing a failing test, then fixing it
 16 | 
 17 | Run the test environment using the command `tox -e py3` and inspect the terminal output.
 18 | First, tox builds the correct test environment with the required dependencies for
 19 | running the tests. It is told how to do so in the `tox.ini` file. Then, it tests the
 20 | code using `pytest`. You can see this in the `tox.ini` file, under the `[testenv]`
 21 | block:
 22 | ```
 23 | commands =
 24 |     mypy src tests
 25 |     pytest {env:PYTEST_MARKERS:} {env:PYTEST_EXTRA_ARGS:} {posargs:-vv}
 26 |     coverage: genbadge coverage -i coverage.xml    
 27 | ```
 28 | Essentially this looks for any file named `test_{...}.py` in the
 29 | `Rxn-insight/tests/` directory and executes the code within them. You should see an
 30 | output in the terminal like this:
 31 | 
 32 | ```
 33 | tests/test_classification.py::test_initialization PASSED
 34 | tests/test_classification.py::test_get_template_smiles PASSED
 35 | tests/test_import.py::test_import PASSED
 36 | ```
 37 | 
 38 | Open the `tests/test_classification.py` file and inspect the tests. What is the code
 39 | doing and why do the tests pass?
 40 | 
 41 | In the case of the function `test_initialization` in `test_classification.py`, a
 42 | `ReactionClassifier` object is initialized with a reaction SMILES string. If the code
 43 | runs without error, the test is considered *passed*.
 44 | 
 45 | What if we pass an invalid SMILES string? A good test could be that an appropriate error
 46 | message is raised. 
 47 | 
 48 | To the file `test_classification.py`, add `import pytest` to the top of the file, as the
 49 | first import. Copy the following unit test into the test file:
 50 | 
 51 | ```py
 52 | def test_initialization_error():
 53 |     """
 54 |     Tests that the appropriate error is raised when initializing the
 55 |     ReactionClassifier class with an invalid reaction SMILES
 56 |     """
 57 |     rxn_smiles_with_atom_mapping = "invalid_smiles"
 58 | 
 59 |     ReactionClassifier(rxn_smiles_with_atom_mapping, keep_mapping=True)
 60 | ```
 61 | 
 62 | and re-run the tests.
 63 | 
 64 | Inspect the output. The test fails because the code raises an error. This is what we
 65 | want: passing the string `"invalid_smiles"` as the reaction SMILES should raise an
 66 | error. However the test should still pass, so we need to modify the test to tell pytest
 67 | that we **expect** an error. 
 68 | 
 69 | 
 70 | Modify the test `test_initialization_error` to ***catch*** the
 71 | exception with the `pytest.raises` context manager:
 72 | ```py
 73 | with pytest.raises(ValueError) as e:
 74 |     ReactionClassifier(rxn_smiles_with_atom_mapping, keep_mapping=True)
 75 | assert str(e.value).startswith("Invalid reaction SMILES")
 76 | ```
 77 | Here we check that the appropriate error message is raised by asserting that the
 78 | message starts with `"Invalid reaction SMILES"`. If you inspect module
 79 | `classification.py` in the source code, you will see in the constructor of the class
 80 | (i.e. the `__init__()` method of `ReactionClassifier`) where this error is raised.
 81 | 
 82 | ## Coverage reports
 83 | 
 84 | When writing a series of tests for code, it can be useful to know how much of the code
 85 | is 'touched' (or ***covered***) by the tests. Ideally, tests would cover as much of the
 86 | relevant source code (i.e. in `Rxn-insight/src/`) as possible to ensure that the code is
 87 | working properly.
 88 | 
 89 | We can run the tests again, but generate a coverage report in the process to check this.
 90 | 
 91 | Run the command `tox -e py3-coverage` and inspect the output. The bit relevant to
 92 | coverage here is:
 93 | ```
 94 | Name                                Stmts   Miss  Cover
 95 | -------------------------------------------------------
 96 | src/rxn_insight/__init__.py             0      0   100%
 97 | src/rxn_insight/classification.py    1002    800    20%
 98 | src/rxn_insight/reaction.py           318    318     0%
 99 | src/rxn_insight/representation.py      22     22     0%
100 | src/rxn_insight/utils.py              405    243    40%
101 | -------------------------------------------------------
102 | TOTAL                                1747   1383    21%
103 | Coverage XML written to file coverage.xml
104 | ```
105 | 
106 | This tells us how much of each file in `src/` is covered by the tests. As we can see,
107 | the tests that have been provided cover some of the code in the `classification.py`
108 | module (and by
109 | extension `utils.py`, as these utility functions are used in `classification.py`), but
110 | not the other modules.
111 | 
112 | 
113 | ### A note on test-driven development
114 | 
115 | In our case, we are retrospectively writing tests for code that has already been
116 | written. However, when building your own project you can use the principle of
117 | ***'test-driven development'*** (read more here:
118 | https://en.wikipedia.org/wiki/Test-driven_development) to help you design your codebase.
119 | In essence, you can write a series of tests that capture the functionality of your
120 | package from the user's perspective, including tests that check the code runs as
121 | expected, with expected outputs and without error, as well as those that run with
122 | expected errors. Then you write the actual source code such that all the tests pass.
123 | This can be useful in writing well-designed software, and naturally leads you to write
124 | code that has a high coverage.
125 | 
126 | **A caveat to coverage**: something to be aware of, for your own code and for others', is
127 | that a high coverage score doesn't necessarily mean the code is well tested. Tests could
128 | cover a large part of the codebase by just running class methods, without testing the
129 | outputs of these methods. Tests are a tool to build well-designed code, and a high
130 | coverage score should be a consequence of well-designed tests instead of a metric to be
131 | maximised in isolation.
132 | 
133 | 
134 | # Main exercise
135 | 
136 | Now for the main exercise of today. This is purposefully left open-ended so you
137 | have space to think about code design, functionality, and user experience. 
138 | 
139 | **The aim is simple**: get the code coverage as high as possible, ideally > 80%, by
140 | writing a series of ***well-designed*** tests.
141 | 
142 | Some general advice to help you in the process:
143 | 
144 | * Each python module has its own test module. For instance, currently we have
145 |   `tests/test_classification.py` for the module `src/rxn_insight/classification.py`.
146 | * Each unit test (i.e. the test function, such as `test_initialization` in
147 |   `test_classification.py`) should be short and test **one aspect** of the
148 |   functionality. If a test fails, it should be easy to debug as it ideally will not be
149 |   ambiguous what part of the code is being tested.
150 | * Keep unit test names descriptive, and write short docstrings for the tests. This helps
151 |   others (or you later on) read, improve, and add to your tests more easily.
152 | * Make sure you understand the functionality of the package you are trying to test. Open
153 |   a notebook in jupyter lab and play around with the classes and functions. This will
154 |   help you to understand what should work, and hopefully what shouldn't, and get you
155 |   thinking about appropriate tests.
156 | * Usually good tests won't just test that code runs without error, but check the output
157 |   of the code too. Conditional checks can be useful for this: i.e. `assert`ing that
158 |   output is equal to some expected output.
159 | * In this exercise, fancy tests aren't required. However, so that you're aware, `pytest`
160 |   has a lot of nice functionality to help write more complex tests without creating a
161 |   lot of messy test code. More can be read here:
162 |   https://docs.pytest.org/en/7.1.x/how-to/index.html . Examples include the use of
163 |   fixtures, parametrization, and doctests.
164 | 
165 | 
166 | ### Need help getting started? 
167 | 
168 | First, you can try to increase the coverage of tests for the module `classification.py`.
169 | A good way to do this is to test the class methods, such as
170 | `get_functional_group_smarts`. Remember: we don't just want to test that the code runs,
171 | but that the outputs are as expected.
172 | 
173 | Next, move on to the other modules. You could start with `reaction.py`:
174 | 
175 | 1. Open the file `reaction.py` and inspect the code. What does the `Reaction` class do?
176 |    What class methods are there and what do they do?
177 | 2. Open a jupyter notebook and import the `Reaction` class. Initialize a `Reaction`
178 |    object with the appropriate parameters and try a few of the class methods.
179 | 3. Start with writing a test for the initialization of the object, as was done for
180 |    `classification.py`, then move onto testing the class methods.
181 | 


--------------------------------------------------------------------------------
/Lecture10/README.md:
--------------------------------------------------------------------------------
1 | # Lecture 10
2 | 
3 | Welcome to lecture 10! This week is divided into 2 parts, the details of which are in the
4 | corresponding READMEs.
5 | 


--------------------------------------------------------------------------------
/Lecture11/README.md:
--------------------------------------------------------------------------------
1 | # Lecture 11: Setup
2 | 
3 | Welcome to lecture 11! Today, we will learn some ways to visualize chemical data. This is a very important step in any cheminformatics project, as it allows us to understand the data and the impact it might have on models or tools we build using that data.
4 | 
5 | **Happy coding** :star_struck:


--------------------------------------------------------------------------------
/Lecture12/12_exercises.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Streamlit\n",
  8 |     "\n",
  9 |     "Welcome to streamlit, a free, open-source framework that allows you to build interactive web apps. We can use it to share data or machine learning models with a general audience. Take good notes! Streamlit is a great resource for your projects.\n",
 10 |     "\n",
 11 |     "\n",
 12 |     "Today we'll introduce you to some basic streamlit functionalities, but this is just the tip of the iceberg. Here are some resources if you're excited and want to learn more: \n",
 13 |     "\n",
 14 |     "1. https://docs.streamlit.io/develop/quick-reference/cheat-sheet\n",
 15 |     "2. https://www.youtube.com/watch?v=vIQQR_yq-8I\n",
 16 |     "\n",
 17 |     "## Our Goal\n",
 18 |     "We are going to build an app to compare a user-inputted molecule to existing drug molecules. \n",
 19 |     "It will have the following functionalities: \n",
 20 |     "1. A graphical interface to draw a molecule\n",
 21 |     "2. A 3D visualization of that molecule\n",
 22 |     "3. A dashboard displaying the Lipinski descriptors for the molecule (https://en.wikipedia.org/wiki/Lipinski%27s_rule_of_five)\n",
 23 |     "4. A tool to search a database of approved drugs for the most similar molecules\n",
 24 |     "5. A series of plots that compare our molecule's properties to the distribution of drug molecules\n",
 25 |     "\n",
 26 |     "## How to use these exercises\n",
 27 |     "Our app will be run from the ```app.py``` file. However, it's harder to troubleshoot from there. Write the necessary functions in this notebook and test them here. When they're ready (with good error handling), paste them into the ```app.py``` file which already has the scaffold for your app. "
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "### Text \n",
 35 |     "\n",
 36 |     "We assemble our webpage by piecing together individual widgets. Let's start with text. Transfer the following to the ```app.py``` file. \n",
 37 |     "\n",
 38 |     "```\n",
 39 |     "st.title('Are you drug like?!')\n",
 40 |     "st.caption(\"Practical Programming In Chemistry Week 11\")\n",
 41 |     "st.markdown(\"Draw a molecule and see how it stacks up against known drugs\")\n",
 42 |     "```\n",
 43 |     "\n",
 44 |     "In your terminal, type ```streamlit run app.py```. Note that you must be in the folder with the app.py file for this to work. It should launch a browser where you can see your app. \n",
 45 |     "\n",
 46 |     "Hit the three dots in the upper right corner of the app. Go to settings, and check the box that says \"Run on Save\". This means that every time you save your file, the app will update automatically. "
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "### Widgets\n",
 54 |     "Streamlit offers a huge variety of widgets for user interaction. One such widget is a slider (but there are many many more, check the cheatsheet)! Add this code to your ```app.py``` file and save. \n",
 55 |     "\n",
 56 |     "```\n",
 57 |     "st.sidebar.markdown('# Options')\n",
 58 |     "slider_val = st.sidebar.slider(\"Number of similar molecules\", 0, 10, 4)\n",
 59 |     "```\n",
 60 |     "\n",
 61 |     "When we change the slider in the app, the variable ```slider_val``` will change accordingly. "
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "### Ketcher\n",
 69 |     "The first main feature of the app is a ketcher interface, which allows us to draw a molecule. Luckily, other developers have made this super easy for us! All we need to do is use the ```streamlit_ketcher``` package. \n",
 70 |     "\n",
 71 |     "The ```st_ketcher()``` command creates the ketcher interface. The function can optionally take a smiles and a height. It returns the smiles of the molecule that the user draws, once the user hits the apply button. \n",
 72 |     "\n",
 73 |     "It may be useful to let the user draw a molecule from smiles. To do so, we will use the `st.text_input()` function. We add a default smiles.  \n",
 74 |     "\n",
 75 |     "```\n",
 76 |     "with st.expander(\"Draw Molecule From Smiles (optional)\"):\n",
 77 |     "    molecule = st.text_input(\"**Smiles**\", \"CCO\") # initially, molecule = \"CCO\"\n",
 78 |     "```\n",
 79 |     "\n",
 80 |     "We create the ketcher as follows: \n",
 81 |     "```ketcher_smiles = st_ketcher(molecule, height=600) ```\n",
 82 |     "\n",
 83 |     "When the user draws a molecule and hits apply, the variable `ketcher_smiles` will be updated accordingly. "
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "Write code to display the `ketcher_smiles` in an ```expander``` widget. "
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "# Your code here\n"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "Add the ketcher code to your app. "
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "### 3D visualization\n",
114 |     "It would be nice to see how our molecule looks in 3D. We can use the ```rdkit``` to generate a 3D structure of our molecule. Then, we can use ```stmol``` to visualize it in streamlit. \n",
115 |     "\n",
116 |     "Write a function that takes a smiles and outputs a string in the SDF format (in ```rdkit``` lingo, this is a ```MolBlock```). Make sure to include error handling, so that the function returns `None` if ```rdkit``` cannot generate a valid molecule. Test your function on a few cases, then transfer it into the `app.py` file. "
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": null,
122 |    "metadata": {},
123 |    "outputs": [],
124 |    "source": [
125 |     "def generate_3D(smiles: str) -> str:\n",
126 |     "    \"Generate 3D coordinates from smiles in an SDF formatted string\"\n",
127 |     "    pass # your code here!! \n",
128 |     "    return mol_string"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "In the ```app.py``` file, we've provided you with a function to visualize the molecule in 3D from the SDF string. We've already created two columns using ```st.columns()```. In the first column, put a sensible title and caption. Use your function to generate 3D coordinates for the molecule, and then use the ```visualize_3D()``` function to show the molecule. Add error handling using ```st.error()``` to exit gracefully if the ```ketcher_smiles``` is invalid. "
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "markdown",
140 |    "metadata": {},
141 |    "source": [
142 |     "### Descriptors\n",
143 |     "In the second column, we want to compute descriptors related to Lipinski's rule of 5 (the molecular weight, logP, number of H bond donors, and the number of H bond acceptors) for our ```ketcher_smiles```. Write a function using rdkit that calculates these descriptors. Include appropriate error handling!! "
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": null,
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "from typing import Tuple\n",
153 |     "def calculate_descriptors(smiles: str) -> Tuple[float, int, int, float]:\n",
154 |     "    \"Calculate Lipinski descriptors: molecular weight, H-bond donors, H-bond acceptors, and LogP\"\n",
155 |     "    pass # your code here!!\n",
156 |     "    return mw, hbd, hba, logp"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "metadata": {},
162 |    "source": [
163 |     "### Comparison to known drugs\n",
164 |     "\n",
165 |     "It would be nice to know how the Lipinski descriptors for our molecule compare to those of known drugs. To make this comparison, first we need to load our dataset from lecture 6 of known drugs. Write a function that returns a ```pd.DataFrame``` of the Lecture 6 chembl_drugs dataset. Once you're happy with your function, transfer it to the `app.py` file. "
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": null,
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "def download_data():\n",
175 |     "    \"Download the ChEMBL database\"\n",
176 |     "    pass # your code here!!\n",
177 |     "    return df"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "We need to calculate the Lipinski descriptors for our imported data. Later, we will want to do a similarity search, so we should also calculate the morgan fingerprints for each molecule. Write a function called ```modify_data(df)``` that takes a dataframe, removes any rows that have `NA` in the ```Smiles``` column, calculates the four Lipinski descriptors, and calculates the morgan fingerprints. Return the morgan fingerprints separately, as streamlit can't save the dataframe with fingerprints in it. "
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "metadata": {},
191 |    "outputs": [],
192 |    "source": [
193 |     "from typing import List\n",
194 |     "import pandas as pd  # if not done already\n",
195 |     "\n",
196 |     "def modify_data(df: pd.DataFrame) -> Tuple[pd.DataFrame, List[str]]:\n",
197 |     "    \"Modify the ChEMBL dataframe to include Lipinski descriptors. Also get the Morgan fingerprints of each molecule.\"\n",
198 |     "    return modified_df, fingerprints"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "markdown",
203 |    "metadata": {},
204 |    "source": [
205 |     "Now that we have our functions, let's use them! Add \n",
206 |     "```\n",
207 |     "df = download_data()\n",
208 |     "df, fingerprints = modify_data(df)\n",
209 |     "``` \n",
210 |     "to the app.py file. "
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "markdown",
215 |    "metadata": {},
216 |    "source": [
217 |     "We will display the Lipinski descriptors using the ```st.metric()``` widget. This widget is a nice way to depict data. We provide a title, a value, and a delta. Our delta in this case is the difference between the descriptors for our molecule and the average for the drugs dataset. We will format all of this in containers, which add pleasant little bounding boxes. We can also add a success widget to show the user whether or not the input molecule passes all of Lipinski's rule of 5. \n",
218 |     "\n",
219 |     "The outline of this code is already in the ```app.py``` file. Finish it for tiles 2-4.\n"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "markdown",
224 |    "metadata": {},
225 |    "source": [
226 |     "### Caching \n",
227 |     "Try changing the molecule in the app. See how long it takes for the app to recalculate descriptors? This is because every time we change the molecule, the entire app runs top to bottom. What this means is our expensive calculations (finding the Lipinski descriptors and fingerprints) are rerun every time. This results in a fairly unpleasant user experience. To avoid it, we can use the streamlit decorator `@st.cache_data()`. This decorator makes it so that if the function is rerun with the same inputs, the function doesn't actually run, streamlit just returns the last cached values. In our case, as long as we don't change the input dataframe, our expensive calculations are only run once (at the beginning.) Add the ```@st.cache_data()``` decorator to the line right before the ```download_data()``` and ```modify_data()``` functions. Try changing the molecule now. Big improvement, right??"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "markdown",
232 |    "metadata": {},
233 |    "source": [
234 |     "### Similarity search\n",
235 |     "Now that we've got our dataset of known drugs, wouldn't it also be nice to see which drug in the database is most similar to our input molecule? Wouldn't it be nice if the user could choose how many hits pop up? \n",
236 |     "\n",
237 |     "Write a function that takes all of the fingerprints, the ```ketcher_smiles```, and the ```slider_val```, and returns a dataframe of the top-n most similar molecules. "
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "code",
242 |    "execution_count": null,
243 |    "metadata": {},
244 |    "outputs": [],
245 |    "source": [
246 |     "def find_most_similar(fingerprints: List[str], ketcher_smiles: str, slider_val: int = 4) -> pd.DataFrame:\n",
247 |     "    \"Find the most similar molecules to the one drawn in Ketcher.\"\n",
248 |     "    pass # your code here!!\n",
249 |     "    return df"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "markdown",
254 |    "metadata": {},
255 |    "source": [
256 |     "We will display the top n hits using the package mols2grid, which allows us to visualize dataframes of molecules. Take a moment to read and understand the following snippet of code, then transfer it to the app.py file. \n",
257 |     "\n",
258 |     "```\n",
259 |     "most_similar_df = find_most_similar(fingerprints, ketcher_smiles, slider_val)\n",
260 |     "if most_similar_df is not None:\n",
261 |     "    if slider_val > 8: \n",
262 |     "        height = 600 # Dynamically adjust height for visualization\n",
263 |     "    elif slider_val > 4:\n",
264 |     "        height = 500\n",
265 |     "    else:\n",
266 |     "        height = 300\n",
267 |     "\n",
268 |     "    # use mols2grid to display our dataframe with molecules\n",
269 |     "    raw_html = mols2grid.display(most_similar_df, subset = [\"Name\", \"similarity\"],\n",
270 |     "                                smiles_col = \"Smiles\")._repr_html_()\n",
271 |     "    components.html(raw_html, height = height)\n",
272 |     "else:\n",
273 |     "    st.error('INVALID MOLECULE', icon=\"🚨\")\n",
274 |     "```"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "markdown",
279 |    "metadata": {},
280 |    "source": [
281 |     "### Interactive plots\n",
282 |     "Finally, we will do some graphing! Part of streamlit's appeal is that it lets you create beautiful, interactive plots easily. There are many interactive plotting libraries, but now we'll use ```plotly```. You were introduced to plotly yesterday, but for the kind of plots we want we'll use the ```plotly.figure_factory``` instead of ```plotly.express```. You can look [here](https://plotly.com/python/distplot/) for reference. We want a histogram, kernel density, and a rug plot (see Basic Distplot section) for each of the Lipinski descriptors. Add a vertical line so we can see how our molecule stacks up. \n"
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "code",
287 |    "execution_count": null,
288 |    "metadata": {},
289 |    "outputs": [],
290 |    "source": [
291 |     "# Use plotly to show the distribution for each lipinski descriptor\n",
292 |     "\n",
293 |     "# Add a vertical line where the user's molecule falls on the distribution"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "markdown",
298 |    "metadata": {},
299 |    "source": [
300 |     "Once your code is working, transfer it to ```app.py```. "
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "markdown",
305 |    "metadata": {},
306 |    "source": [
307 |     "### All Done!\n",
308 |     "Our app is now done! Now we have a beautiful, easy-to-use interface. And it wasn't even too hard to make. This is the tip of the iceberg and there are many excellent resources online. "
309 |    ]
310 |   }
311 |  ],
312 |  "metadata": {
313 |   "kernelspec": {
314 |    "display_name": "ppchem",
315 |    "language": "python",
316 |    "name": "python3"
317 |   },
318 |   "language_info": {
319 |    "codemirror_mode": {
320 |     "name": "ipython",
321 |     "version": 3
322 |    },
323 |    "file_extension": ".py",
324 |    "mimetype": "text/x-python",
325 |    "name": "python",
326 |    "nbconvert_exporter": "python",
327 |    "pygments_lexer": "ipython3",
328 |    "version": "3.10.13"
329 |   }
330 |  },
331 |  "nbformat": 4,
332 |  "nbformat_minor": 2
333 | }
334 | 


--------------------------------------------------------------------------------
/Lecture12/README.md:
--------------------------------------------------------------------------------
 1 | # Streamlit 
 2 | 
 3 | Activate your *ppchem* conda environment and install the following packages. 
 4 | 
 5 | ```bash
 6 | pip install streamlit streamlit_ketcher stmol py3Dmol mols2grid scipy plotly
 7 | ```
 8 | 
 9 | **Happy coding** :star_struck:
10 | 


--------------------------------------------------------------------------------
/Lecture12/app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | from rdkit import Chem
  3 | from rdkit.Chem import AllChem
  4 | from rdkit.Chem.Descriptors import ExactMolWt, MolLogP, NumHDonors, NumHAcceptors
  5 | from stmol import showmol
  6 | import py3Dmol
  7 | from pathlib import Path
  8 | import pandas as pd
  9 | import os
 10 | from streamlit_ketcher import st_ketcher
 11 | from rdkit.Chem import rdFingerprintGenerator
 12 | import numpy as np
 13 | import mols2grid
 14 | import streamlit.components.v1 as components
 15 | import plotly.figure_factory as ff
 16 | from typing import Tuple, List
 17 | 
 18 | # Put functions here 
 19 | def generate_3D(smiles):
 20 |     "Generate 3D coordinates from smiles"
 21 |     pass # your code here!! 
 22 | 
 23 | def visualize_3D(molstring):
 24 |     "Visualize the molecule in 3D using stmol"
 25 |     w, h = 400, 400
 26 |     xyzview = py3Dmol.view(width=w,height=w)
 27 |     xyzview.addModel(molstring,'mol')
 28 |     xyzview.setStyle({'sphere':{'colorscheme':'cyanCarbon', 'scale':0.25}, 'stick':{'colorscheme':'cyanCarbon'}})
 29 |     xyzview.zoomTo()
 30 |     xyzview.spin()
 31 |     xyzview.setBackgroundColor('white')
 32 |     showmol(xyzview, height = w,width=w)
 33 | 
 34 | def calculate_descriptors(smiles: str) -> Tuple[float, int, int, float]:
 35 |     "Calculate Lipinski descriptors: molecular weight, H-bond donors, H-bond acceptors, and LogP"
 36 |     pass # your code here!!
 37 |     return mw, hbd, hba, logp
 38 | 
 39 | def download_data():
 40 |     "Download the ChEMBL database"
 41 |     pass # your code here!!
 42 |     return df
 43 | 
 44 | def modify_data(df: pd.DataFrame) -> Tuple[pd.DataFrame, List[str]]:
 45 |     "Modify the ChEMBL database to include Lipinski descriptors. Also get the Morgan fingerprints of each molecule."
 46 |     return df, fingerprints
 47 | 
 48 | def find_most_similar(fingerprints: List[str], ketcher_smiles: str, slider_val: int = 4) -> pd.DataFrame:
 49 |     "Find the most similar molecules to the one drawn in Ketcher."
 50 |     pass # your code here!!
 51 |     return df
 52 | 
 53 | # Put title and caption here
 54 | 
 55 | # Put slider here
 56 | 
 57 | with st.expander("Draw Molecule From Smiles (optional)"):
 58 |     molecule = st.text_input("**Smiles**", "CCO") # initially, molecule = "CCO"
 59 | # Put ketcher here
 60 | 
 61 | # Display ketcher smiles here
 62 | 
 63 | # Put dataset download here
 64 | 
 65 | # 3D coordinates
 66 | col1, col2 = st.columns([1, 1])
 67 | with col1:
 68 |     # Add header, caption 
 69 |     # Generate 3D coordinates
 70 |     if molstring is not None:
 71 |         # Visualize 3D coordinates
 72 |     else:
 73 |         # Add error message
 74 | 
 75 | 
 76 | # Lipinski Descriptors
 77 | with col2: 
 78 |     # Add header, caption
 79 |     # Calculate Lipinski descriptors
 80 |     if mw is None:
 81 |         # Add error message
 82 |     else:
 83 |         # Create formatting for metrics
 84 |         metric_row1 = st.columns(2)
 85 |         metric_row2 = st.columns(2)
 86 |         tile1 = metric_row1[0].container(height = 120) # Put a container in the first row, first column
 87 |         tile1.metric() # Title, value, delta) # Add a metric to that container
 88 |         # repeat for tiles 2-4
 89 | 
 90 |         # Lipinski's Rule of Five
 91 |         if # your code here!!:
 92 |             st.success("Passes All of Lipinski's Rules", icon="✅")
 93 |         else:
 94 |             st.error("Does not pass Lipinski's Rule of Five", icon="🚨")
 95 | 
 96 |         # Lipinski's Rule of Five explanation
 97 |         st.markdown("Lipinski's rule of five is a rule of thumb to evaluate the druglikeness of a molecule.")
 98 |         st.markdown("Lipinski's Rules:  \n1. \# H-Bond donors < 5 \n 2. \# H-bond acceptors < 10\n 3. MW < 500 daltons \n4. LogP < 5.") 
 99 |         st.markdown("As with any rule, there are many exceptions")
100 | 
101 | 
102 | # Display the top n most similar molecules
103 | 
104 | # Graph the distributions of molecular properties
105 | # Add sensible header and caption
106 | mw, hbd, hba, logp = calculate_descriptors(ketcher_smiles)
107 | if mw == None:
108 |     st.error('INVALID MOLECULE', icon="🚨")
109 | else: 
110 |     # Finish the following
111 |     st.subheader("Molecular weight")
112 |     fig =  # your code here!!
113 |     st.plotly_chart(fig, use_container_width=True)
114 | 


--------------------------------------------------------------------------------
/Lecture12/solutions_app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | from rdkit import Chem
  3 | from rdkit.Chem import AllChem
  4 | from rdkit.Chem.Descriptors import ExactMolWt, MolLogP, NumHDonors, NumHAcceptors
  5 | from stmol import showmol
  6 | import py3Dmol
  7 | from pathlib import Path
  8 | import pandas as pd
  9 | import os
 10 | from streamlit_ketcher import st_ketcher
 11 | from rdkit.Chem import rdFingerprintGenerator
 12 | import numpy as np
 13 | import mols2grid
 14 | import streamlit.components.v1 as components
 15 | import plotly.express as px
 16 | import plotly.figure_factory as ff
 17 | from typing import Tuple, List
 18 | 
 19 | # Load data
 20 | @st.cache_data()
 21 | def download_data():
 22 |     "Download the ChEMBL database"
 23 |     current_file = Path(os.path.abspath(''))
 24 |     csv_file = current_file.parent / "Lecture06" / "chembl_drugs.csv"
 25 |     df = pd.read_csv(csv_file, sep= ";")
 26 |     return df
 27 | 
 28 | @st.cache_data()
 29 | def modify_data(df: pd.DataFrame) -> Tuple[pd.DataFrame, List[str]]:
 30 |     "Modify the ChEMBL database to include Lipinski descriptors. Also get the Morgan fingerprints of each molecule."
 31 |     df = df.dropna(subset=['Smiles'])
 32 |     df['MW'], df['HBD'], df['HBA'], df['LogP'] = zip(*df['Smiles'].apply(calculate_descriptors))
 33 | 
 34 |     mfp = rdFingerprintGenerator.GetMorganGenerator(radius = 2, fpSize = 2048)
 35 |     fingerprints = df["Smiles"].apply(lambda x: mfp.GetFingerprint(Chem.MolFromSmiles(x)))
 36 |     return df, fingerprints
 37 | 
 38 | def calculate_descriptors(smiles: str) -> Tuple[float, int, int, float]:
 39 |     "Calculate Lipinski descriptors: molecular weight, H-bond donors, H-bond acceptors, and LogP"
 40 |     mol = Chem.MolFromSmiles(smiles)
 41 |     if mol is None:
 42 |         return None, None, None, None
 43 |     mw = ExactMolWt(mol)
 44 |     hbd = NumHDonors(mol)
 45 |     hba = NumHAcceptors(mol)
 46 |     logp = MolLogP(mol)
 47 |     return mw, hbd, hba, logp
 48 | 
 49 | def generate_3D(smiles):
 50 |     "Generate 3D coordinates from smiles"
 51 |     mol = Chem.MolFromSmiles(smiles)
 52 |     if mol is None:
 53 |         return None
 54 |     mol = Chem.AddHs(mol)
 55 |     params = AllChem.ETKDGv3()
 56 |     params.randomSeed = 42
 57 |     AllChem.EmbedMolecule(mol)
 58 |     molstring= Chem.MolToMolBlock(mol)
 59 |     return molstring
 60 | 
 61 | def find_most_similar(fingerprints: List[str], ketcher_smiles: str, slider_val: int = 4) -> pd.DataFrame:
 62 |     mfp = rdFingerprintGenerator.GetMorganGenerator(radius = 2, fpSize = 2048)
 63 |     mol = Chem.MolFromSmiles(ketcher_smiles)
 64 |     if mol is None:
 65 |         return None
 66 |     ketcher_mol_fp = mfp.GetFingerprint(Chem.MolFromSmiles(ketcher_smiles))
 67 |     df["similarity"] = [round(Chem.DataStructs.TanimotoSimilarity(x, ketcher_mol_fp), 4) for x in fingerprints]
 68 |     return df.sort_values("similarity", ascending = False).head(slider_val)
 69 | 
 70 | # Visualize molecule 
 71 | def visualize_3D(molstring):
 72 |     "Visualize the molecule in 3D using stmol"
 73 |     w, h = 400, 400
 74 |     xyzview = py3Dmol.view(width=w,height=w)
 75 |     xyzview.addModel(molstring,'mol')
 76 |     xyzview.setStyle({'sphere':{'colorscheme':'cyanCarbon', 'scale':0.25}, 'stick':{'colorscheme':'cyanCarbon'}})
 77 |     xyzview.zoomTo()
 78 |     xyzview.spin()
 79 |     xyzview.setBackgroundColor('white')
 80 |     showmol(xyzview, height = w,width=w)
 81 | 
 82 | 
 83 | # APP
 84 | # Title
 85 | st.title('Are you drug like?!')
 86 | st.caption("Practical Proramming In Chemistry Week 11")
 87 | st.markdown("Draw a molecule and see how it stacks up against known drugs")
 88 | 
 89 | # Ketcher
 90 | # Optional draw from smiles
 91 | with st.expander("Draw Molecule From Smiles (optional)"):
 92 |     molecule = st.text_input("**Smiles**", "CCO")
 93 | 
 94 | # Ketcher
 95 | ketcher_smiles = st_ketcher(molecule, height=600) 
 96 | 
 97 | with st.expander("Smiles from Drawing"):
 98 |     st.markdown(ketcher_smiles) # Initialize a placeholder within the expander
 99 | 
100 | 
101 | 
# Load the drug table, then derive descriptors and fingerprints from it
df, fingerprints = modify_data(download_data())

# Sidebar: how many nearest neighbours to show
st.sidebar.markdown('# Options')
slider_val = st.sidebar.slider("Number of similar molecules", min_value=0, max_value=10, value=4)
109 | 
# Two equal-width columns: 3D viewer on the left, Lipinski stats on the right
col1, col2 = st.columns([1, 1])
with col1:
    st.header("3D")
    st.caption("Generate 3D coordinates for your molecule using rdkit's ETKDGv3 algorithm.")
    molstring = generate_3D(ketcher_smiles)
    # No mol block means the drawing could not be turned into a molecule
    if molstring is None:
        st.error('INVALID MOLECULE', icon="🚨")
    else:
        visualize_3D(molstring)
120 | 
# Lipinski stats
with col2:
    st.header("Lipinski Stats")
    st.caption("How does your molecule compare to the average for FDA approved drugs?")
    mw, hbd, hba, logp = calculate_descriptors(ketcher_smiles)
    if mw is None:
        st.error('INVALID MOLECULE', icon="🚨")
    else:
        # Each tile shows the drawn molecule's value and, as delta, its offset
        # from the mean of the drug table.
        metric_row1 = st.columns(2)
        metric_row2 = st.columns(2)
        tile1 = metric_row1[0].container(height = 120)
        tile1.metric("Molecular weight", round(mw, 2), round(mw - df["MW"].mean(), 2))
        tile2 = metric_row1[1].container(height = 120)
        tile2.metric("LogP", round(logp, 2), round(logp - df["LogP"].mean(), 2))
        tile3 = metric_row2[0].container(height = 120)
        tile3.metric("H-Bond Donors", hbd, round(hbd - df["HBD"].mean(), 2))
        tile4 = metric_row2[1].container(height = 120)
        tile4.metric("H-Bond Acceptors", hba, round(hba - df["HBA"].mean(), 2))

        # Lipinski's Rule of Five. The limits are inclusive: a violation is
        # *more than* 5 donors, *more than* 10 acceptors, MW *above* 500 or
        # LogP *above* 5, so a molecule sitting exactly on a limit still passes.
        if mw <= 500 and hbd <= 5 and hba <= 10 and logp <= 5:
            st.success("Passes All of Lipinski's Rules", icon="✅")
        else:
            st.error("Does not pass Lipinski's Rule of Five", icon="🚨")

        # Lipinski's Rule of Five explanation
        st.markdown("Lipinski's rule of five is a rule of thumb to evaluate the oral availability of a molecule.")
        # "\\#" emits the backslash-hash Markdown needs for a literal "#"
        # without relying on an invalid Python escape sequence.
        st.markdown("Lipinski's Rules:  \n1. \\# H-Bond donors ≤ 5 \n 2. \\# H-bond acceptors ≤ 10\n 3. MW ≤ 500 daltons \n4. LogP ≤ 5.")
        st.markdown("As with any rule, there are many exceptions")
151 | 
# Nearest neighbours of the drawn molecule in the drug table
st.header("Most similar drug molecules")
st.caption("Based on Tanimoto similarity of Morgan fingerprints")
most_similar_df = find_most_similar(fingerprints, ketcher_smiles, slider_val)
if most_similar_df is None:
    st.error('INVALID MOLECULE', icon="🚨")
else:
    # Give the embedded grid more vertical room as more molecules are requested
    height = 600 if slider_val > 8 else (500 if slider_val > 4 else 300)

    # mols2grid renders the DataFrame as a molecule grid; embed its HTML
    grid = mols2grid.display(most_similar_df, subset = ["Name", "similarity"],
                             smiles_col = "Smiles")
    raw_html = grid._repr_html_()
    components.html(raw_html, height = height)
170 | 
171 | 
# Graph the distribution of molecular properties
st.header("Distribution of molecular properties")
st.caption("How does your molecule compare to FDA approved drugs?")
mw, hbd, hba, logp = calculate_descriptors(ketcher_smiles)
if mw is None:
    st.error('INVALID MOLECULE', icon="🚨")
else:
    # One distribution plot per descriptor:
    # (section title, DataFrame column, histogram bin width, drawn molecule's value)
    panels = [
        ("Molecular weight", "MW", 100, mw),
        ("H-Bond Donors", "HBD", 1, hbd),
        ("H-Bond Acceptors", "HBA", 1, hba),
        ("LogP", "LogP", 1, logp),
    ]
    for title, column, bin_size, value in panels:
        st.subheader(title)
        fig = ff.create_distplot([df[column]], [column], bin_size = bin_size)
        # Dashed vertical line marks where the drawn molecule falls
        fig.add_vline(x = value, line_dash="dash", line_color="lightgray", annotation_text="Your molecule")
        st.plotly_chart(fig, use_container_width=True)
198 | 
199 | 
200 | 
201 | 
202 | 
203 | 
204 | 
205 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Practical Programming in Chemistry
 2 | ==================================
 3 | 
 4 | Welcome to the repository for the Practical Programming in Chemistry exercises. These exercises offer a comprehensive and hands-on introduction to computer programming, tailored specifically for chemists and chemical engineers. With a focus on Python, this course is designed to equip you with the programming skills necessary to tackle real-world chemical tasks.
 5 | 
 6 | This course is designed for individuals with little to no programming experience and focuses on applying programming concepts within the context of chemistry and chemical engineering through a series of lessons and hands-on exercises.
 7 | 
 8 | Our goal is to make programming accessible and relevant to chemists and chemical engineers, enabling you to automate tasks, analyze data, and enhance your research capabilities.
 9 | 
10 | Exercises
11 | ----------------
12 | 
13 | Below is a table linking to the exercise folders for each lecture. 
14 | Navigate to the relevant week to access the exercises.
15 | 
16 | | Lecture | Topic | Exercise Link |
17 | | --- | --- | --- |
18 | | 01 | Setup your environment | [Lecture01](Lecture01) |
19 | | 02 | GitHub and creating first repositories | [Lecture02](Lecture02) |
20 | | 03 | Conda, Jupyter notebooks, and Python basics | [Lecture03](Lecture03) |
21 | | 04 | Advanced Python: file I/O, functions, error handling, and classes. | [Lecture04](Lecture04) |
22 | | 05 | Numerical operations, data handling, data visualization: `numpy`, `pandas`, `matplotlib` |[Lecture05](Lecture05)  |
23 | | 06 | `RDKit` (part I): Reading/Writing, Descriptors, Fingerprints | [Lecture06](Lecture06) |
24 | | 07 | `RDKit` (part II): Substructure matching, Conformer generation | [Lecture07](Lecture07) |
25 | | 08 | Making a Python package | [Lecture08](Lecture08)|
26 | | 09 | Data Acquisition and Cleaning, Web APIs | [Lecture09](Lecture09) |
27 | | 10 | More packaging; project templates, code testing and coverage. | [Lecture10](Lecture10) |
28 | | 11 | Visualization and analysis of chemical data (clustering) |  [Lecture11](Lecture11)|
29 | | 12 | Streamlit |[Lecture12](Lecture12)  |
30 | | 13 |  |  |
31 | 
32 | We are currently updating last year's course. So, there might be upcoming changes in the schedule.
33 | 
34 | 
35 | **Happy coding!**
36 | 


--------------------------------------------------------------------------------
/assets/Lecture05/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture05/1.png


--------------------------------------------------------------------------------
/assets/Lecture05/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture05/2.png


--------------------------------------------------------------------------------
/assets/Lecture05/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture05/3.png


--------------------------------------------------------------------------------
/assets/Lecture05/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture05/4.png


--------------------------------------------------------------------------------
/assets/Lecture05/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture05/5.png


--------------------------------------------------------------------------------
/assets/Lecture05/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture05/6.png


--------------------------------------------------------------------------------
/assets/Lecture05/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture05/7.png


--------------------------------------------------------------------------------
/assets/Lecture05/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture05/8.png


--------------------------------------------------------------------------------
/assets/Lecture08/clone_drfp.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/Lecture08/clone_drfp.jpg


--------------------------------------------------------------------------------
/assets/PAT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/PAT.png


--------------------------------------------------------------------------------
/assets/git_bash_download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/git_bash_download.png


--------------------------------------------------------------------------------
/assets/git_workflow.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/git_workflow.jpeg


--------------------------------------------------------------------------------
/assets/ipython.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/ipython.png


--------------------------------------------------------------------------------
/assets/kernel_env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/kernel_env.png


--------------------------------------------------------------------------------
/assets/kernel_select.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/kernel_select.png


--------------------------------------------------------------------------------
/assets/python_env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/python_env.png


--------------------------------------------------------------------------------
/assets/vscode_extensions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/vscode_extensions.png


--------------------------------------------------------------------------------
/assets/vscode_install_ipython.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/vscode_install_ipython.png


--------------------------------------------------------------------------------
/assets/vscode_python_env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/vscode_python_env.png


--------------------------------------------------------------------------------
/assets/vscode_select_env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/vscode_select_env.png


--------------------------------------------------------------------------------
/assets/vscode_select_kernel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/vscode_select_kernel.png


--------------------------------------------------------------------------------
/assets/windows_conda_path.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/schwallergroup/practical-programming-in-chemistry-exercises/74c0f5d976e72b78c76f567424b2a8fc1e542b5d/assets/windows_conda_path.png


--------------------------------------------------------------------------------