├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── config.yml │ ├── enhancement-request.md │ └── project-request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── auto-comment-pr-raise.yml │ └── ci.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── computer-vision │ ├── bicep-reps-counting.md │ ├── black-and-white-image-colorizer.md │ ├── brightness-control.md │ ├── face-detection.md │ └── index.md ├── contribute.md ├── data-visualization │ ├── bangladesh-premier-league-analysis.md │ └── index.md ├── deep-learning │ ├── anamoly-detection.md │ ├── brain-tumor-detection-model.md │ ├── index.md │ └── music-genre-classification-model.md ├── generative-adversarial-networks │ └── index.md ├── index.md ├── large-language-models │ └── index.md ├── machine-learning │ ├── air-quality-prediction.md │ ├── autism-detection.md │ ├── bulldozer-price-prediction.md │ ├── cardiovascular-disease-prediction.md │ ├── crop-recommendation.md │ ├── health-insurance-cross-sell-prediction.md │ ├── heart-disease-detection-model.md │ ├── index.md │ ├── poker-hand-prediction.md │ ├── sleep-quality-prediction.md │ └── used-cars-price-prediction.md ├── natural-language-processing │ ├── chatbot-implementation.md │ ├── email-spam-detection.md │ ├── index.md │ ├── name-entity-recognition.md │ ├── next-word-pred.md │ ├── text-summarization.md │ └── twitter-sentiment-analysis.md └── project-readme-template.md └── mkdocs.yml /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 🐞 3 | about: Reporting for any bug in the project description or algorithm. 4 | title: '🐛: ' 5 | labels: ["Up-for-Grabs ✋", "bug 🐛"] 6 | assignees: 7 | - '' 8 | 9 | --- 10 | 11 | :red_circle: **Title** : 12 | :red_circle: **Bug** : 13 | :red_circle: **Changes** : 14 | 15 | ### Screenshots 📷 16 | 17 | 18 | *********************************************************************** 19 | :white_check_mark: **To be Mentioned while taking the issue :** 20 | - Full name : 21 | - What is your participant role? 22 | 23 | *********************************************************************** 24 | Happy Contributing 🚀 25 | 26 | All the best. Enjoy your open source journey ahead. 😎 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Questions 4 | url: https://discord.gg/tSqtvHUJzE 5 | about: You can join the discussions on Discord. 6 | - name: Login does not work 7 | url: https://github.com/Avdhesh-Varshney/AI-Code/blob/main/README.md 8 | about: Before opening a new issue, please make sure to read README.md 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Enhancement Request 🧑‍💻 3 | about: Improving any styling or documentation of the project/algorithm. 
4 | title: '🧑‍💻: ' 5 | labels: ["Up-for-Grabs ✋", "enhancement 🧑‍💻"] 6 | assignees: 7 | - '' 8 | 9 | --- 10 | 11 | :red_circle: **Title** : 12 | :red_circle: **Enhancement Aim** : 13 | :red_circle: **Changes** : 14 | 15 | ### Screenshots 📷 16 | 17 | 18 | *********************************************************************** 19 | :white_check_mark: **To be Mentioned while taking the issue :** 20 | - Full name : 21 | - What is your participant role? 22 | 23 | *********************************************************************** 24 | Happy Contributing 🚀 25 | 26 | All the best. Enjoy your open source journey ahead. 😎 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/project-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Code/Project Addition Request 📜 3 | about: Requesting for the new project/algorithm addition to contribute in this repository. 4 | title: '📃: ' 5 | labels: 'Up-for-Grabs ✋' 6 | assignees: 7 | - '' 8 | 9 | --- 10 | 11 | :red_circle: **Title** : 12 | :red_circle: **Aim** : 13 | :red_circle: **Brief Explanation** : 14 | 15 | ### Screenshots 📷 16 | 17 | 18 | *********************************************************************** 19 | :white_check_mark: **To be Mentioned while taking the issue :** 20 | - Full name : 21 | - What is your participant role? 22 | 23 | *********************************************************************** 24 | Happy Contributing 🚀 25 | 26 | All the best. Enjoy your open source journey ahead. 😎 27 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Closes: 2 | 3 | - **Title** : 4 | - **Your Name:** 5 | - **Idenitfy yourself:** 6 | 7 | ### Describe the add-ons or changes you've made 📃 8 | 9 | > Give a clear description of what have you added or modifications made 10 | 11 | 12 | ### Checklist: ☑️ 13 | 17 | - [ ] My code follows the [Contributing Guidelines](https://github.com/Avdhesh-Varshney/AI-Code/blob/main/README.md) & [Code of Conduct](https://github.com/Avdhesh-Varshney/AI-Code/blob/main/CODE_OF_CONDUCT.md) of this project. 18 | - [ ] This PR does not contain plagiarized content. 19 | - [ ] I have performed a self-review of my own code. 20 | - [ ] I have commented my code, particularly wherever it was hard to understand. 21 | - [ ] My changes generate no new warnings. 
22 | 23 | ### Screenshots 📷 24 | 25 | 26 | 27 | ### Working Video 🎥 28 | 29 | 30 | 31 | ##### Happy Coding 🎉 32 | -------------------------------------------------------------------------------- /.github/workflows/auto-comment-pr-raise.yml: -------------------------------------------------------------------------------- 1 | name: Auto Comment on PR 2 | 3 | on: 4 | pull_request_target: 5 | types: [opened] 6 | 7 | permissions: 8 | issues: write 9 | pull-requests: write 10 | 11 | jobs: 12 | comment: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Add Comment to Pull Request 17 | run: | 18 | COMMENT=$(cat <> $GITHUB_ENV 22 | - uses: actions/cache@v4 23 | with: 24 | key: mkdocs-material-${{ env.cache_id }} 25 | path: .cache 26 | restore-keys: | 27 | mkdocs-material- 28 | - run: pip install mkdocs-material 29 | - run: mkdocs gh-deploy --force 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Virtual environment files 2 | */venv/ 3 | venv/ 4 | myenv/ 5 | etc/ 6 | Include/ 7 | Lib/ 8 | Scripts/ 9 | share/ 10 | pyvenv.cfg 11 | __pycache__\ 12 | 13 | # Others extension files or caches 14 | *.ini 15 | .ipynb_checkpoints/ 16 | 17 | # Secret files 18 | kaggle.json 19 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 
45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | https://discord.gg/tSqtvHUJzE. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 
120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 |

Contributors Guide⚡

2 |

Welcome to our open-source project! 😍
We appreciate your interest in contributing.😊
This guide will help you get started with the project and make your first contribution.

3 | 4 | --- 5 | 6 |

Creating first Pull Request 🌟

7 | 8 | --- 9 | 1. Star this repository. 10 | 2. Fork this repository. 11 | 3. Clone the forked repository. 12 | ```css 13 | git clone https://github.com//AI-Code.git 14 | ``` 15 | 16 | 4. Navigate to the project directory. 17 | ```py 18 | cd AI-Code 19 | ``` 20 | 5. Create a new branch. 21 | ```css 22 | git checkout -b 23 | ``` 24 | 6. Make changes. 25 | 7. Stage your changes and commit 26 | ```css 27 | git add . 28 | git commit -m "" 29 | ``` 30 | 8. Push your local commits to the remote repo. 31 | ```css 32 | git push -u origin 33 | ``` 34 | 9. Create a Pull Request. 35 | 10. Congratulations! 🎉 you've made your contribution. 36 | 37 | --- 38 | 39 | ### Communication and Support 💬 40 | - Join the project's communication channels to interact with other contributors and seek assistance. 41 | - If you have any questions or need help, don't hesitate to ask in the project's communication channels or comment on the relevant issue. 42 | 43 | ### Code of Conduct 😇 44 | Please follow our project's code of conduct while contributing.
Treat all contributors and users with respect and create a positive and inclusive environment for everyone. 45 | 46 | ### License 📄 47 | The project is licensed under ***MIT***. Make sure to review and comply with the license terms.
We hope this guide helps you get started with contributing to our open-source project. Thank you for your contribution! 48 | 49 | ### Need more help?🤔 50 | 51 | You can refer to the following articles on basics of Git and Github and also contact the Project Mentors, in case you are stuck: 52 | 53 | - [Forking a Repo](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) 54 | - [Cloning a Repo](https://help.github.com/en/desktop/contributing-to-projects/creating-an-issue-or-pull-request) 55 | - [How to create a Pull Request](https://opensource.com/article/19/7/create-pull-request-github) 56 | - [Getting started with Git and GitHub](https://towardsdatascience.com/getting-started-with-git-and-github-6fcd0f2d4ac6) 57 | - [Learn GitHub from Scratch](https://lab.github.com/githubtraining/introduction-to-github) 58 | 59 | --- 60 | 61 | ### Note from Admin ❗ 62 | 63 | - We welcome contributions from everyone. However, please avoid spamming the repository with irrelevant issues & pull requests. We reserve the right to mark PRs as invalid if they are not relevant. 64 | 65 |
66 | I love connecting with different people, so if you want to say hi, I'd be happy to chat! :)
68 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Avdhesh Varshney 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Hey <𝚌𝚘𝚍𝚎𝚛𝚜/>! 👋

2 | 3 | [![Typing SVG](https://readme-typing-svg.demolab.com?font=Monoton&size=85&pause=12&speed=12&color=00FF00¢er=true&vCenter=true&width=2000&height=200&lines=Hello+World!;Welcome+to+AI-Code!;Learn,+Build,+Contribute!;Master+AI+with+Hands-on+Projects!;Machine+Learning+to+LLMs!;Scratch+Code+for+Every+Algorithm!;Collaborate.+Innovate.+Inspire!;Your+AI+Journey+Starts+Here!)](https://git.io/typing-svg) 4 | 5 |
6 |

7 | 8 | 9 | 10 | 11 | 12 |

13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
21 | 22 | 23 | 24 | 25 |

26 | 27 |

28 | 29 | ![Python](https://img.shields.io/badge/python-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54) 30 | ![Markdown](https://img.shields.io/badge/markdown-%23000000.svg?style=for-the-badge&logo=markdown&logoColor=white) 31 | ![Git](https://img.shields.io/badge/git-%23F05033.svg?style=for-the-badge&logo=git&logoColor=white) 32 | ![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white) 33 | ![Visual Studio Code](https://img.shields.io/badge/Visual%20Studio%20Code-0078d7.svg?style=for-the-badge&logo=visual-studio-code&logoColor=white) 34 |

35 | 36 |

37 | 38 | ![Statstics](https://img.shields.io/badge/Statistics-e67e22?style=for-the-badge) 39 | ![ML](https://img.shields.io/badge/ML-%23FF7F50.svg?style=for-the-badge) 40 | ![DL](https://img.shields.io/badge/DL-%23FF6347.svg?style=for-the-badge) 41 | ![NLP](https://img.shields.io/badge/NLP-%23706FD3.svg?style=for-the-badge) 42 | ![OpenCV](https://img.shields.io/badge/OpenCV-34495e?style=for-the-badge) 43 | ![GAN](https://img.shields.io/badge/GAN-%23FF69B4.svg?style=for-the-badge) 44 | ![LLM](https://img.shields.io/badge/LLM-%238E44AD.svg?style=for-the-badge) 45 | ![AI](https://img.shields.io/badge/AI-%234A90E2.svg?style=for-the-badge) 46 |

47 | 48 |
49 | 50 | --- 51 | 52 | #### :zap: About AI Code 🌟 53 | 54 | **AI Code** is an open-source initiative designed to make learning **Artificial Intelligence (AI)** more accessible, structured, and hands-on. Whether you're a beginner or an experienced developer, AI-Code provides **scratch implementations** of various **AI algorithms** alongside **real-world project guides**, helping you bridge the gap between theory and practice. 55 | 56 |
57 |

:zap: Core Features 🔑

58 | 59 | - Scratch-level implementations of **AI algorithms** 🧠 60 | - **Guides**, datasets, research papers, and **step-by-step tutorials** 📘 61 | - Clear directories with focused **README** files 📂 62 | - Fast learning with minimal complexity 🚀 63 | 64 |
65 | 66 |
67 |

:zap: Setup the Project 🍱

68 | 69 | 1. Go through the [Contributing Guidelines](./CONTRIBUTING.md) to fork and clone the project. 70 | 2. After forking and cloning the project in your local system: 71 | - Create a virtual environment: 72 | ```bash 73 | python -m venv myenv 74 | ``` 75 | - Activate the virtual environment: 76 | - On Windows: 77 | ```bash 78 | myenv\Scripts\activate 79 | ``` 80 | - On macOS/Linux: 81 | ```bash 82 | source myenv/bin/activate 83 | ``` 84 | - Install the required Python package: 85 | ```bash 86 | pip install mkdocs-material 87 | ``` 88 | 3. After installing the package, run the following command to start the development server: 89 | ```bash 90 | mkdocs serve 91 | ``` 92 | 4. Open the local server URL (usually `http://127.0.0.1:8000`) in your browser. You are now ready to work on the project. 93 | 94 |
95 | 96 |
97 |

:zap: Important Points to remember while submitting your work 📍

98 | 99 | > We want your work to be readable by others; therefore, we encourage you to note the following: 100 | 101 | 1. File names should be in `kebab-case` (e.g., `music-genre-classification-model`, `insurance-cross-sell-prediction`). 102 | 2. Follow the [***PROJECT README TEMPLATE***](./docs/project-readme-template.md) and [***ALGORITHM README TEMPLATE***](./docs/algorithm-readme-template.md) for reference. 103 | 3. Do not upload images or video files directly. Use a GitHub raw URL in the documentation. 104 | 4. Upload your notebook to Kaggle, make it public, and share the Kaggle embed link only. Other links are not accepted. 105 | 5. Limit commits to 3-4 unless given permission by project Admins or Mentors. 106 | 6. Keep commit messages clear and relevant; avoid unnecessary details. 107 | 108 |
109 | 110 |
111 |

:zap: Pull Requests Review Criteria 🧲

112 | 113 | 1. It is required to follow the mentioned [do/don't](https://github.com/Avdhesh-Varshney/AI-Code/issues/9) guidelines. 114 | 2. Please fill the ***PR Template*** properly while making a Pull Request. 115 | 3. Do not commit directly to the `main` branch, or your PR will be instantly rejected. 116 | 4. Ensure all work is original and not copied from other sources. 117 | 5. Add comments to your code wherever necessary for clarity. 118 | 6. Include a working video and show integration with the `AI-Code MkDocs Documentation` website as part of your PR. 119 | 7. For frontend updates, share screenshots and work samples before submitting a PR. 120 | 121 |
122 | 123 | --- 124 | 125 |
126 | 127 | ### ❄️ Open Source Programs 128 | 129 | 130 | 131 | 138 | 145 | 152 | 159 | 166 | 167 | 168 | 175 | 176 |
132 |
133 | 134 |

SSOC

135 | 2024 136 |
137 |
139 |
140 | 141 |

VSOC

142 | 2024 143 |
144 |
146 |
147 | 148 |

KWOC

149 | 2024 150 |
151 |
153 |
154 | 155 |

IWOC

156 | 2025 157 |
158 |
160 |
161 | 162 |

SWOC

163 | 2025 164 |
165 |
169 |
170 | 171 |

DWOC

172 | 2025 173 |
174 |
177 | 178 | ### ✨ Our Valuable Contributors 179 | 180 | 181 | 182 | 183 | 184 | ![Line](https://github.com/Avdhesh-Varshney/WebMasterLog/assets/114330097/4b78510f-a941-45f8-a9d5-80ed0705e847) 185 | 186 | # Tip from us 😇 187 | ##### It always takes time to understand and learn. So, don't worry at all. We know you have got this! 💪 188 | ### Show some  ❤️  by  🌟  this repository! 189 | 190 |
191 | 192 | 193 | -------------------------------------------------------------------------------- /docs/computer-vision/bicep-reps-counting.md: -------------------------------------------------------------------------------- 1 | # Counting Bicep Reps 2 | 3 | 4 | ### AIM 5 | To track and count bicep curls in real time using computer vision techniques with OpenCV and Mediapipe's Pose module. 6 | 7 | ### DATASET LINK 8 | This project does not use a specific dataset as it works with real-time video from a webcam. 9 | 10 | 11 | ### NOTEBOOK LINK 12 | [https://drive.google.com/file/d/13Omm8Zy0lmtjmdHgfQbraBu3NJf3wknw/view?usp=sharing](https://drive.google.com/file/d/13Omm8Zy0lmtjmdHgfQbraBu3NJf3wknw/view?usp=sharing) 13 | 14 | 15 | ### LIBRARIES NEEDED 16 | 17 | ??? quote "LIBRARIES USED" 18 | 19 | - OpenCV 20 | - Mediapipe 21 | - NumPy 22 | 23 | --- 24 | 25 | ### DESCRIPTION 26 | 27 | !!! info "What is the requirement of the project?" 28 | - The project aims to provide a computer vision-based solution for tracking fitness exercises like bicep curls without the need for wearable devices or sensors. 29 | 30 | ??? info "Why is it necessary?" 31 | - Helps fitness enthusiasts monitor their workouts in real time. 32 | - Provides an affordable and accessible alternative to wearable fitness trackers. 33 | 34 | ??? info "How is it beneficial and used?" 35 | - Real-time feedback on workout form and repetition count. 36 | - Can be extended to other fitness exercises and integrated into fitness apps 37 | 38 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 39 | - Explored Mediapipe's Pose module for pose landmark detection. 40 | - Integrated OpenCV for video frame processing and real-time feedback. 41 | - Planned the logic for detecting curls based on elbow angle thresholds. 42 | 43 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 44 | - Mediapipe official documentation. 45 | - OpenCV tutorials on video processing. 46 | 47 | 48 | --- 49 | 50 | ### EXPLANATION 51 | 52 | #### DETAILS OF THE DIFFERENT FEATURES 53 | - Pose Estimation: Utilized Mediapipe's Pose module to detect key landmarks on the human body. 54 | - Angle Calculation: Calculated angles at the elbow joints to determine curl movement. 55 | - Rep Tracking: Incremented rep count when alternating between full curl and relaxed positions. 56 | - Real-Time Feedback: Displayed the remaining curl count on the video feed. 57 | 58 | 59 | --- 60 | 61 | #### PROJECT WORKFLOW 62 | 63 | === "Step 1" 64 | Initial setup: 65 | - Installed OpenCV and Mediapipe. 66 | - Set up a webcam feed for video capture. 67 | 68 | 69 | === "Step 2" 70 | Pose detection: 71 | - Used Mediapipe's Pose module to identify body landmarks. 72 | 73 | 74 | === "Step 3" 75 | Angle calculation: 76 | - Implemented a function to calculate the angle between shoulder, elbow, and wrist. 77 | 78 | 79 | === "Step 4" 80 | Rep detection: 81 | - Monitored elbow angles to track upward and downward movements. 82 | 83 | 84 | === "Step 5" 85 | Real-time feedback: 86 | - Displayed the remaining number of curls on the video feed using OpenCV. 87 | 88 | 89 | === "Step 6" 90 | Completion: 91 | - Stopped the program when the target reps were completed or on manual exit. 92 | 93 | 94 | --- 95 | 96 | #### PROJECT TRADE-OFFS AND SOLUTIONS 97 | 98 | === "Trade Off 1" 99 | - Accuracy vs. Simplicity: 100 | - Using elbow angles alone may not handle all body postures but ensures simplicity. 
101 | - Solution: Fine-tuned angle thresholds and added tracking for alternating arms. 102 | 103 | === "Trade Off 2" 104 | - Real-Time Performance vs. Model Complexity: 105 | - Mediapipe's lightweight solution ensured smooth processing over heavier models. 106 | 107 | --- 108 | 109 | ### SCREENSHOTS 110 | 111 | 1. Entering the number of reps you want to perform 112 | ![Screenshot 2025-01-19 184454](https://github.com/user-attachments/assets/afac56f4-c0ce-45ec-8f41-1b7effc02e5a) 113 | 114 | 115 | 2. Performing reps 116 | ![Screenshot 2025-01-19 184607](https://github.com/user-attachments/assets/667b3e10-22b0-48a0-8e9b-42c3dcfc9f66) 117 | 118 | 119 | 120 | !!! success "Project workflow" 121 | 122 | ```mermaid 123 | graph LR 124 | A[Webcam Feed] --> F[Enter No of Biceps Reps] 125 | F --> B[Mediapipe Pose Detection] 126 | B --> C[Elbow Angle Calculation] 127 | C --> D[Rep Count Decrement] 128 | D --> E[Real-Time Update on Frame] 129 | ``` 130 | 131 | --- 132 | 133 | ### CONCLUSION 134 | 135 | #### KEY LEARNINGS 136 | 137 | !!! tip "Insights gained from the data" 138 | - Real-time video processing using OpenCV. 139 | - Pose detection and landmark analysis with Mediapipe. 140 | 141 | ??? tip "Improvements in understanding machine learning concepts" 142 | - Understanding geometric computations in pose analysis. 143 | - Effective use of pre-trained models like Mediapipe Pose. 144 | 145 | ??? tip "Challenges faced and how they were overcome" 146 | - Challenge: Handling incorrect postures. 147 | - Solution: Fine-tuning angle thresholds. 148 | 149 | --- 150 | 151 | #### USE CASES 152 | === "Application 1" 153 | 154 | **Personal Fitness Tracker** 155 | - Helps users track their workouts without additional equipment. 156 | 157 | === "Application 2" 158 | 159 | **Fitness App Integration** 160 | - Can be integrated into fitness apps for real-time exercise tracking. 161 | -------------------------------------------------------------------------------- /docs/computer-vision/black-and-white-image-colorizer.md: -------------------------------------------------------------------------------- 1 | # Black and White Image Colorizer 2 | 3 | ### AIM 4 | Colorization of Black and White Images using OpenCV and pre-trained caffe models. 5 | 6 | ### PRE-TRAINED MODELS 7 | [colorization_deploy_v2.prototxt](https://github.com/richzhang/colorization/blob/caffe/models/colorization_deploy_v2.prototxt) - 8 | [colorization_release_v2.caffemodel](https://www.dropbox.com/s/dx0qvhhp5hbcx7z/colorization_release_v2.caffemodel?dl=1) - 9 | [pts_in_hull.npy](https://github.com/richzhang/colorization/blob/caffe/resources/pts_in_hull.npy) 10 | 11 | ### NOTEBOOK LINK 12 | 13 | [Colab Notebook](https://colab.research.google.com/drive/1w5GbYEIsX41Uh8i_5q7c8Nh0y5UOpBGb) 14 | 15 | ### LIBRARIES NEEDED 16 | 17 | ??? quote "LIBRARIES USED" 18 | 19 | - numpy 20 | - cv2 21 | 22 | --- 23 | 24 | ### DESCRIPTION 25 | 26 | !!! info "What is the requirement of the project?" 27 | 28 | - The project aims to perform colorization of black and white images. 29 | - It showcases the capabilities of OpenCV's DNN module and caffe models. 30 | - It is done by processing the given image using OpenCV and using the Lab color space to hallucinate an approximation of how a colorized version of the image might look. 31 | 32 | ??? info "Why is it necessary?" 33 | 34 | - It helps preserve historical black-and-white photos. 35 | - It can be used to add color to grayscale images for creative industries.
36 | - It acts an advancing computer vision applications in artistic and research fields. 37 | 38 | ??? info "How is it beneficial and used?" 39 | 40 | - **Personal use :** It helps in restoring old family photographs. 41 | - **Cultural and Political :** it also enhances grayscale photographs of important historic events for modern displays. 42 | - **Creativity and Art :** it improves AI-based creative tools for artists and designers. 43 | 44 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 45 | 46 | - **Initial approach** : reading various research papers and analyze different approaches on how to deal with this project. 47 | - Identified Richzhang research paper on the title : Colorful Image colorization. 48 | - Did some research on pre-trained models for image colorization. 49 | - Understood OpenCV's DNN module and its implementation. 50 | - Experimented with sample images to test model outputs. 51 | 52 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 53 | 54 | - [Richzhang's Colorful Image Colorization](https://richzhang.github.io/colorization) 55 | - [Lab Color space](https://www.xrite.com/blog/lab-color-space) 56 | - [openCV Documentation ](https://pypi.org/project/opencv-python/) 57 | 58 | --- 59 | 60 | ### EXPLANATION 61 | 62 | #### WHAT I HAVE DONE 63 | 64 | === "Step 1" 65 | 66 | Initial data exploration and understanding: 67 | 68 | - Load the grayscale input image. 69 | - Load pre-trained caffe models using openCV dnn module. 70 | 71 | === "Step 2" 72 | 73 | Data cleaning and preprocessing: 74 | 75 | - Preprocess image to normalize and convert to LAB color space. 76 | - Resize image for the network. 77 | - Split L channel and perform mean subtraction. 78 | - Predict ab channel from the input of L channel. 79 | 80 | === "Step 3" 81 | 82 | Feature engineering and selection: 83 | 84 | - Resize predicted ab channel's volume to same dimension as our image. 85 | - Join L and predicted ab channel. 86 | - Convert image from Lab back to RGB. 87 | 88 | === "Step 4" 89 | 90 | Result : 91 | 92 | - Resize and Show the Original and Colorized image. 93 | 94 | --- 95 | 96 | #### PROJECT TRADE-OFFS AND SOLUTIONS 97 | 98 | === "Trade Off 1" 99 | 100 | - Computational efficiency vs. color accuracy. 101 | - **Solution : **Used optimized preprocessing pipelines to reduce runtime. 102 | 103 | === "Trade Off 2" 104 | 105 | - Pre-trained model generalization vs. custom training. 106 | - **Solution : **Choose the pre-trained model for faster implementation and reliable results. 107 | 108 | --- 109 | 110 | ### SCREENSHOTS 111 | 112 | !!! success "Project structure or tree diagram" 113 | 114 | ``` mermaid 115 | graph LR 116 | A[Load Grayscale Image] --> B[Preprocess Image]; 117 | B --> C[Load Pre-trained Model]; 118 | C --> D[Predict A and B Channels]; 119 | D --> E[Combine with L Channel]; 120 | E --> F[Convert to RGB]; 121 | F --> G[Display/Save Colorized Image]; 122 | ``` 123 | 124 | ??? tip "Visualizations of results" 125 | 126 | === "Original Image" 127 | ![Original Image](https://github.com/user-attachments/assets/98a68022-eb8a-4e2e-b87a-edf5b8a392fa) 128 | 129 | === "Colorized Image" 130 | ![Colorized Image](https://github.com/user-attachments/assets/181f585e-a2de-4bf5-aead-1c3a56ac7f8e) 131 | 132 | === "Result" 133 | ![result](https://github.com/user-attachments/assets/6bc14754-e097-4093-95df-4826cd0bae85) 134 | 135 | --- 136 | 137 | ### CONCLUSION 138 | 139 | #### KEY LEARNINGS 140 | 141 | !!! 
tip "Insights gained from the data" 142 | 143 | - **Color Space : **LAB color space facilitates colorization tasks. 144 | - **Pre-trained Models :** Pre-trained models can generalize across various grayscale images. 145 | 146 | ??? tip "Improvements in understanding machine learning concepts" 147 | 148 | - **OpenCV : **Enhanced knowledge of OpenCV's DNN module. 149 | - **Caffe Models : **Usage of pre-trained models. 150 | - **Image Dimensionality : **Understanding how Image can be manipulated. 151 | 152 | ??? tip "Challenges faced and how they were overcome" 153 | 154 | - **Color Space Conversion : **Initial difficulties with LAB to RGB conversion; resolved using OpenCV documentation. 155 | 156 | --- 157 | 158 | #### USE CASES 159 | 160 | === "Application 1" 161 | 162 | **Image Restoration** 163 | 164 | - Restoring old family photographs to vivid colors. 165 | 166 | === "Application 2" 167 | 168 | **Creative Industries** 169 | 170 | - Colorizing artistic grayscale sketches for concept designs. 171 | -------------------------------------------------------------------------------- /docs/computer-vision/brightness-control.md: -------------------------------------------------------------------------------- 1 | # 📜 Brightness control 2 |
3 | 4 |
5 | 6 | ## 🎯 AIM 7 | To develop a real-time brightness control system using hand gestures, leveraging OpenCV and MediaPipe for hand detection and brightness adjustment. 8 | 9 | 10 | ## 📊 DATASET LINK 11 | No dataset used 12 | 13 | ## 📓 NOTEBOOK LINK 14 | 15 | [https://drive.google.com/file/d/1q7kraajGykfc2Kb6-84dCOjkrDGhIQcy/view?usp=sharing](https://drive.google.com/file/d/1q7kraajGykfc2Kb6-84dCOjkrDGhIQcy/view?usp=sharing) 16 | 17 | 18 | ## ⚙️ TECH STACK 19 | 20 | | **Category** | **Technologies** | 21 | |--------------------------|---------------------------------------------| 22 | | **Languages** | Python | 23 | | **Libraries/Frameworks** | OpenCV, NumPy, MediaPipe, cvzone | 24 | | **Tools** | Jupyter Notebook, Local Python IDE | 25 | 26 | 27 | --- 28 | 29 | ## 📝 DESCRIPTION 30 | !!! info "What is the requirement of the project?" 31 | - The project requires a webcam to capture real-time video and detect hand gestures for brightness control. 32 | 33 | ??? info "How is it beneficial and used?" 34 | - Allows users to control screen brightness without physical touch, making it useful for touchless interfaces. 35 | - Ideal for applications in smart home systems and assistive technologies. 36 | 37 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 38 | - Identified the need for a touchless brightness control system. 39 | - Selected OpenCV for video processing and MediaPipe for efficient hand tracking. 40 | - Developed a prototype to calculate brightness based on hand distance. 41 | 42 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 43 | - OpenCV documentation for video processing. 44 | - MediaPipe's official guide for hand tracking. 45 | 46 | 47 | --- 48 | 49 | ## 🔍 EXPLANATION 50 | 51 | ### 🧩 DETAILS OF THE DIFFERENT FEATURES 52 | 53 | #### 🛠 Developed Features 54 | 55 | | Feature Name | Description | Reason | 56 | |--------------|-------------|----------| 57 | | Hand Detection | Detects hand gestures in real-time | To control brightness with gestures | 58 | | Distance Calculation | Calculates distance between fingers | To adjust brightness dynamically | 59 | | Brightness Mapping | Maps hand distance to brightness levels | Ensures smooth adjustment of brightness | 60 | 61 | 62 | --- 63 | 64 | ### 🛤 PROJECT WORKFLOW 65 | 66 | !!! success "Project workflow" 67 | 68 | ``` mermaid 69 | graph LR 70 | A[Start] --> B[Initialize Webcam]; 71 | B --> C[Detect Hand Gestures]; 72 | C --> D[Calculate Distance]; 73 | D --> E[Adjust Brightness]; 74 | E --> F[Display Output]; 75 | ``` 76 | 77 | === "Step 1" 78 | - Initialize the webcam using OpenCV. 79 | 80 | 81 | === "Step 2" 82 | - Use MediaPipe to detect hands in the video feed. 83 | 84 | === "Step 3" 85 | - Calculate the distance between two fingers (e.g., thumb and index). 86 | 87 | === "Step 4" 88 | - Map the distance to a brightness range. 89 | 90 | === "Step 5" 91 | - Display the adjusted brightness on the video feed. 92 | 93 | --- 94 | 95 | ### 🖥 CODE EXPLANATION 96 | 97 | === "Section 1: Webcam Initialization" 98 | - The program begins by setting up the webcam to capture frames with a resolution of 640x480 pixels. This ensures consistent processing and visualization of the video stream. 
99 | 100 | ```python 101 | cap = cv2.VideoCapture(0) 102 | cap.set(3, 640) # Set width 103 | cap.set(4, 480) # Set height 104 | ``` 105 | 106 | === "Section 2: Hand Detection and Brightness Control" 107 | - Using the `HandDetector` from `cvzone`, the program tracks one hand (maxHands=1). The brightness of the video frame is dynamically adjusted based on the distance between the thumb and index finger. 108 | 109 | ```python 110 | detector = HandDetector(detectionCon=0.8, maxHands=1) 111 | brightness = 1.0 # Default brightness level 112 | ``` 113 | 114 | - The HandDetector detects hand landmarks in each frame with a confidence threshold of 0.8. The initial brightness is set to 1.0 (normal). 115 | 116 | ```python 117 | hands, img = detector.findHands(frame, flipType=False) 118 | 119 | if hands: 120 | hand = hands[0] 121 | lm_list = hand['lmList'] 122 | if len(lm_list) > 8: 123 | thumb_tip = lm_list[4] 124 | index_tip = lm_list[8] 125 | distance = int(((thumb_tip[0] - index_tip[0]) ** 2 + (thumb_tip[1] - index_tip[1]) ** 2) ** 0.5) 126 | brightness = np.interp(distance, [20, 200], [0, 1]) 127 | ``` 128 | 129 | - The program calculates the distance between the thumb tip (`lmList[4]`) and index finger tip (`lmList[8]`). This distance is mapped to a brightness range of 0 to 1 using np.interp. 130 | 131 | === "Section 3: Brightness Adjustment and Display " 132 | 133 | - The captured frame's brightness is modified by scaling the value (V) channel in the HSV color space according to the calculated brightness level. 134 | 135 | ```python 136 | hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) 137 | hsv[..., 2] = np.clip(hsv[..., 2] * brightness, 0, 255).astype(np.uint8) 138 | frame_bright = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) 139 | cv2.imshow("Brightness Controller", frame_bright) 140 | ``` 141 | 142 | - This technique ensures smooth, real-time brightness adjustments based on the user's hand gestures. The output frame is displayed with the adjusted brightness level. 143 | 144 | 145 | --- 146 | 147 | ### ⚖️ PROJECT TRADE-OFFS AND SOLUTIONS 148 | 149 | 150 | === "Trade Off 1" 151 | - Real-time processing vs. computational efficiency: Optimized hand detection by limiting the maximum number of detectable hands to 1. 152 | 153 | 154 | === "Trade Off 2" 155 | - Precision in brightness control vs. usability: Adjusted mapping function to ensure smooth transitions. 156 | 157 | --- 158 | 159 | ## 🖼 SCREENSHOTS 160 | ??? tip "Working of the model" 161 | 162 | === "Image Topic" 163 | image 164 | 165 | 166 | 167 | --- 168 | 169 | ## ✅ CONCLUSION 170 | 171 | ### 🔑 KEY LEARNINGS 172 | !!! tip "Insights gained from the data" 173 | - Improved understanding of real-time video processing. 174 | - Learned to integrate gesture detection with hardware functionalities. 175 | 176 | ??? tip "Improvements in understanding machine learning concepts" 177 | - Gained insights into MediaPipe's efficient hand detection algorithms. 178 | 179 | --- 180 | 181 | ### 🌍 USE CASES 182 | === "Smart Homes" 183 | - Touchless brightness control for smart home displays. 184 | 185 | === "Assistive Technologies" 186 | - Brightness adjustment for users with limited mobility. 
187 | -------------------------------------------------------------------------------- /docs/computer-vision/face-detection.md: -------------------------------------------------------------------------------- 1 | # Face Detection 2 | 3 | 4 | ### AIM 5 | The goal of this project is to build a face detection system using OpenCV, which identifies faces in static images using Haar Cascades. 6 | 7 | 8 | ### DATASET LINK 9 | For this project we are going to use the pretrained Haar Cascade XML file for face detection from OpenCV's Github repository. 10 | 11 | [https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml](https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml) 12 | 13 | 14 | ### NOTEBOOK LINK 15 | [https://colab.research.google.com/drive/1upcl9sa5cL5fUuVLBG5IVuU0xPYs3Nwf#scrollTo=94ggAdg5AnUk](https://colab.research.google.com/drive/1upcl9sa5cL5fUuVLBG5IVuU0xPYs3Nwf#scrollTo=94ggAdg5AnUk) 16 | 17 | 18 | ### LIBRARIES NEEDED 19 | 20 | ??? quote "LIBRARIES USED" 21 | 22 | - OpenCV 23 | - Random 24 | - Matplotlib 25 | 26 | --- 27 | 28 | ### DESCRIPTION 29 | This project involves building a face detection model using OpenCV's pre-trained Haar Cascade Classifiers to detect faces in images. 30 | 31 | !!! info "What is the requirement of the project?" 32 | - A face detection system is needed for various applications such as security, attendance tracking, and facial recognition systems. 33 | - This project demonstrates a basic use of computer vision techniques for detecting faces in static images. 34 | 35 | 36 | ??? info "Why is it necessary?" 37 | - Face detection is the first crucial step in many computer vision applications such as face recognition and emotion analysis. 38 | - It is an essential component in systems that require human identification or verification. 39 | 40 | ??? info "How is it beneficial and used?" 41 | - Face detection can be used in automation systems, for example, in attendance tracking, photo tagging, and security surveillance. 42 | - It enables various applications in user experience enhancement and biometric systems. 43 | 44 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 45 | - I began by exploring OpenCV documentation, focusing on how to implement Haar Cascade for face detection. 46 | - Initially, I focused on static image detection, planning to extend the project to video-based detection in the future. 47 | 48 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 49 | - OpenCV documentation 50 | - Book: "Learning OpenCV 3" by Adrian Kaehler and Gary Bradski 51 | 52 | --- 53 | 54 | ### EXPLANATION 55 | 56 | #### DETAILS OF THE DIFFERENT FEATURES 57 | 58 | - Haar Cascade Classifier: A machine learning-based approach for detecting objects in images or video. It works by training on a large set of positive and negative images of faces. 59 | - Cascade Classifier*: The classifier works through a series of stages, each aimed at increasing detection accuracy. 60 | - Face Detection: The primary feature of this project is detecting human faces in static images, which is the first step in many facial recognition systems. 61 | 62 | 63 | 64 | --- 65 | 66 | #### PROJECT WORKFLOW 67 | 68 | === "Step 1" 69 | 70 | Initial data exploration and understanding: 71 | - Research the Haar Cascade method for face detection in OpenCV. 
72 | - Collect sample images for testing the model's performance. 73 | 74 | 75 | === "Step 2" 76 | 77 | Data cleaning and preprocessing: 78 | - Ensure all input images are properly formatted (e.g., grayscale images for face detection). 79 | - Resize or crop images to ensure optimal processing speed. 80 | 81 | 82 | === "Step 3" 83 | 84 | Feature engineering and selection: 85 | - Use pre-trained Haar Cascade classifiers for detecting faces. 86 | - Select the appropriate classifier based on face orientation and conditions (e.g., frontal face, profile). 87 | 88 | 89 | === "Step 4" 90 | 91 | Model training and evaluation: 92 | - Use OpenCV's pre-trained Haar Cascade models. 93 | - Test the detection accuracy on various sample images. 94 | 95 | 96 | === "Step 5" 97 | 98 | Model optimization and fine-tuning: 99 | - Adjust parameters such as scale factor and minNeighbors to enhance accuracy. 100 | - Experiment with different input image sizes to balance speed and accuracy. 101 | 102 | 103 | === "Step 6" 104 | 105 | Validation and testing: 106 | - Validate the model's effectiveness on different test images, ensuring robust detection. 107 | - Evaluate the face detection accuracy based on diverse lighting and image conditions. 108 | 109 | 110 | --- 111 | 112 | #### PROJECT TRADE-OFFS AND SOLUTIONS 113 | 114 | === "Trade Off 1" 115 | - Accuracy vs. computational efficiency. 116 | - Solution: Fine-tuned classifier parameters to ensure a balance between accuracy and speed. 117 | 118 | === "Trade Off 2" 119 | - Detection performance vs. image resolution. 120 | - Solution: Optimized input image resolution and processing flow to ensure both fast processing and accurate detection. 121 | 122 | --- 123 | 124 | ### SCREENSHOTS 125 | 126 | !!! success "Project workflow" 127 | 128 | ``` mermaid 129 | graph LR 130 | A[Start] --> B{Face Detected?} 131 | B -->|Yes| C[Mark Face] 132 | C --> D[Display Result] 133 | B -->|No| F[Idle/Do Nothing] 134 | ``` 135 | 136 | --- 137 | 138 | ### CONCLUSION 139 | 140 | #### KEY LEARNINGS 141 | 142 | !!! tip "Insights gained from the data" 143 | - Gained an understanding of face detection using Haar Cascades. 144 | - Improved ability to optimize computer vision models for accuracy and speed. 145 | 146 | ??? tip "Improvements in understanding machine learning concepts" 147 | - Learned how to handle trade-offs between accuracy and speed in real-time applications. 148 | - Gained hands-on experience with the implementation of object detection algorithms. 149 | 150 | ??? tip "Challenges faced and how they were overcome" 151 | - Challenge: Low detection accuracy in poor lighting conditions. 152 | - Solution: Adjusted classifier parameters and added preprocessing steps to improve accuracy. 153 | 154 | --- 155 | 156 | #### USE CASES 157 | 158 | === "Application 1" 159 | 160 | **Security Surveillance Systems** 161 | 162 | - Used for identifying individuals or monitoring for intruders in secure areas. 163 | 164 | === "Application 2" 165 | 166 | **Attendance Systems** 167 | 168 | - Used to automate attendance tracking by detecting the faces of students or employees. 
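---

A minimal, illustrative snippet of the detection flow described above (the cascade file path and image name are placeholders; `scaleFactor` and `minNeighbors` are the tuning knobs mentioned in the workflow, and the values shown here are assumptions rather than the notebook's exact settings):

```python
import cv2
from matplotlib import pyplot as plt

# Load the pre-trained Haar Cascade for frontal faces (path is a placeholder).
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")

# Read a sample image and convert it to grayscale, which the detector expects.
image = cv2.imread("sample.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Detect faces; scaleFactor and minNeighbors trade accuracy against speed.
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

# Draw a rectangle around each detected face and display the result.
for (x, y, w, h) in faces:
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis("off")
plt.show()
```

See the linked notebook for the full experiment and the exact parameter values used there.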
169 | -------------------------------------------------------------------------------- /docs/computer-vision/index.md: -------------------------------------------------------------------------------- 1 | # 🎥 Computer Vision 2 | 3 | 58 | -------------------------------------------------------------------------------- /docs/contribute.md: -------------------------------------------------------------------------------- 1 | # 📝 Contribute to AI-Code 🚀 2 | 3 | Welcome to **AI-Code**! Whether you're an expert or a beginner, your contributions matter. Let's build AI projects together! 4 | 5 | ## Getting Started 6 | 7 | 1. **Star & Fork:** [Star](https://github.com/Avdhesh-Varshney/AI-Code) ⭐ & fork the repo. 8 | 2. **Clone:** 9 | ```bash 10 | git clone https://github.com//AI-Code.git && cd AI-Code 11 | ``` 12 | 3. **Create Branch:** 13 | ```bash 14 | git checkout -b 15 | ``` 16 | 4. **Set Up Environment:** 17 | ```bash 18 | python -m venv env && source env/bin/activate # (Windows: env\Scripts\activate) 19 | pip install -r requirements.txt 20 | ``` 21 | 5. **Preview Locally:** 22 | ```bash 23 | mkdocs serve # Visit http://127.0.0.1:8000/AI-Code/ 24 | ``` 25 | 26 | ## Making Contributions 27 | 28 | 1. **Edit Code:** Follow project standards. 29 | 2. **Stage & Commit:** 30 | ```bash 31 | git add . && git commit -m "" 32 | ``` 33 | 3. **Push Changes:** 34 | ```bash 35 | git push -u origin 36 | ``` 37 | 4. **Create a Pull Request (PR):** 38 | - Go to GitHub → Open a PR → Provide clear details. 39 | 40 | ## Contribution Guidelines 41 | 42 | - **File Naming:** Use `kebab-case` (e.g., `ai-model.py`). 43 | - **Docs:** Follow [README Template](./project-readme-template.md). 44 | - **Commits:** Keep them concise & meaningful. 45 | - **PRs:** No direct commits to `main`, use PR templates, and include screenshots if relevant. 46 | - **Code Quality:** Clean, maintainable & well-commented. 47 | 48 | ## Resources 49 | 50 | - **Git & GitHub:** [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo), [Clone](https://help.github.com/en/desktop/contributing-to-projects/creating-an-issue-or-pull-request), [PR Guide](https://opensource.com/article/19/7/create-pull-request-github) 51 | - **Learn Python:** [LearnPython.org](https://www.learnpython.org/) 52 | - **MkDocs:** [Documentation](https://www.mkdocs.org/) 53 | -------------------------------------------------------------------------------- /docs/data-visualization/index.md: -------------------------------------------------------------------------------- 1 | # 📊 Data Visualization 2 | 3 | 20 | -------------------------------------------------------------------------------- /docs/deep-learning/anamoly-detection.md: -------------------------------------------------------------------------------- 1 | # 📜 Time-Series Anomaly Detection 2 | 3 |
4 | 5 |
6 | 7 | ## 🎯 AIM 8 | To detect anomalies in time-series data using Long Short-Term Memory (LSTM) networks. 9 | 10 | ## 📊 DATASET LINK 11 | [NOT USED] 12 | 13 | ## 📓 KAGGLE NOTEBOOK 14 | [https://www.kaggle.com/code/thatarguy/lstm-anamoly-detection/notebook](https://www.kaggle.com/code/thatarguy/lstm-anamoly-detection/notebook) 15 | 16 | ??? Abstract "Kaggle Notebook" 17 | 18 | 26 | 27 | ## ⚙️ TECH STACK 28 | 29 | | **Category** | **Technologies** | 30 | |--------------------------|---------------------------------------------| 31 | | **Languages** | Python | 32 | | **Libraries/Frameworks** | TensorFlow, Keras, scikit-learn, numpy, pandas, matplotlib | 33 | | **Tools** | Jupyter Notebook, VS Code | 34 | 35 | --- 36 | 37 | ## 📝 DESCRIPTION 38 | 39 | !!! info "What is the requirement of the project?" 40 | - The project focuses on identifying anomalies in time-series data using an LSTM autoencoder. The model learns normal patterns and detects deviations indicating anomalies. 41 | 42 | ??? info "Why is it necessary?" 43 | - Anomaly detection is crucial in various domains such as finance, healthcare, and cybersecurity, where detecting unexpected behavior can prevent failures, fraud, or security breaches. 44 | 45 | ??? info "How is it beneficial and used?" 46 | - Businesses can use it to detect irregularities in stock market trends. 47 | - It can help monitor industrial equipment to identify faults before failures occur. 48 | - It can be applied in fraud detection for financial transactions. 49 | 50 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 51 | - Understanding time-series anomaly detection methodologies. 52 | - Generating synthetic data to simulate real-world scenarios. 53 | - Implementing an LSTM autoencoder to learn normal patterns and detect anomalies. 54 | - Evaluating model performance using Mean Squared Error (MSE). 55 | 56 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 57 | - Research paper: "Deep Learning for Time-Series Anomaly Detection" 58 | - Public notebook: LSTM Autoencoder for Anomaly Detection 59 | 60 | --- 61 | 62 | ## 🔍 PROJECT EXPLANATION 63 | 64 | ### 🧩 DATASET OVERVIEW & FEATURE DETAILS 65 | 66 | ??? example "📂 Synthetic dataset" 67 | 68 | - The dataset consists of a sine wave with added noise. 69 | 70 | | Feature Name | Description | Datatype | 71 | |--------------|-------------|:------------:| 72 | | time | Timestamp | int64 | 73 | | value | Sine wave value with noise | float64 | 74 | 75 | --- 76 | 77 | ### 🛤 PROJECT WORKFLOW 78 | 79 | !!! 
success "Project workflow" 80 | 81 | ``` mermaid 82 | graph LR 83 | A[Start] --> B{Generate Data}; 84 | B --> C[Normalize Data]; 85 | C --> D[Create Sequences]; 86 | D --> E[Train LSTM Autoencoder]; 87 | E --> F[Compute Reconstruction Error]; 88 | F --> G[Identify Anomalies]; 89 | ``` 90 | 91 | === "Step 1" 92 | - Generate synthetic data (sine wave with noise) 93 | - Normalize data using MinMaxScaler 94 | - Split data into training and validation sets 95 | 96 | === "Step 2" 97 | - Create sequential data using a rolling window approach 98 | - Reshape data for LSTM compatibility 99 | 100 | === "Step 3" 101 | - Implement LSTM autoencoder for anomaly detection 102 | - Optimize model using Adam optimizer 103 | 104 | === "Step 4" 105 | - Compute reconstruction error for anomaly detection 106 | - Identify threshold for anomalies using percentile-based method 107 | 108 | === "Step 5" 109 | - Visualize detected anomalies using Matplotlib 110 | 111 | --- 112 | 113 | ### 🖥 CODE EXPLANATION 114 | 115 | === "LSTM Autoencoder" 116 | - The model consists of an encoder, bottleneck, and decoder. 117 | - It learns normal time-series behavior and reconstructs it. 118 | - Deviations from normal patterns are considered anomalies. 119 | 120 | --- 121 | 122 | ### ⚖️ PROJECT TRADE-OFFS AND SOLUTIONS 123 | 124 | === "Reconstruction Error Threshold Selection" 125 | - Setting a high threshold may miss subtle anomalies, while a low threshold might increase false positives. 126 | - **Solution**: Use the 95th percentile of reconstruction errors as the threshold to balance false positives and false negatives. 127 | 128 | --- 129 | 130 | ## 🖼 SCREENSHOTS 131 | 132 | !!! tip "Visualizations and EDA of different features" 133 | 134 | === "Synthetic Data Plot" 135 | ![img](https://github.com/user-attachments/assets/e33a0537-9e23-4e21-b0e5-153a78ac4000) 136 | 137 | 138 | ??? example "Model performance graphs" 139 | 140 | === "Reconstruction Error Plot" 141 | ![img](https://github.com/user-attachments/assets/4ff144a9-756a-43e3-aba2-609d92cbacd2) 142 | --- 143 | 144 | ## 📉 MODELS USED AND THEIR EVALUATION METRICS 145 | 146 | | Model | Reconstruction Error (MSE) | 147 | |------------------|---------------------------| 148 | | LSTM Autoencoder | 0.015 | 149 | 150 | --- 151 | 152 | ## ✅ CONCLUSION 153 | 154 | ### 🔑 KEY LEARNINGS 155 | 156 | !!! tip "Insights gained from the data" 157 | - Time-series anomalies often appear as sudden deviations from normal patterns. 158 | 159 | ??? tip "Improvements in understanding machine learning concepts" 160 | - Learned about LSTM autoencoders and their ability to reconstruct normal sequences. 161 | 162 | ??? tip "Challenges faced and how they were overcome" 163 | - Handling high reconstruction errors by tuning model hyperparameters. 164 | - Selecting an appropriate anomaly threshold using statistical methods. 165 | 166 | --- 167 | 168 | ### 🌍 USE CASES 169 | 170 | === "Financial Fraud Detection" 171 | - Detect irregular transaction patterns using anomaly detection. 172 | 173 | === "Predictive Maintenance" 174 | - Identify equipment failures in industrial settings before they occur. 
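---

The full pipeline is in the Kaggle notebook linked above; the sketch below only illustrates the approach described in the workflow, and the window length, layer sizes, and epoch count are assumed values rather than the notebook's exact configuration:

```python
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# Synthetic series: a sine wave with added noise, as described above.
t = np.arange(1000)
series = np.sin(0.02 * t) + 0.1 * np.random.randn(1000)

# Min-max normalization and sliding windows (window length is an assumption).
series = (series - series.min()) / (series.max() - series.min())
window = 30
X = np.array([series[i:i + window] for i in range(len(series) - window)])[..., np.newaxis]

# LSTM autoencoder: encoder -> bottleneck -> decoder.
model = keras.Sequential([
    layers.Input(shape=(window, 1)),
    layers.LSTM(32),                          # encoder output acts as the bottleneck
    layers.RepeatVector(window),              # repeat the latent vector for each timestep
    layers.LSTM(32, return_sequences=True),   # decoder
    layers.TimeDistributed(layers.Dense(1)),  # reconstruct each timestep
])
model.compile(optimizer="adam", loss="mse")
model.fit(X, X, epochs=10, batch_size=64, verbose=0)

# Reconstruction error per window; windows above the 95th percentile are flagged as anomalies.
errors = np.mean((model.predict(X, verbose=0) - X) ** 2, axis=(1, 2))
threshold = np.percentile(errors, 95)
anomalies = np.where(errors > threshold)[0]
```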
175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /docs/deep-learning/brain-tumor-detection-model.md: -------------------------------------------------------------------------------- 1 | # Brain Tumor Detection 2 | 3 | ### AIM 4 | 5 | To predict brain tumors using a Convolutional Neural Network 6 | 7 | ### DATASET LINK 8 | 9 | [https://www.kaggle.com/datasets/primus11/brain-tumor-mri](https://www.kaggle.com/datasets/primus11/brain-tumor-mri) 10 | 11 | ### MY NOTEBOOK LINK 12 | 13 | [https://colab.research.google.com/github/11PRIMUS/ALOK/blob/main/Tumor3.ipynb](https://colab.research.google.com/github/11PRIMUS/ALOK/blob/main/Tumor3.ipynb) 14 | 15 | ### LIBRARIES NEEDED 16 | 17 | ??? quote "LIBRARIES USED" 18 | 19 | - pandas 20 | - numpy 21 | - scikit-learn (>=1.5.0 for TunedThresholdClassifierCV) 22 | - matplotlib 23 | - seaborn 24 | - streamlit 25 | 26 | --- 27 | 28 | ### DESCRIPTION 29 | 30 | !!! info "What is the requirement of the project?" 31 | - This project aims to predict early-stage brain tumors. It uses a Convolutional Neural Network to classify whether a tumor is present or not. 32 | 33 | ??? info "Why is it necessary?" 34 | - Brain tumors are a leading cause of death worldwide, and many cases can be managed by detecting the cancer in its initial stages, so that patients can take medication accordingly without further risks. 35 | 36 | ??? info "How is it beneficial and used?" 37 | - Doctors can use it to detect cancer and the affected region from MRI scans and help patients overcome it with the right guidance. It also acts as a fallback mechanism in rare cases where the diagnosis is not obvious. 38 | - People (patients in particular) can simply use MRI scans to check for a tumor and take the necessary medication and precautions. 39 | 40 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 41 | - Going through previous research and articles related to the problem. 42 | - Data exploration to understand the features. 43 | - Identifying key metrics for the problem based on the ratio of target classes. 44 | - Feature engineering and selection based on EDA. 45 | - Setting up a framework for easier testing of multiple models, even for other people. 46 | - Analysing model results simply using MRI scans. 47 | 48 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 49 | - Research paper: [Review of MRI-based Brain Tumor Image Segmentation Using Deep Learning Methods](https://www.sciencedirect.com/science/article/pii/S187705091632587X) 50 | - Public notebook: [Brain Tumor Classification](https://colab.research.google.com/github/11PRIMUS/ALOK/blob/main/Tumor3.ipynb) 51 | 52 | --- 53 | 54 | ### Model Architecture 55 | - The CNN architecture is designed to perform binary classification. The key layers used in the architecture are: 56 | - Convolutional layers for feature extraction from images, MaxPooling layers to downsample the image features, Dense layers to perform the classification, and Dropout for regularization to prevent overfitting. 57 | ### Model Structure 58 | - Input layer: 224*224 pixels. 59 | - Convolutional layers followed by MaxPooling layers. 60 | - Flatten layer to convert features into a 1D vector. 61 | - Fully connected layer for classification.
62 | - Output layer: Sigmoid activation for binary classification (tumor / no tumor). 63 | 64 | 65 | --- 66 | 67 | 68 | #### WHAT I HAVE DONE 69 | 70 | === "Step 1" 71 | 72 | Exploratory Data Analysis 73 | 74 | - Summary statistics 75 | - Data visualization for numerical feature distributions 76 | - Splitting of data (70% for training, 15% for validation and 15% for testing) 77 | 78 | === "Step 2" 79 | 80 | Data cleaning and preprocessing 81 | 82 | - Data preparation using an image data generator 83 | - Categorical feature encoding 84 | - Images resized to 224x224 pixels 85 | 86 | === "Step 3" 87 | 88 | Feature engineering and selection 89 | 90 | - Combining original features based on domain knowledge 91 | - Using MobileNet to process input 92 | 93 | === "Step 4" 94 | 95 | Modeling 96 | 97 | - Convolutional layers followed by MaxPooling layers 98 | - Flatten layer to convert the feature maps into a 1D vector 99 | - Sigmoid activation for binary classification 100 | - Holdout dataset created for model testing 101 | - Using VGG16 and ResNet for future improvements 102 | 103 | === "Step 5" 104 | 105 | Result analysis 106 | 107 | - Hosted on Streamlit so that anyone can easily upload an MRI scan and check whether a tumor is present or not. 108 | - Early stopping to achieve better accuracy. 109 | - Experimented with different augmentation techniques to improve the model's robustness. 110 | 111 | --- 112 | 113 | #### PROJECT TRADE-OFFS AND SOLUTIONS 114 | 115 | === "Trade Off 1" 116 | 117 | **Accuracy vs validation accuracy:** 118 | The training accuracy is much higher than the validation accuracy after epoch 2, suggesting that the model may be overfitting the training data. 119 | 120 | - **Solution**: It might be better to stop training around epoch 2 or 3 to avoid overfitting and ensure better generalization. 121 | 122 | 123 | 124 | 125 | 126 | --- 127 | 128 | ### CONCLUSION 129 | 130 | #### WHAT YOU HAVE LEARNED 131 | 132 | !!! tip "Insights gained from the data" 133 | - Early detection of a brain tumor can greatly reduce mortality and lets patients start the proper medication on that basis. 134 | 135 | ??? tip "Improvements in understanding machine learning concepts" 136 | - Learned and implemented the concept of predicting probability and tuning the prediction threshold for more accurate results, compared to directly predicting with the default threshold for models. 137 | 138 | ??? tip "Challenges faced and how they were overcome" 139 | - Converting the RGB images to grayscale and resizing them to 224x224 pixels was a big challenge. 140 | - The dataset was small, so we reached out to some hospitals, which helped us collect additional MRI scans. 141 | 142 | --- 143 | 144 | #### USE CASES OF THIS MODEL 145 | 146 | === "Application 1" 147 | 148 | - Doctors can identify the cancer stage and type accurately, allowing for tailored treatment approaches.
149 | 150 | === "Application 2" 151 | 152 | - Treatments at early stages are often less invasive and have fewer side effects compared to late-stage therapies 153 | 154 | --- 155 | 156 | -------------------------------------------------------------------------------- /docs/deep-learning/index.md: -------------------------------------------------------------------------------- 1 | # Deep Learning ✨ 2 | 3 | 42 | -------------------------------------------------------------------------------- /docs/deep-learning/music-genre-classification-model.md: -------------------------------------------------------------------------------- 1 | # Music Genre Classification Model 2 | 3 | ### AIM 4 | 5 | To develop a precise and effective music genre classification model using Convolutional Neural Networks (CNN), Support Vector Machines (SVM), Random Forest and XGBoost Classifier algorithms for the Kaggle GTZAN Dataset Music Genre Classification. 6 | 7 | ### DATASET LINK 8 | 9 | [https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/data](https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/data) 10 | 11 | ### MY NOTEBOOK LINK 12 | 13 | [https://colab.research.google.com/drive/1j8RZccP2ee5XlWEFSkTyJ98lFyNrezHS?usp=sharing](https://colab.research.google.com/drive/1j8RZccP2ee5XlWEFSkTyJ98lFyNrezHS?usp=sharing) 14 | 15 | ### LIBRARIES NEEDED 16 | 17 | ??? quote "LIBRARIES USED" 18 | 19 | - librosa 20 | - matplotlib 21 | - pandas 22 | - sklearn 23 | - seaborn 24 | - numpy 25 | - scipy 26 | - xgboost 27 | 28 | --- 29 | 30 | ### DESCRIPTION 31 | 32 | !!! info "What is the requirement of the project?" 33 | - The objective of this research is to develop a precise and effective music genre classification model using Convolutional Neural Networks (CNN), Support Vector Machines (SVM), Random Forest and XGBoost algorithms for the Kaggle GTZAN Dataset Music Genre Classification. 34 | 35 | ??? info "Why is it necessary?" 36 | - Music genre classification has several real-world applications, including music recommendation, content-based music retrieval, and personalized music services. However, the task of music genre classification is challenging due to the subjective nature of music and the complexity of audio signals. 37 | 38 | ??? info "How is it beneficial and used?" 39 | - **For User:** Provides more personalised music 40 | - **For Developers:** A recommendation system for songs that are of interest to the user 41 | - **For Business:** Able to charge premium for the more personalised and recommendation services provided 42 | 43 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 44 | - Initially how the different sounds are structured. 45 | - Learned how to represent sound signal in 2D format on graphs using the librosa library. 46 | - Came to know about the various features of sound like 47 | - Mel-frequency cepstral coefficients (MFCC) 48 | - Chromagram 49 | - Spectral Centroid 50 | - Zero-crossing rate 51 | - BPM - Beats Per Minute 52 | 53 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 
54 | - [https://scholarworks.calstate.edu/downloads/73666b68n](https://scholarworks.calstate.edu/downloads/73666b68n) 55 | - [https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/data](https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification/data) 56 | - [https://towardsdatascience.com/music-genre-classification-with-python-c714d032f0d8](https://towardsdatascience.com/music-genre-classification-with-python-c714d032f0d8) 57 | 58 | --- 59 | 60 | ### EXPLANATION 61 | 62 | #### DETAILS OF THE DIFFERENT FEATURES 63 | 64 | There are 4 different parts of the dataset. 65 | 66 | - genres_original 67 | - images_original 68 | - features_3_sec.csv 69 | - features_30_sec.csv 70 | 71 | - The genres in `genres_original` 72 | 73 | ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock'] 74 | Each genre has 100 WAV files 75 | 76 | - The genres in `images_original` 77 | 78 | ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock'] 79 | Each genre has 100 PNG files 80 | 81 | - There are 60 features in `features_3_sec.csv` 82 | 83 | - There are 60 features in `features_30_sec.csv` 84 | 85 | --- 86 | 87 | #### WHAT I HAVE DONE 88 | 89 | === "Step 1" 90 | 91 | - Created visual representations of the data to help understand it 92 | 93 | === "Step 2" 94 | 95 | - Found strong relationships between independent features and the dependent feature using correlation. 96 | 97 | === "Step 3" 98 | 99 | - Performed Exploratory Data Analysis on the data. 100 | 101 | === "Step 4" 102 | 103 | - Used different classification techniques like SVM, Random Forest, KNN and XGBoost. 104 | 105 | === "Step 5" 106 | 107 | - Compared the various models and used the best-performing model to make predictions. 108 | 109 | === "Step 6" 110 | 111 | - Used Mean Squared Error and R2 Score for evaluating the model's performance. 112 | 113 | === "Step 7" 114 | 115 | - Visualized the best model's performance using the matplotlib and seaborn libraries. 116 | 117 | --- 118 | 119 | #### PROJECT TRADE-OFFS AND SOLUTIONS 120 | 121 | === "Trade Off 1" 122 | 123 | How do you visualize an audio signal? 124 | 125 | - **Solution**: 126 | 127 | - **_librosa_**: It is the mother of all audio file libraries 128 | - **Plotting Graphs**: With the necessary libraries in place to visualize the data, I started plotting the audio signals 129 | - **Spectrogram**: A spectrogram is a visual representation of the spectrum of frequencies of a signal as it varies with time. When applied to an audio signal, spectrograms are sometimes called sonographs, voiceprints, or voicegrams. Here we convert the frequency axis to a logarithmic one. 130 | 131 | === "Trade Off 2" 132 | 133 | Features that help classify the data 134 | 135 | - **Solution**: 136 | 137 | - **Feature Engineering**: What are the features present in audio signals? 138 | - **Spectral Centroid**: Indicates where the "centre of mass" for a sound is located and is calculated as the weighted mean of the frequencies present in the sound. 139 | - **Mel-Frequency Cepstral Coefficients**: The Mel frequency cepstral coefficients (MFCCs) of a signal are a small set of features (usually about 10–20) which concisely describe the overall shape of a spectral envelope. It models the characteristics of the human voice.
140 | - **Chroma Frequencies**: Chroma features are an interesting and powerful representation for music audio in which the entire spectrum is projected onto 12 bins representing the 12 distinct semitones (or chroma) of the musical octave. 141 | 142 | === "Trade Off 3" 143 | 144 | Performing EDA on the CSV files 145 | 146 | - **Solution**: 147 | 148 | - **Tool Selection**: Used the correlation matrix on the features_30_sec.csv dataset to extract most related datasets 149 | - **Visualization Best Practices**: Followed best practices such as using appropriate chart types (e.g., box plots for BPM data, PCA plots for correlations), adding labels and titles, and ensuring readability. 150 | - **Iterative Refinement**: Iteratively refined visualizations based on feedback and self-review to enhance clarity and informativeness. 151 | 152 | === "Trade Off 4" 153 | 154 | Implementing Machine Learning Models 155 | 156 | - **Solution**: 157 | 158 | - **Cross-validation**: Used cross-validation techniques to ensure the reliability and accuracy of the analysis results. 159 | - **Collaboration with Experts**: Engaged with Music experts and enthusiasts to validate the findings and gain additional perspectives. 160 | - **Contextual Understanding**: Interpreted results within the context of the music, considering factors such as mood of the users, surrounding, and specific events to provide meaningful and actionable insights. 161 | 162 | --- 163 | 164 | ### SCREENSHOTS 165 | 166 | !!! success "Project workflow" 167 | 168 | ``` mermaid 169 | graph LR 170 | A[Start] --> B{Error?}; 171 | B -->|Yes| C[Hmm...]; 172 | C --> D[Debug]; 173 | D --> B; 174 | B ---->|No| E[Yay!]; 175 | ``` 176 | 177 | ??? tip "Visualizations and EDA of different features" 178 | 179 | === "Harm Perc" 180 | ![harm _perc](https://github.com/user-attachments/assets/1faab657-812b-453f-bbea-f0ba9a7cfb44) 181 | 182 | === "Sound Wave" 183 | ![sound _wave](https://github.com/user-attachments/assets/f83fd865-567f-4943-9776-fc4e1223caa3) 184 | 185 | === "STFT" 186 | ![stft](https://github.com/user-attachments/assets/22d288bf-9063-4593-a3d2-7225b2550807) 187 | 188 | === "Pop Mel-Spec" 189 | ![pop _mel-_spec](https://github.com/user-attachments/assets/1bee61ef-3fd3-4d21-becc-bf14621824a1) 190 | 191 | === "Blues Mel-Spec" 192 | ![blues _mel-_spec](https://github.com/user-attachments/assets/fb31fcc4-c40f-4767-bec3-95c520c031ca) 193 | 194 | === "Spec Cent" 195 | ![spec _cent](https://github.com/user-attachments/assets/b203347d-cd37-42c8-b9e1-f8da59d235db) 196 | 197 | === "Spec Rolloff" 198 | ![spec _rolloff](https://github.com/user-attachments/assets/e7a468d3-f6e2-4877-b3a7-687a14d8566b) 199 | 200 | === "MFCC" 201 | ![m_f_c_c](https://github.com/user-attachments/assets/f1eaf291-ecc3-4710-bbbb-286e89b348b3) 202 | 203 | === "Chromogram" 204 | ![chromogram](https://github.com/user-attachments/assets/fffc9b5c-5466-45f0-a552-b369b44e197c) 205 | 206 | === "Corr Heatmap" 207 | ![corr _heatmap](https://github.com/user-attachments/assets/6f2afb34-e6c0-4319-a474-d1cbf8631c92) 208 | 209 | === "BPM Boxplot" 210 | ![b_p_m _boxplot](https://github.com/user-attachments/assets/b8a9be0f-d686-4c12-8157-f5dcf06fcb06) 211 | 212 | === "PCA Scatter Plot" 213 | ![p_c_a _scattert](https://github.com/user-attachments/assets/66fe7232-1166-4af0-932f-f6ba197fa042) 214 | 215 | === "Confusion Matrix" 216 | ![conf matrix](https://github.com/user-attachments/assets/f53b1aa8-34e4-4839-bd39-9f6837805b01) 217 | 218 | --- 219 | 220 | ### MODELS USED AND THEIR ACCURACIES 221 | 222 | | 
Model | Accuracy | 223 | |------------------------------|------------| 224 | | KNN | 0.80581 | 225 | | Random Forest | 0.81415 | 226 | | Cross Gradient Booster | 0.90123 | 227 | | SVM | 0.75409 | 228 | 229 | --- 230 | 231 | #### MODELS COMPARISON GRAPHS 232 | 233 | !!! tip "Models Comparison Graphs" 234 | 235 | === "ACC Plot" 236 | ![accplot](https://github.com/user-attachments/assets/4d4f3eff-c8f3-4163-9dc8-6cdb3c96ab28) 237 | 238 | --- 239 | 240 | ### CONCLUSION 241 | 242 | The accuracy plots above compare the different models. 243 | The XGB Classifier gives the most accurate predictions of a track's genre. 244 | 245 | #### WHAT YOU HAVE LEARNED 246 | 247 | !!! tip "Insights gained from the data" 248 | - Discovered a new library (librosa) that helps visualize audio signals 249 | - Discovered new audio features like STFT, MFCC, Spectral Centroid and Spectral Rolloff 250 | - Gained a deeper understanding of the features of different genres of music 251 | 252 | ??? tip "Improvements in understanding machine learning concepts" 253 | - Enhanced knowledge of data cleaning and preprocessing techniques to handle real-world datasets. 254 | - Improved skills in exploratory data analysis (EDA) to extract meaningful insights from raw data. 255 | - Learned how to use visualization tools to effectively communicate data-driven findings. 256 | 257 | --- 258 | 259 | #### USE CASES OF THIS MODEL 260 | 261 | === "Application 1" 262 | 263 | **User Personalisation** 264 | 265 | - It can be used to provide more personalised music recommendations for users based on their taste in music or the various genres they listen to. This personalisation experience can be used to develop 'Premium' based business models. 266 | 267 | === "Application 2" 268 | 269 | **Compatibility Between Users** 270 | 271 | - Based on users' musical taste and the genres they listen to, we can identify behaviour patterns and match them with similar users they could become friends with. This increases social interaction within the app. 272 | 273 | --- 274 | 275 | #### FEATURES PLANNED BUT NOT IMPLEMENTED 276 | 277 | === "Feature 1" 278 | 279 | - **Real-time Compatibility Tracking** 280 | 281 | - Implementing a real-time tracking system to view compatibility between users. 282 | 283 | === "Feature 2" 284 | 285 | - **Predictive Analytics** 286 | 287 | - Using advanced machine learning algorithms to predict the next song the user is likely to listen to. 288 | 289 | -------------------------------------------------------------------------------- /docs/generative-adversarial-networks/index.md: -------------------------------------------------------------------------------- 1 | # Generative Adversarial Networks 💱 2 | 3 |
4 | 5 | 6 |
7 | -------------------------------------------------------------------------------- /docs/large-language-models/index.md: -------------------------------------------------------------------------------- 1 | # Large Language Models 🤪 2 | 3 |
4 | 5 | 6 |
7 | -------------------------------------------------------------------------------- /docs/machine-learning/air-quality-prediction.md: -------------------------------------------------------------------------------- 1 | # Air Quality Prediction Model 2 | 3 |
4 | 5 |
6 | 7 | ## 🎯 AIM 8 | 9 | To predict air quality levels based on various features such as CO (Carbon Monoxide), NO (Nitrogen Oxides), NO2 (Nitrogen Dioxide), O3 (Ozone), and other environmental factors. By applying machine learning models, this project explores how different algorithms perform in predicting air quality and understanding the key factors that influence it. 10 | 11 | ## 📊 DATASET LINK 12 | 13 | [https://www.kaggle.com/datasets/fedesoriano/air-quality-data-set](https://www.kaggle.com/datasets/fedesoriano/air-quality-data-set) 14 | 15 | ## 📓 NOTEBOOK 16 | 17 | [https://www.kaggle.com/code/disha520/air-quality-predictor](https://www.kaggle.com/code/disha520/air-quality-predictor) 18 | 19 | ??? Abstract "Kaggle Notebook" 20 | 21 | 29 | 30 | ## ⚙️ TECH STACK 31 | 32 | | **Category** | **Technologies** | 33 | |--------------------------|---------------------------------------------| 34 | | **Languages** | Python | 35 | | **Libraries/Frameworks** | Pandas, Numpy, Matplotlib, Seaborn, Scikit-learn | 36 | | **Tools** | Git, Jupyter, VS Code | 37 | 38 | --- 39 | 40 | ## 📝 DESCRIPTION 41 | 42 | The project focuses on predicting air quality levels based on the features of air pollutants and environmental parameters. 43 | The objective is to test various regression models to see which one gives the best predictions for CO (Carbon Monoxide) levels. 44 | 45 | !!! info "What is the requirement of the project?" 46 | - Air quality is a critical issue for human health, and accurate forecasting models can provide insights to policymakers and the public. 47 | - To accurately predict the CO levels based on environmental data. 48 | 49 | ??? info "How is it beneficial and used?" 50 | - Predicting air quality can help in early detection of air pollution and assist in controlling environmental factors effectively. 51 | - This model can be used by environmental agencies, city planners, and policymakers to predict and manage air pollution in urban areas, contributing to better public health outcomes. 52 | 53 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 54 | - Began by cleaning the dataset, handling missing data, and converting categorical features into numerical data. 55 | - After preparing the data, various machine learning models were trained and evaluated to identify the best-performing model. 56 | 57 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 58 | - Kaggle kernels and documentation for additional dataset understanding. 59 | - Tutorials on machine learning regression techniques, particularly for Random Forest, SVR, and Decision Trees. 
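Before the step-by-step workflow below, here is a condensed sketch of the modelling loop this project follows: clean the semicolon-delimited data, split it, then fit and compare the four regressors using MAE and R2. The column handling and cleaning here are deliberately simplified assumptions; the full procedure is described in the workflow section.

```python
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Load the air-quality data (semicolon-separated, comma decimals)
df = pd.read_csv("AirQuality.csv", sep=";", decimal=",")
df = df.dropna(how="all").dropna(axis=1, how="all")
df = df.drop(columns=["Date", "Time"], errors="ignore")
df = df.select_dtypes("number").fillna(df.mean(numeric_only=True))

# Target: CO(GT); features: every other numeric column
y = df["CO(GT)"]
X = df.drop(columns=["CO(GT)"])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = {
    "Random Forest": RandomForestRegressor(random_state=42),
    "Linear Regression": LinearRegression(),
    "SVR": SVR(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
}

# Fit each model and report MAE / R2 on the held-out test set
for name, model in models.items():
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    print(f"{name}: MAE={mean_absolute_error(y_test, pred):.3f}, "
          f"R2={r2_score(y_test, pred):.3f}")
```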
60 | 61 | --- 62 | 63 | ## 🔍 EXPLANATION 64 | 65 | ### 🧩 DETAILS OF THE DIFFERENT FEATURES 66 | 67 | #### 📂 AirQuality.csv 68 | 69 | | Feature Name | Description | 70 | |--------------|-------------| 71 | | CO(GT) | Carbon monoxide in the air | 72 | | Date & Time | Record of data collection time | 73 | | PT08.S1(CO), PT08.S2(NMHC), PT08.S3(NOX), PT08.S4(NO2), PT08.S5(O3) | These are sensor readings for different gas pollutants | 74 | | T, RH, AH | Temperature, Humidity, and Absolute Humidity respectively, recorded as environmental factors | 75 | 76 | --- 77 | 78 | ### 🛤 PROJECT WORKFLOW 79 | 80 | ```mermaid 81 | graph LR 82 | A[Start] --> B{Is data clean?}; 83 | B -->|Yes| C[Explore Data]; 84 | C --> D[Data Preprocessing]; 85 | D --> E[Feature Selection & Engineering]; 86 | E --> F[Split Data into Training & Test Sets]; 87 | F --> G[Define Models]; 88 | G --> H[Train and Evaluate Models]; 89 | H --> I[Visualize Evaluation Metrics]; 90 | I --> J[Model Testing]; 91 | J --> K[Conclusion and Observations]; 92 | B ---->|No| L[Clean Data]; 93 | ``` 94 | 95 | 96 | === "Import Necessary Libraries" 97 | - First, we import all the essential libraries needed for handling, analyzing, and modeling the dataset. 98 | - This includes libraries like Pandas for data manipulation, Numpy for numerical computations, Matplotlib and Seaborn for data visualization, and Scikit-learn for machine learning models, evaluation, and data preprocessing. 99 | - These libraries will enable us to perform all required tasks efficiently. 100 | 101 | === "Load Dataset" 102 | - We load the dataset using Pandas `read_csv()` function. The dataset contains air quality data, which is loaded with a semicolon delimiter. 103 | - After loading, we inspect the first few rows to understand the structure of the data and ensure that the dataset is correctly loaded. 104 | 105 | === "Data Cleaning Process" 106 | Data cleaning is a crucial step in any project. In this step: 107 | 108 | - We remove unnamed columns that aren't useful for analysis (such as 'Unnamed: 15', 'Unnamed: 16'). 109 | - We correct data consistency issues, specifically replacing commas with periods in numeric columns to ensure the correct parsing of values. 110 | - Missing values in numeric columns are replaced with the mean of that respective column. 111 | - We eliminate rows that consist entirely of missing values (NaN). 112 | - A new datetime feature is created by combining the 'Date' and 'Time' columns. 113 | - Additional temporal features such as month, day, weekday, and hour are derived from the new datetime feature. 114 | - The original Date and Time columns are dropped as they are no longer needed. 115 | 116 | === "Visualizing Correlations Between Features" 117 | 118 | - To understand relationships among the features, a heatmap is used to visualize correlations between all numeric columns. 119 | - The heatmap highlights how features are correlated with each other, helping to identify possible redundancies or important predictors for the target variable. 120 | 121 | === "Data Preparation - Features (X) and Target (y)" 122 | After cleaning the data, we separate the dataset into features (X) and the target variable (y): 123 | 124 | - **Features (X)**: These are the columns used to predict the target value. We exclude the target variable column ‘CO(GT)’ and include all other columns as features. 125 | - **Target (y)**: This is the variable we want to predict. We extract the 'CO(GT)' column and ensure all values are numeric. 
126 | 127 | To prepare the data for machine learning, any non-numeric columns in the features (X) are encoded using `LabelEncoder`. 128 | 129 | === "Split the Data into Training and Test Sets" 130 | 131 | - We split the dataset into training and testing sets, allocating 80% of the data for training and the remaining 20% for testing. 132 | - This split allows us to evaluate model performance on unseen data and validate the effectiveness of the model. 133 | 134 | === "Define Models" 135 | We define multiple regression models to train and evaluate on the dataset: 136 | 137 | - **RandomForestRegressor**: A robust ensemble method that performs well on non-linear datasets. 138 | - **LinearRegression**: A fundamental regression model, useful for establishing linear relationships. 139 | - **SVR (Support Vector Regression)**: A regression model based on Support Vector Machines, useful for complex, non-linear relationships. 140 | - **DecisionTreeRegressor**: A decision tree-based model, capturing non-linear patterns and interactions. 141 | 142 | === "Train and Evaluate Each Model" 143 | Each model is trained on the training data and used to make predictions on the testing set. The performance is evaluated using two metrics: 144 | 145 | - **Mean Absolute Error (MAE)**: Measures the average error between predicted and actual values. 146 | - **R2 Score**: Represents the proportion of the variance in the target variable that is predictable from the features. 147 | 148 | The evaluation metrics for each model are stored for comparison. 149 | 150 | === "Visualizing Model Evaluation Metrics" 151 | We visualize the evaluation results for all models to get a comparative view of their performances. Two plots are generated: 152 | 153 | - **Mean Absolute Error (MAE)** for each model, showing how much deviation there is between predicted and actual values. 154 | - **R2 Score**, depicting the models' ability to explain the variability in the target variable. Higher R2 values indicate a better fit. 155 | 156 | These visualizations make it easy to compare model performances and understand which model is performing the best. 157 | 158 | === "Conclusion and Observations" 159 | 160 | - In this final step, we summarize the results and draw conclusions based on the evaluation metrics. We discuss which model achieved the best performance in terms of both MAE and R2 Score, along with insights from the data cleaning and feature engineering steps. 161 | - Key observations include the importance of feature selection, the efficacy of different models for regression tasks, and which model has the most accurate predictions based on the dataset at hand. 162 | 163 | --- 164 | 165 | ### 🖥 CODE EXPLANATION 166 | 167 | 168 | --- 169 | 170 | ### ⚖️ PROJECT TRADE-OFFS AND SOLUTIONS 171 | 172 | === "Trade Off 1" 173 | - **Trade-off**: Choosing between model accuracy and training time. 174 | - **Solution**: Random Forest was chosen due to its balance between accuracy and efficiency, with SVR considered for its powerful predictive power despite longer training time. 175 | 176 | === "Trade Off 2" 177 | - **Trade-off**: Model interpretability vs complexity. 178 | - **Solution**: Decision trees were avoided in favor of Random Forest, which tends to be more robust in dealing with complex data and prevents overfitting. 179 | 180 | --- 181 | 182 | ## 🖼 SCREENSHOTS 183 | 184 | !!! 
tip "Visualizations and EDA of different features" 185 | 186 | === "HeatMap" 187 | ![img](https://github.com/user-attachments/assets/e23421bf-7177-4281-9cb1-8a2c91659ac6) 188 | 189 | === "Model Comparison" 190 | ![model-comparison](https://github.com/user-attachments/assets/d59c412d-0893-455b-93c4-de53ddf8d750) 191 | 192 | --- 193 | 194 | ## 📉 MODELS USED AND THEIR EVALUATION METRICS 195 | 196 | | Model | Mean Absolute Error (MAE) | R2 Score | 197 | |--------------------------|---------------------------|----------| 198 | | Random Forest Regressor | 1.2391 | 0.885 | 199 | | Linear Regression | 1.4592 | 0.82 | 200 | | SVR | 1.3210 | 0.843 | 201 | | Decision Tree Regressor | 1.5138 | 0.755 | 202 | 203 | --- 204 | 205 | ## ✅ CONCLUSION 206 | 207 | ### 🔑 KEY LEARNINGS 208 | 209 | !!! tip "Insights gained from the data" 210 | - Learned how different machine learning models perform on real-world data and gained insights into their strengths and weaknesses. 211 | - Understood the significance of feature engineering and preprocessing to achieve better model performance. 212 | - Data had missing values that required filling. 213 | - Feature creation from datetime led to better prediction accuracy. 214 | 215 | ??? tip "Improvements in understanding machine learning concepts" 216 | - Learned how to effectively implement and optimize machine learning models using libraries like scikit-learn. 217 | 218 | --- 219 | 220 | ### 🌍 USE CASES 221 | 222 | === "Application 1" 223 | **Predicting Air Quality in Urban Areas** 224 | 225 | - Local governments can use this model to predict air pollution levels and take early actions to reduce pollution in cities. 226 | 227 | === "Application 2" 228 | **Predicting Seasonal Air Pollution Levels** 229 | 230 | - The model can help forecast air quality during different times of the year, assisting in long-term policy planning. 231 | 232 | -------------------------------------------------------------------------------- /docs/machine-learning/autism-detection.md: -------------------------------------------------------------------------------- 1 | # 🌟 Autism Spectrum Disorder (ASD) Detection using Machine Learning 2 | 3 |
4 | 5 |
6 | 7 | ## 🎯 AIM 8 | To develop a machine learning model that predicts the likelihood of Autism Spectrum Disorder (ASD) based on behavioral and demographic features. 9 | 10 | ## 🌊 DATASET LINK 11 | [Autism Screening Data](https://www.kaggle.com/code/konikarani/autism-prediction/data) 12 | 13 | ## 📚 KAGGLE NOTEBOOK 14 | [Autism Detection Kaggle Notebook](https://www.kaggle.com/code/thatarguy/autism-prediction-using-ml?kernelSessionId=224830771) 15 | 16 | ??? Abstract "Kaggle Notebook" 17 | 18 | 19 | ## ⚙️ TECH STACK 20 | 21 | | **Category** | **Technologies** | 22 | |--------------------------|---------------------------------------------| 23 | | **Languages** | Python | 24 | | **Libraries/Frameworks** | Pandas, NumPy, Scikit-learn, | 25 | | **Tools** | Jupyter Notebook, VS Code | 26 | 27 | --- 28 | 29 | ## 🖍 DESCRIPTION 30 | !!! info "What is the requirement of the project?" 31 | - The rise in Autism cases necessitates early detection. 32 | - Traditional diagnostic methods are time-consuming and expensive. 33 | - Machine learning can provide quick, accurate predictions to aid early intervention. 34 | 35 | ??? info "How is it beneficial and used?" 36 | - Helps doctors and researchers identify ASD tendencies early. 37 | - Reduces the time taken for ASD screening. 38 | - Provides a scalable and cost-effective approach. 39 | 40 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 41 | - Collected and preprocessed the dataset. 42 | - Explored different ML models for classification. 43 | - Evaluated models based on accuracy and efficiency. 44 | 45 | 46 | --- 47 | 48 | ## 🔍 PROJECT EXPLANATION 49 | 50 | ### 🧩 DATASET OVERVIEW & FEATURE DETAILS 51 | The dataset consists of **800 rows** and **22 columns**, containing information related to autism spectrum disorder (ASD) detection based on various parameters. 52 | 53 | 54 | | **Feature Name** | **Description** | **Datatype** | 55 | |---------------------|----------------------------------------------------|:-----------:| 56 | | `ID` | Unique identifier for each record | `int64` | 57 | | `A1_Score` - `A10_Score` | Responses to 10 screening questions (0 or 1) | `int64` | 58 | | `age` | Age of the individual | `float64` | 59 | | `gender` | Gender (`m` for male, `f` for female) | `object` | 60 | | `ethnicity` | Ethnic background | `object` | 61 | | `jaundice` | Whether the individual had jaundice at birth (`yes/no`) | `object` | 62 | | `austim` | Family history of autism (`yes/no`) | `object` | 63 | | `contry_of_res` | Country of residence | `object` | 64 | | `used_app_before` | Whether the individual used a screening app before (`yes/no`) | `object` | 65 | | `result` | Score calculated based on the screening test | `float64` | 66 | | `age_desc` | Age description (e.g., "18 and more") | `object` | 67 | | `relation` | Relation of the person filling out the form | `object` | 68 | | `Class/ASD` | ASD diagnosis label (`1` for ASD, `0` for non-ASD) | `int64` | 69 | 70 | This dataset provides essential features for training a model to detect ASD based on questionnaire responses and demographic information. 71 | 72 | 73 | --- 74 | 75 | ### 🛠 PROJECT WORKFLOW 76 | !!! success "Project workflow" 77 | ``` mermaid 78 | graph LR 79 | A[Start] --> B[Data Preprocessing]; 80 | B --> C[Feature Engineering]; 81 | C --> D[Model Training]; 82 | D --> E[Model Evaluation]; 83 | E --> F[Deployment]; 84 | ``` 85 | 86 | === "Step 1" 87 | - Collected dataset and performed exploratory data analysis. 
88 | 89 | === "Step 2" 90 | - Preprocessed data (handling missing values, encoding categorical data). 91 | 92 | === "Step 3" 93 | - Feature selection and engineering. 94 | 95 | === "Step 4" 96 | - Trained multiple classification models (Decision Tree, Random Forest, XGBoost). 97 | 98 | === "Step 5" 99 | - Evaluated models using accuracy, precision, recall, and F1-score. 100 | 101 | 102 | --- 103 | 104 | ### 🖥️ CODE EXPLANATION 105 | === "Section 1: Data Preprocessing" 106 | - Loaded dataset and handled missing values. 107 | 108 | === "Section 2: Model Training" 109 | - Implemented Logistic Regression and Neural Networks for classification. 110 | 111 | --- 112 | 113 | ### ⚖️ PROJECT TRADE-OFFS AND SOLUTIONS 114 | === "Trade Off 1" 115 | - **Accuracy vs. Model Interpretability**: Used a Random Forest model instead of a deep neural network for better interpretability. 116 | 117 | === "Trade Off 2" 118 | - **Speed vs. Accuracy**: Chose Logistic Regression for quick predictions in real-time applications. 119 | 120 | --- 121 | 122 | ## 🖼 SCREENSHOTS 123 | !!! tip "Visualizations and EDA of different features" 124 | 125 | === "Age Distribution" 126 | ![img](https://github.com/user-attachments/assets/412aa82d-0f7a-4c7a-bdca-30a553de36b4) 127 | 128 | ??? example "Model performance graphs" 129 | 130 | === "Confusion Matrix" 131 | ![img](https://github.com/user-attachments/assets/71c5773c-fe1f-42bb-ab76-e1150f564507) 132 | 133 | ??? example "Features Correlation" 134 | 135 | === "Feature Correlation Heatmap" 136 | ![img](https://github.com/user-attachments/assets/60d24749-2f2e-4222-9895-c46c29ea596e) 137 | 138 | 139 | --- 140 | 141 | ## 📉 MODELS USED AND THEIR EVALUATION METRICS 142 | | Model | Accuracy | Precision | Recall | F1-score | 143 | |------------|----------|-----------|--------|----------| 144 | | Decision Tree | 73% | 0.71 | 0.73 | 0.72 | 145 | | Random Forest | 82% | 0.82 | 0.82 | 0.82 | 146 | | XGBoost | 81% | 0.81 | 0.81 | 081 | 147 | 148 | --- 149 | 150 | ## ✅ CONCLUSION 151 | ### 🔑 KEY LEARNINGS 152 | !!! tip "Insights gained from the data" 153 | - Behavioral screening scores are the strongest predictors of ASD. 154 | - Family history and neonatal jaundice also show correlations with ASD diagnosis. 155 | 156 | ??? tip "Improvements in understanding machine learning concepts" 157 | - Feature selection and engineering play a crucial role in medical predictions. 158 | - Trade-offs between accuracy, interpretability, and computational efficiency need to be balanced. 159 | 160 | --- 161 | 162 | ### 🌍 USE CASES 163 | === "Early ASD Screening" 164 | - Helps parents and doctors identify ASD tendencies at an early stage. 165 | 166 | === "Assistive Diagnostic Tool" 167 | - Can support psychologists in preliminary ASD assessments before clinical diagnosis. 168 | 169 | 170 | -------------------------------------------------------------------------------- /docs/machine-learning/bulldozer-price-prediction.md: -------------------------------------------------------------------------------- 1 | 2 | # 📜 Project Title: Bulldozer-Price-Prediction-using-ML 3 | 4 | ## 🎯 AIM 5 | This project aims to predict the auction prices of bulldozers using machine learning techniques. The dataset used for this project comes from the Kaggle competition "Blue Book for Bulldozers," which provides historical data on bulldozer sales. 
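As a preview of the modelling approach detailed in the workflow section below, here is a minimal sketch of a Random Forest regressor tuned with RandomizedSearchCV and scored with RMSLE and R2. The feature preparation, parameter ranges, and variable names are illustrative assumptions rather than the project's exact code.

```python
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_log_error, r2_score

def rmsle(y_true, y_pred):
    """Root Mean Squared Log Error, the metric used to evaluate this model."""
    return np.sqrt(mean_squared_log_error(y_true, y_pred))

# X_train / y_train: preprocessed features and the sale price target
# (see the Data Preprocessing and Model Selection steps below).
param_distributions = {
    "n_estimators": [100, 200, 400],
    "max_depth": [None, 10, 20],
    "min_samples_leaf": [1, 5, 10],
    "max_features": [0.5, "sqrt", 1.0],
}

search = RandomizedSearchCV(
    RandomForestRegressor(n_jobs=-1, random_state=42),
    param_distributions=param_distributions,
    n_iter=10,
    cv=3,
    scoring="neg_mean_squared_log_error",
    random_state=42,
)

# search.fit(X_train, y_train)
# best_model = search.best_estimator_
# print("Validation RMSLE:", rmsle(y_valid, best_model.predict(X_valid)))
# print("Validation R2:", r2_score(y_valid, best_model.predict(X_valid)))
```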
6 | 7 | ## 📊 DATASET LINK 8 | 9 | [Kaggle Blue Book for Bulldozers](https://www.kaggle.com/c/bluebook-for-bulldozers/data) 10 | 11 | 12 | ## 📓 KAGGLE NOTEBOOK 13 | 14 | [Kaggle Notebook](https://www.kaggle.com/code/nandagopald2004/bulldozer-price-prediction-using-ml) 15 | 16 | 17 | ## ⚙️ TECH STACK 18 | 19 | | **Category** | **Technologies** | 20 | |--------------------------|---------------------------------------------| 21 | | **Languages** | Python | 22 | | **Libraries/Frameworks** | Scikit Learn,Numpy,Pandas,Matplotlib | 23 | 24 | 25 | 26 | --- 27 | 28 | ## 📝 DESCRIPTION 29 | 30 | Requirement of the Project 31 | 32 | The project aims to predict the price of used bulldozers based on various factors such as equipment type, usage hours, manufacturing year, and other relevant parameters. The goal is to develop an accurate pricing model using Machine Learning (ML) techniques. 33 | 34 | Why is it Necessary? 35 | 36 | The construction and heavy machinery industry heavily relies on the resale of used equipment. Incorrect pricing can lead to financial losses for sellers or overpriced purchases for buyers. A data-driven approach helps ensure fair pricing, improving efficiency in the marketplace. 37 | 38 | How is it Beneficial and Used? 39 | 40 | 1. Helps businesses and individuals estimate bulldozer prices before buying or selling. 41 | 42 | 2. Assists construction companies in budgeting for equipment procurement. 43 | 44 | 3. Enables auction houses and dealerships to set competitive and data-backed prices. 45 | 46 | 4. Reduces reliance on manual estimation, making pricing more transparent and objective. 47 | 48 | Approach to the Project 49 | 50 | **Data Collection** – Gathered historical sales data of bulldozers, including features like sale date, equipment age, and location. 51 | 52 | **Data Preprocessing** – Cleaned missing values, handled categorical variables, and transformed data for ML models. 53 | 54 | **Exploratory Data Analysis (EDA)** – Identified key factors influencing bulldozer prices. 55 | 56 | **Model Selection & Training** – Implemented and evaluated various ML models such as Random Forest, Gradient Boosting, and Linear Regression. 57 | 58 | **Evaluation & Optimization** – Tuned hyperparameters and tested model performance using metrics like RMSE (Root Mean Squared Error). 59 | 60 | **Deployment** – Integrated the trained model into a user-friendly interface for real-world use. 61 | 62 | This project ensures a more systematic and accurate approach to bulldozer pricing, leveraging ML to enhance decision-making in the heavy equipment industry. 63 | 64 | 65 | --- 66 | 67 | ## 🔍 PROJECT EXPLANATION 68 | 69 | ### 🧩 DATASET OVERVIEW & FEATURE DETAILS 70 | 71 | 72 | **SalesID** unique identifier of a particular sale of a machine at auction 73 | 74 | **MachineID** identifier for a particular machine; machines may have multiple sales 75 | 76 | **ModelID** identifier for a unique machine model (i.e. fiModelDesc) 77 | 78 | **datasource** source of the sale record; some sources are more diligent about reporting attributes of the machine than others. Note that a particular datasource may report on multiple auctioneerIDs. 79 | 80 | **auctioneerID** identifier of a particular auctioneer, i.e. company that sold the machine at auction. Not the same as datasource. 
81 | 82 | **YearMade** year of manufacturer of the Machine 83 | 84 | **MachineHoursCurrentMeter** current usage of the machine in hours at time of sale (saledate); null or 0 means no hours have been reported for that sale 85 | 86 | **UsageBand** value (low, medium, high) calculated comparing this particular Machine-Sale hours to average usage for the fiBaseModel; e.g. 'Low' means this machine has less hours given it's lifespan relative to average of fiBaseModel. 87 | 88 | **Saledate** time of sale 89 | 90 | **Saleprice** cost of sale in USD 91 | 92 | **fiModelDesc** Description of a unique machine model (see ModelID); concatenation of fiBaseModel & fiSecondaryDesc & fiModelSeries & fiModelDescriptor 93 | 94 | **fiBaseModel** disaggregation of fiModelDesc 95 | 96 | **fiSecondaryDesc** disaggregation of fiModelDesc 97 | 98 | **fiModelSeries** disaggregation of fiModelDesc 99 | 100 | **fiModelDescriptor** disaggregation of fiModelDesc 101 | 102 | **ProductSize** Don't know what this is 103 | 104 | **ProductClassDesc** description of 2nd level hierarchical grouping (below ProductGroup) of fiModelDesc 105 | 106 | **State** US State in which sale occurred 107 | 108 | **ProductGroup** identifier for top-level hierarchical grouping of fiModelDesc 109 | 110 | **ProductGroupDesc** description of top-level hierarchical grouping of fiModelDesc 111 | 112 | **Drive_System machine configuration**; typcially describes whether 2 or 4 wheel drive 113 | 114 | **Enclosure machine configuration** - does machine have an enclosed cab or not 115 | 116 | **Forks machine configuration** - attachment used for lifting 117 | 118 | **Pad_Type machine configuration** - type of treads a crawler machine uses 119 | 120 | **Ride_Control machine configuration** - optional feature on loaders to make the ride smoother 121 | 122 | **Stick machine configuration** - type of control 123 | 124 | **Transmission machine configuration** - describes type of transmission; typically automatic or manual 125 | 126 | **Turbocharged machine configuration** - engine naturally aspirated or turbocharged 127 | 128 | **Blade_Extension machine configuration** - extension of standard blade 129 | 130 | **Blade_Width machine configuration** - width of blade 131 | 132 | **Enclosure_Type machine configuration** - does machine have an enclosed cab or not 133 | 134 | **Engine_Horsepower machine configuration** - engine horsepower rating 135 | 136 | **Hydraulics machine configuration** - type of hydraulics 137 | 138 | **Pushblock machine configuration** - option 139 | 140 | **Ripper machine configuration** - implement attached to machine to till soil 141 | 142 | **Scarifier machine configuration** - implement attached to machine to condition soil 143 | 144 | **Tip_control machine configuration** - type of blade control 145 | 146 | **Tire_Size machine configuration** - size of primary tires 147 | 148 | **Coupler machine configuration** - type of implement interface 149 | 150 | **Coupler_System machine configuration** - type of implement interface 151 | 152 | **Grouser_Tracks machine configuration** - describes ground contact interface 153 | 154 | **Hydraulics_Flow machine configuration** - normal or high flow hydraulic system 155 | 156 | **Track_Type machine configuration** - type of treads a crawler machine uses 157 | 158 | **Undercarriage_Pad_Width machine configuration** - width of crawler treads 159 | 160 | **Stick_Length machine configuration** - length of machine digging implement 161 | 162 | **Thumb machine configuration** - attachment used for 
grabbing 163 | 164 | **Pattern_Changer machine configuration** - can adjust the operator control configuration to suit the user 165 | 166 | **Grouser_Type machine configuration** - type of treads a crawler machine uses 167 | 168 | **Backhoe_Mounting machine configuration** - optional interface used to add a backhoe attachment 169 | 170 | **Blade_Type machine configuration** - describes type of blade 171 | 172 | **Travel_Controls machine configuration** - describes operator control configuration 173 | 174 | **Differential_Type machine configuration** - differential type, typically locking or standard 175 | 176 | **Steering_Controls machine configuration** - describes operator control configuration 177 | 178 | ### 🛤 PROJECT WORKFLOW 179 | The following steps are followed in building the machine learning model: 180 | 1. **Data Preprocessing** 181 | - Handling missing values 182 | - Feature engineering 183 | - Encoding categorical variables 184 | 185 | 2. **Exploratory Data Analysis (EDA)** 186 | - Identifying trends and relationships 187 | - Visualizing key insights 188 | 189 | 3. **Model Selection and Training** 190 | - Random Forest Regressor 191 | - Hyperparameter tuning using RandomizedSearchCV 192 | 193 | 4. **Model Evaluation** 194 | - Root Mean Squared Log Error (RMSLE) 195 | - R² Score 196 | 197 | 198 | 199 | ## 📉 MODELS USED AND THEIR EVALUATION METRICS 200 | 201 | 202 | | Model | Accuracy | MSE | R2 Score | 203 | |------------|----------|-------|----------| 204 | | RandomForestRegressor | 95% | 0.022 | 0.832588403039663 | 205 | 206 | --- 207 | 208 | ## ✅ CONCLUSION 209 | 210 | The Bulldozer Price Prediction using ML project successfully demonstrates the power of machine learning in estimating the resale price of used bulldozers. By leveraging historical sales data and applying predictive modeling techniques, the project provides a data-driven approach to price estimation, reducing uncertainty and improving decision-making in the heavy equipment market. The final model helps sellers, buyers, and auction houses determine fair market prices, making the process more transparent and efficient. 211 | 212 | ## Key Learnings 213 | 214 | 1. **Data Quality Matters** – Handling missing values, feature engineering, and proper data preprocessing significantly impact model performance. 215 | 216 | 2. **Feature Importance** – Certain factors, such as equipment age, sale date, and operational hours, play a crucial role in price prediction. 217 | 218 | 3. **Model Selection & Tuning** – Experimenting with different machine learning models (Random Forest, Gradient Boosting, etc.) and optimizing hyperparameters enhances prediction accuracy. 219 | 220 | 4. **Evaluation Metrics** – Understanding and applying RMSE and other performance metrics helps assess and improve model reliability. 221 | 222 | 5. **Real-World Deployment** – Preparing a model for deployment requires considering scalability, usability, and integration with business applications. 223 | --- 224 | -------------------------------------------------------------------------------- /docs/machine-learning/crop-recommendation.md: -------------------------------------------------------------------------------- 1 | # Crop Recommendation Model 2 | 3 |
4 | 5 |
6 | 7 | ## 🎯 AIM 8 | 9 | It is an AI-powered Crop Recommendation System that helps farmers and agricultural stakeholders determine the most suitable crops for cultivation based on environmental conditions. The system uses machine learning models integrated with Flask to analyze key parameters and suggest the best crop to grow in a given region. 10 | 11 | ## 📊 DATASET LINK 12 | 13 | [https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset/data](https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset/data) 14 | 15 | ## 📓 NOTEBOOK 16 | 17 | [https://www.kaggle.com/code/kashishkhurana1204/crop-recommendation-system](https://www.kaggle.com/code/kashishkhurana1204/crop-recommendation-system) 18 | 19 | ??? Abstract "Kaggle Notebook" 20 | 21 | 29 | 30 | ## ⚙️ TECH STACK 31 | 32 | | **Category** | **Technologies** | 33 | |--------------------------|-----------------------------------------| 34 | | **Languages** | Python | 35 | | **Libraries/Frameworks** | Pandas, Numpy, Matplotlib, Scikit-learn | 36 | | **Tools** | Github, Jupyter, VS Code | 37 | 38 | --- 39 | 40 | ## 📝 DESCRIPTION 41 | 42 | 43 | !!! info "What is the requirement of the project?" 44 | - To provide accurate crop recommendations based on environmental conditions. 45 | - To assist farmers in maximizing yield and efficiency. 46 | 47 | ??? info "How is it beneficial and used?" 48 | - Helps in optimizing agricultural planning. 49 | - Reduces trial-and-error farming practices. 50 | 51 | 52 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 53 | - Initial thoughts : The goal is to help farmers determine the most suitable crops based on their field’s environmental conditions. 54 | 55 | - Dataset Selection : I searched for relevant datasets on Kaggle that include soil properties, weather conditions, and nutrient levels such as nitrogen (N), phosphorus (P), and potassium (K). 56 | 57 | - Initial Data Exploration : I analyzed the dataset structure to understand key attributes like soil pH, humidity, rainfall, and nutrient values, which directly impact crop suitability. 58 | 59 | - Feature Analysis : Studied how different environmental factors influence crop growth and identified the most significant parameters for prediction. 60 | 61 | - Model Selection & Implementation : Researched various ML models and implemented algorithms like Naïve Bayes, Decision Trees, and Random Forest to predict the best-suited crops. 62 | 63 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 
64 | - [https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset/data](https://www.kaggle.com/datasets/atharvaingle/crop-recommendation-dataset/data) 65 | 66 | 67 | --- 68 | 69 | ## 🔍 EXPLANATION 70 | 71 | ### DATASET OVERVIEW & FEATURE DETAILS 72 | 73 | 📂 dataset.csv 74 | | **Feature**| **Description** | **Data Type** | 75 | |------------|-----------------|----------------| 76 | | Soil_pH | Soil pH level | float | 77 | | Humidity | Humidity level | float | 78 | | Rainfall | Rainfall amount | float | 79 | | N | Nitrogen level | int64 | 80 | | P | Phosphorus level| int64 | 81 | | K | Potassium level | int64 | 82 | |Temperature | Temperature | float | 83 | | crop | Recommended crop| categorical | 84 | 85 | 86 | 87 | ### 🛤 PROJECT WORKFLOW 88 | 89 | ```mermaid 90 | graph 91 | Start -->|No| End; 92 | Start -->|Yes| Import_Libraries --> Load_Dataset --> Data_Cleaning --> Feature_Selection --> Train_Test_Split --> Define_Models; 93 | Define_Models --> Train_Models --> Evaluate_Models --> Save_Best_Model --> Develop_Flask_API --> Deploy_Application --> Conclusion; 94 | Deploy_Application -->|Error?| Debug --> Yay!; 95 | 96 | ``` 97 | 98 | 99 | === "Import Necessary Libraries" 100 | - First, we import all the essential libraries needed for handling, analyzing, and modeling the dataset. 101 | - This includes libraries like Pandas for data manipulation, Numpy for numerical computations, Matplotlib and Seaborn for data visualization, and Scikit-learn for machine learning models, evaluation, and data preprocessing. 102 | - These libraries will enable us to perform all required tasks efficiently. 103 | 104 | === "Load Dataset" 105 | - We load the dataset using Pandas `read_csv()` function. The dataset contains crop data, which is loaded with a semicolon delimiter. 106 | - After loading, we inspect the first few rows to understand the structure of the data and ensure that the dataset is correctly loaded. 107 | 108 | === "Data Cleaning Process" 109 | Data cleaning is a crucial step in any project. In this step: 110 | 111 | - Handle missing values, remove duplicates, and ensure data consistency. 112 | - Convert categorical variables if necessary and normalize numerical values. 113 | 114 | === "Visualizing Correlations Between Features" 115 | 116 | - Use heatmaps and scatter plots to understand relationships between features and how they impact crop recommendations. 117 | 118 | === "Data Preparation - Features (X) and Target (y)" 119 | 120 | - Separate independent variables (environmental parameters) and the target variable (recommended crop). 121 | 122 | === "Split the Data into Training and Test Sets" 123 | 124 | - Use train_test_split() from Scikit-learn to divide data into training and testing sets, ensuring model generalization. 125 | 126 | === "Define Models" 127 | We define multiple regression models to train and evaluate on the dataset: 128 | 129 | - **RandomForestRegressor**: A robust ensemble method that performs well on non-linear datasets. 130 | - **Naive Bayes**: A probabilistic classifier based on Bayes' theorem, which assumes independence between features and is effective for classification tasks. 131 | - **DecisionTreeRegressor**: A decision tree-based model, capturing non-linear patterns and interactions. 132 | 133 | === "Train and Evaluate Each Model" 134 | 135 | - Fit models using training data and evaluate performance using accuracy, precision, recall, and F1-score metrics. 
136 | 137 | === "Visualizing Model Evaluation Metrics" 138 | 139 | - Use confusion matrices, precision-recall curves, and ROC curves to assess model performance. 140 | 141 | == "Conclusion and Observations" 142 | 143 | **Best-Performing Models and Insights Gained:** 144 | 145 | - The Random Forest model provided the highest accuracy and robustness in predictions. 146 | 147 | - Decision Tree performed well but was prone to overfitting on training data. 148 | 149 | - Naïve Bayes, though simple, showed competitive performance for certain crop categories. 150 | 151 | - Feature importance analysis revealed that soil pH and nitrogen levels had the most significant impact on crop recommendation. 152 | 153 | **Potential Improvements and Future Enhancements:** 154 | 155 | - Implement deep learning models for better feature extraction and prediction accuracy. 156 | 157 | - Expand the dataset by incorporating satellite and real-time sensor data. 158 | 159 | - Integrate weather forecasting models to enhance crop suitability predictions. 160 | 161 | - Develop a mobile-friendly UI for better accessibility to farmers. 162 | 163 | --- 164 | 165 | ### 🖥 CODE EXPLANATION 166 | 167 | === "Code to compute F1-score, Precision, and Recall" 168 | 169 | ```py 170 | from sklearn.metrics import precision_score, recall_score, f1_score, classification_report 171 | 172 | # Initialize a dictionary to store model scores 173 | model_scores = {} 174 | 175 | # Iterate through each model and compute evaluation metrics 176 | for name, model in models.items(): 177 | print(f"Evaluating {name}...") 178 | 179 | # Train the model 180 | model.fit(x_train, y_train) 181 | 182 | # Predict on the test set 183 | y_pred = model.predict(x_test) 184 | 185 | # Compute metrics 186 | precision = precision_score(y_test, y_pred, average='weighted') 187 | recall = recall_score(y_test, y_pred, average='weighted') 188 | f1 = f1_score(y_test, y_pred, average='weighted') 189 | 190 | # Store results 191 | model_scores[name] = { 192 | 'Precision': precision, 193 | 'Recall': recall, 194 | 'F1 Score': f1 195 | } 196 | 197 | # Print results for each model 198 | print(f"Precision: {precision:.4f}") 199 | print(f"Recall: {recall:.4f}") 200 | print(f"F1 Score: {f1:.4f}") 201 | print("\nClassification Report:\n") 202 | print(classification_report(y_test, y_pred)) 203 | print("-" * 50) 204 | 205 | # Print a summary of all model scores 206 | print("\nSummary of Model Performance:\n") 207 | for name, scores in model_scores.items(): 208 | print(f"{name}: Precision={scores['Precision']:.4f}, Recall={scores['Recall']:.4f}, F1 Score={scores['F1 Score']:.4f}") 209 | 210 | ``` 211 | 212 | - This code evaluates multiple machine learning models and displays performance metrics such as Precision, Recall, F1 Score, and a Classification Report for each model. 213 | 214 | --- 215 | 216 | ### ⚖️ PROJECT TRADE-OFFS AND SOLUTIONS 217 | 218 | === "Trade Off 1" 219 | - **Trade-off**: Accuracy vs. Computational Efficiency 220 | - **Solution**: Optimized hyperparameters and used efficient algorithms. 221 | 222 | === "Trade Off 2" 223 | - **Trade-off**: Model interpretability vs complexity. 224 | - **Solution**: Selected models balancing accuracy and interpretability. 225 | 226 | --- 227 | 228 | ## 🖼 SCREENSHOTS 229 | 230 | !!! 
tip "Visualizations of different features" 231 | 232 | === "HeatMap" 233 | ![img](https://github.com/Kashishkh/FarmSmart/blob/main/Screenshot%202025-02-04%20195349.png) 234 | 235 | === "Model Comparison" 236 | ![model-comparison](https://github.com/Kashishkh/FarmSmart/blob/main/Screenshot%202025-02-05%20011859.png) 237 | 238 | 239 | --- 240 | 241 | ## 📉 MODELS USED AND THEIR EVALUATION METRICS 242 | 243 | | Model | Accuracy | Precision | Recall |F1-score| 244 | |---------------------------|----------|-----------|--------|--------| 245 | | Naive Bayes | 99.54% | 99.58% | 99.55% | 99.54% | 246 | | Random Forest Regressor | 99.31% | 99.37% | 99.32% | 99.32% | 247 | | Decision Tree Regressor | 98.63% | 98.68% | 98.64% | 98.63% | 248 | 249 | --- 250 | 251 | ## ✅ CONCLUSION 252 | 253 | ### 🔑 KEY LEARNINGS 254 | 255 | !!! tip "Insights gained from the data" 256 | - Soil conditions play a crucial role in crop recommendation. 257 | - Environmental factors significantly impact crop yield. 258 | 259 | ??? tip "Improvements in understanding machine learning concepts" 260 | - Feature engineering and hyperparameter tuning. 261 | - Deployment of ML models in real-world applications. 262 | 263 | --- 264 | 265 | ### 🌍 USE CASES 266 | 267 | === "Application 1" 268 | **Application of FarmSmart in precision farming.** 269 | 270 | - FarmSmart helps optimize resource allocation, enabling farmers to make data-driven decisions for sustainable and profitable crop production. 271 | [https://github.com/Kashishkh/FarmSmart](https://github.com/Kashishkh/FarmSmart) 272 | 273 | === "Application 2" 274 | **Use in government agricultural advisory services.** 275 | 276 | - Government agencies can use FarmSmart to provide region-specific crop recommendations, improving food security and agricultural productivity through AI-driven insights. 277 | 278 | 279 | 280 | -------------------------------------------------------------------------------- /docs/machine-learning/heart-disease-detection-model.md: -------------------------------------------------------------------------------- 1 | # Heart Disease Detection Model 2 | 3 | ### AIM 4 | The aim of this project is to develop a reliable and efficient machine learning-based system for the early detection and diagnosis of heart disease. By leveraging advanced algorithms, the system seeks to analyze patient data, identify significant patterns, and predict the likelihood of heart disease, thereby assisting healthcare professionals in making informed decisions. 5 | 6 | 7 | ### DATASET LINK 8 | This project uses a publicly available heart disease dataset from [UCI Machine Learning Repository](https://archive.ics.uci.edu/dataset/45/heart+disease) 9 | 10 | 11 | ### NOTEBOOK LINK 12 | This is notebook of the following project [Kaggle](https://www.kaggle.com/code/nandagopald2004/heart-disease-detection-using-ml) 13 | 14 | 15 | ### LIBRARIES NEEDED 16 | 17 | 18 | - pandas 19 | - numpy 20 | - scikit-learn 21 | - matplotlib 22 | - seaborn 23 | 24 | --- 25 | 26 | ### DESCRIPTION 27 | 28 | what is the requirement of the project?, 29 | The project requires a dataset containing patient health records, including attributes like age, cholesterol levels, blood pressure, and medical history. Additionally, it needs machine learning tools and frameworks (e.g., Python, scikit-learn) for building and evaluating predictive models. 30 | 31 | why is it necessary?, 32 | Early detection of heart disease is crucial to prevent severe complications and reduce mortality rates. 
33 | 
34 | **How is it beneficial and used?** 
35 | This system benefits healthcare by improving diagnostic accuracy and reducing reliance on invasive procedures. It can be used by doctors for decision support, by patients for risk assessment, and in hospitals for proactive healthcare management. 
36 | 
37 | **How did you start approaching this project?** 
38 | The project begins by collecting and preprocessing a heart disease dataset, ensuring it is clean and ready for analysis. Next, machine learning models are selected, trained, and evaluated to identify the most accurate algorithm for predicting heart disease. 
39 | 
40 | **Mention any additional resources used (blogs, books, articles, etc.):** 
41 | Kaggle kernels and documentation for additional dataset understanding. 
42 | Tutorials on machine learning regression techniques, particularly for Random Forest, SVR, and Decision Trees. 
43 | 
44 | ### EXPLANATION 
45 | 
46 | #### DETAILS OF THE DIFFERENT FEATURES 
47 | 
48 | 
49 | Age: Patient's age in years. 
50 | 
51 | Sex: Gender of the patient (1 = male; 0 = female). 
52 | 
53 | Chest Pain Type (cp): Categorized as: 
54 | 
55 | 0: Typical angina 
56 | 1: Atypical angina 
57 | 2: Non-anginal pain 
58 | 3: Asymptomatic 
59 | Resting Blood Pressure (trestbps): Measured in mm Hg upon hospital admission. 
60 | 
61 | Serum Cholesterol (chol): Measured in mg/dL. 
62 | 
63 | Fasting Blood Sugar (fbs): Indicates if fasting blood sugar > 120 mg/dL (1 = true; 0 = false). 
64 | 
65 | Resting Electrocardiographic Results (restecg): 
66 | 
67 | 0: Normal 
68 | 1: Having ST-T wave abnormality (e.g., T wave inversions and/or ST elevation or depression > 0.05 mV) 
69 | 2: Showing probable or definite left ventricular hypertrophy by Estes' criteria 
70 | Maximum Heart Rate Achieved (thalach): Peak heart rate during exercise. 
71 | 
72 | Exercise-Induced Angina (exang): Presence of angina induced by exercise (1 = yes; 0 = no). 
73 | 
74 | Oldpeak: ST depression induced by exercise relative to rest. 
75 | 
76 | Slope of the Peak Exercise ST Segment (slope): 
77 | 
78 | 0: Upsloping 
79 | 1: Flat 
80 | 2: Downsloping 
81 | Number of Major Vessels Colored by Fluoroscopy (ca): Ranges from 0 to 3. 
82 | 
83 | Thalassemia (thal): 
84 | 
85 | 1: Normal 
86 | 2: Fixed defect 
87 | 3: Reversible defect 
88 | Target: Diagnosis of heart disease (0 = no disease; 1 = disease). 
89 | 
90 | --- 
91 | 
92 | #### PROJECT WORKFLOW 
93 | 
94 | ### 1. Problem Definition 
95 | 
96 | Identify the objective: To predict the presence or absence of heart disease based on patient data. 
97 | Define the outcome variable (target) and input features. 
98 | 
99 | ### 2. Data Collection 
100 | 
101 | Gather a reliable dataset, such as the Cleveland Heart Disease dataset, which includes features relevant to heart disease prediction. 
102 | 
103 | ### 3. Data Preprocessing 
104 | 
105 | Handle missing values: Fill or remove records with missing data. 
106 | Normalize/standardize data to ensure all features have comparable scales. 
107 | Encode categorical variables like sex, cp, and thal using techniques like one-hot encoding or label encoding. 
108 | 
109 | ### 4. Exploratory Data Analysis (EDA) 
110 | 
111 | Visualize data distributions using histograms, boxplots, or density plots. 
112 | Identify relationships between features using correlation matrices and scatterplots.
113 | Detect and handle outliers to improve model performance. 
114 | 
115 | ### 5. Feature Selection 
116 | 
117 | Use statistical methods or feature importance metrics to identify the most relevant features for prediction. 
118 | Remove redundant or less significant features. 
119 | 
120 | ### 6. Data Splitting 
121 | 
122 | Divide the dataset into training, validation, and testing sets (e.g., 70%-15%-15%). 
123 | Ensure a balanced distribution of the target variable in all splits. 
124 | 
125 | ### 7. Model Selection 
126 | 
127 | Experiment with multiple machine learning algorithms such as Logistic Regression, Random Forest, Decision Trees, Support Vector Machines (SVM), and Neural Networks. 
128 | Select models based on the complexity and nature of the dataset. 
129 | 
130 | ### 8. Model Training 
131 | 
132 | Train the chosen models using the training dataset. 
133 | Tune hyperparameters using grid search or random search techniques. 
134 | 
135 | ### 9. Model Evaluation 
136 | 
137 | Assess models on validation and testing datasets using metrics such as: 
138 | Accuracy 
139 | Precision, Recall, and F1-score 
140 | Receiver Operating Characteristic (ROC) curve and Area Under the Curve (AUC). 
141 | Compare models to identify the best-performing one. 
142 | 
143 | ### 10. Deployment and Prediction 
144 | 
145 | Save the trained model using frameworks like joblib or pickle. 
146 | Develop a user interface (UI) or API for end-users to input data and receive predictions. 
147 | 
148 | ### 11. Iterative Improvement 
149 | 
150 | Continuously refine the model using new data or advanced algorithms. 
151 | Address feedback and optimize the system based on real-world performance. 
152 | 
153 | 
154 | 
155 | #### PROJECT TRADE-OFFS AND SOLUTIONS 
156 | 
157 | 
158 | === "Trade Off 1" 
159 | - Accuracy vs. Interpretability 
160 | - Complex models like Random Forests or Neural Networks offer higher accuracy but are less interpretable compared to simpler models like Logistic Regression. 
161 | === "Trade Off 2" 
162 | - Overfitting vs. Generalization 
163 | - Models with high complexity may overfit the training data, leading to poor generalization on unseen data. 
164 | --- 
165 | 
166 | 
167 | 
168 | ### MODELS USED AND THEIR EVALUATION METRICS 
169 | 
170 | 
171 | | Model | Score | 
172 | |------------|----------| 
173 | | Logistic Regression | 88% | 
174 | | K-Nearest Neighbors Classifier | 68% | 
175 | | Random Forest Classifier | 86% | 
176 | 
177 | --- 
178 | 
179 | ### CONCLUSION 
180 | 
181 | #### KEY LEARNINGS 
182 | 
183 | 
184 | 1. Data Insights 
185 | Understanding Healthcare Data: Learned how medical attributes (e.g., age, cholesterol, chest pain type) influence heart disease risk. 
186 | Data Imbalance: Recognized the challenges posed by imbalanced datasets and explored techniques like SMOTE and class weighting to address them. 
187 | Importance of Preprocessing: Gained expertise in handling missing values, scaling data, and encoding categorical variables, which are crucial for model performance. 
188 | 
189 | 2. Techniques Mastered 
190 | Exploratory Data Analysis (EDA): Applied visualization tools (e.g., histograms, boxplots, heatmaps) to uncover patterns and correlations in data. 
191 | Feature Engineering: Identified and prioritized key features using statistical methods and feature importance metrics. 
192 | Modeling: Implemented various machine learning algorithms, including Logistic Regression, Random Forest, Gradient Boosting, and Support Vector Machines.
193 | Evaluation Metrics: Learned to evaluate models using metrics like Precision, Recall, F1-score, and ROC-AUC to optimize for healthcare-specific goals. 194 | Hyperparameter Tuning: Used grid search and random search to optimize model parameters and improve performance. 195 | Interpretability Tools: Utilized SHAP and feature importance analysis to explain model predictions. 196 | 197 | 3. Skills Developed 198 | Problem-Solving: Addressed trade-offs such as accuracy vs. interpretability, and overfitting vs. generalization. 199 | Critical Thinking: Improved decision-making on model selection, preprocessing methods, and evaluation strategies. 200 | Programming: Strengthened Python programming skills, including the use of libraries like scikit-learn, pandas, matplotlib, and TensorFlow. 201 | Collaboration: Enhanced communication and teamwork when discussing medical insights and technical challenges with domain experts. 202 | Time Management: Balanced experimentation with computational efficiency, focusing on techniques that maximized impact. 203 | Ethical Considerations: Gained awareness of ethical issues like ensuring fairness in predictions and minimizing false negatives, which are critical in healthcare applications. 204 | 205 | 4. Broader Understanding 206 | Interdisciplinary Knowledge: Combined expertise from data science, healthcare, and statistics to create a meaningful application. 207 | Real-World Challenges: Understood the complexities of translating machine learning models into practical tools for healthcare. 208 | Continuous Learning: Learned that model development is iterative, requiring continuous refinement based on feedback and new data. 209 | 210 | #### USE CASES 211 | 212 | 213 | === "Application 1" 214 | 215 | **Clinical Decision Support Systems (CDSS)** 216 | 217 | - ML models can be integrated into Electronic Health Record (EHR) systems to assist doctors in diagnosing heart disease. The model can provide predictions based on patient data, helping clinicians make faster and more accurate decisions. 218 | === "Application 2" 219 | 220 | **Early Screening and Risk Assessment** 221 | 222 | - Patients can undergo routine screening using a heart disease detection system to assess their risk level. The system can predict whether a patient is at high, moderate, or low risk, prompting early interventions or lifestyle changes. 223 | 224 | -------------------------------------------------------------------------------- /docs/machine-learning/index.md: -------------------------------------------------------------------------------- 1 | # Machine Learning 🤖 2 | 3 |
4 | 5 | 6 |
7 | 8 | Air Quality Prediction 9 |
10 |

Air Quality Prediction

11 |

Predicting Air Quality with Precision, One Sensor at a Time!

12 |

📅 2025-01-26 | ⏱️ 9 mins

13 |
14 |
15 |
16 | 17 | 18 |
19 | 20 | Poker Hand Prediction 21 |
22 |

Poker Hand Prediction

23 |

Predicting Poker Hands Using Machine Learning

24 |

📅 2025-01-26 | ⏱️ 7 mins

25 |
26 |
27 |
28 | 29 | 30 |
31 | 32 | Heart Disease Detection 33 |
34 |

Heart Disease Detection

35 |

Early Detection of Heart Disease Using ML

36 |

📅 2025-01-26 | ⏱️ 8 mins

37 |
38 |
39 |
40 | 41 | 42 |
43 | 44 | Used Cars Price Prediction 45 |
46 |

Used Cars Price Prediction

47 |

Accurate Price Predictions for Used Vehicles

48 |

📅 2025-01-26 | ⏱️ 6 mins

49 |
50 |
51 |
52 | 53 | 54 |
55 | 56 | Sleep Quality Prediction 57 |
58 |

Sleep Quality Prediction

59 |

Predicting Sleep Quality Based on Lifestyle

60 |

📅 2025-01-26 | ⏱️ 5 mins

61 |
62 |
63 |
64 | 65 | 66 |
67 | 68 | Health Insurance Cross-Sell 69 |
70 |

Insurance Cross-Sell Prediction

71 |

Predicting Vehicle Insurance Cross-Sell Opportunities

72 |

📅 2025-01-26 | ⏱️ 7 mins

73 |
74 |
75 |
76 | 77 | 78 |
79 | 80 | Cardiovascular Disease Prediction 81 |
82 |

Cardiovascular Disease Prediction

83 |

Predicting Cardiovascular Disease Risk

84 |

📅 2025-01-26 | ⏱️ 8 mins

85 |
86 |
87 |
88 | 89 | 90 |
91 | 92 | 93 |
94 |

Crop Recommendation Model

95 |

Smart Farming: AI-Powered Crop Recommendations for Better Yields!

96 |

📅 2025-02-24 | ⏱️ 10 mins

97 |
98 |
99 |
100 | 101 |
102 | 103 | autism detcion using ml 104 |
105 |

Autism Detection

106 |

Predicting Autism Using Machine Learning

107 |

📅 2025-02-26 | ⏱️ 8 mins

108 |
109 |
110 |
111 | 112 |
113 | -------------------------------------------------------------------------------- /docs/machine-learning/sleep-quality-prediction.md: -------------------------------------------------------------------------------- 1 |

Sleep Quality Prediction

2 | 3 |

AIM

4 |

To predict sleep quality based on lifestyle and health factors.

5 | 6 | --- 7 | 8 |

DATASET LINK

9 |

10 | 11 | Sleep Health and Lifestyle Dataset 12 | 13 |

14 | 15 | --- 16 | 17 |

DESCRIPTION

18 | 19 |

What is the requirement of the project?

20 |
    21 |
  • This project aims to predict the quality of sleep using various health and lifestyle metrics. Predicting sleep quality helps individuals and healthcare professionals address potential sleep-related health issues early.
  • 22 |
23 | 24 |

Why is it necessary?

25 |
    26 |
  • Sleep quality significantly impacts physical and mental health. Early predictions can prevent chronic conditions linked to poor sleep, such as obesity, heart disease, and cognitive impairment.
  • 27 |
28 | 29 |

How is it beneficial and used?

30 |
    31 |
  • Individuals: Assess their sleep health and make lifestyle changes to improve sleep quality.
  • 32 |
  • Healthcare Professionals: Use the model as an auxiliary diagnostic tool to recommend personalized interventions.
  • 33 |
34 | 35 |

How did you start approaching this project? (Initial thoughts and planning)

36 |
    37 |
  • Researching sleep health factors and existing literature.
  • 38 |
  • Exploring and analyzing the dataset to understand feature distributions.
  • 39 |
  • Preprocessing data for effective feature representation.
  • 40 |
  • Iterating over machine learning models to find the optimal balance between accuracy and interpretability.
  • 41 |
42 | 43 |

Mention any additional resources used

44 | 48 | 49 | --- 50 | 51 |

LIBRARIES USED

52 |
    53 |
  • pandas
  • 54 |
  • numpy
  • 55 |
  • scikit-learn
  • 56 |
  • matplotlib
  • 57 |
  • seaborn
  • 58 |
  • joblib
  • 59 |
  • flask
  • 60 |
61 | 62 | --- 63 | 64 |

EXPLANATION

65 | 66 |

DETAILS OF THE DIFFERENT FEATURES

67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 |
Feature NameDescriptionTypeValues/Range
GenderRespondent's genderCategorical[Male, Female]
AgeRespondent's ageNumericalMeasured in years
Sleep Duration (hours)Hours of sleep per dayNumericalMeasured in hours
Physical Activity LevelDaily physical activity in minutesNumericalMeasured in minutes
Stress LevelStress level on a scaleNumerical1 to 5 (low to high)
BMI CategoryBody Mass Index categoryCategorical[Underweight, Normal, Overweight, Obese]
Systolic Blood PressureSystolic blood pressureNumericalMeasured in mmHg
Diastolic Blood PressureDiastolic blood pressureNumericalMeasured in mmHg
Heart Rate (bpm)Resting heart rateNumericalBeats per minute
Daily StepsAverage number of steps per dayNumericalMeasured in steps
Sleep DisorderReported sleep disorderCategorical[Yes, No]
146 | 147 | --- 148 | 149 |

WHAT I HAVE DONE

150 | 151 |

Step 1: Exploratory Data Analysis

152 |
    153 |
  • Summary statistics
  • 154 |
  • Data visualization for numerical feature distributions
  • 155 |
  • Target splits for categorical features
  • 156 |
157 | 158 |

Step 2: Data Cleaning and Preprocessing

159 |
    160 |
  • Handling missing values
  • 161 |
  • Label encoding categorical features
  • 162 |
  • Standardizing numerical features
  • 163 |
164 | 165 |

Step 3: Feature Engineering and Selection

166 |
    167 |
  • Merging features based on domain knowledge
  • 168 |
  • Creating derived features such as "Activity-to-Sleep Ratio"
  • 169 |
170 | 171 |

Step 4: Modeling

172 |
    173 |
  • Model trained: Decision Tree
  • 174 |
  • Class imbalance handled using SMOTE
  • 175 |
  • Metric for optimization: F1-score
  • 176 |
177 | 178 |

Step 5: Result Analysis

179 |
    180 |
  • Visualized results using confusion matrices and classification reports
  • 181 |
  • Interpreted feature importance for tree-based models
  • 182 |
183 | 184 | --- 185 | 186 |

MODELS USED AND THEIR ACCURACIES

187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 |
ModelAccuracy (%)F1-Score (%)Precision (%)Recall (%)
Decision Tree74.5075.2073.0077.50
208 | 209 | --- 210 | 211 | 212 |

CONCLUSION

213 | 214 |

WHAT YOU HAVE LEARNED

215 | 216 |
217 |

Insights gained from the data

218 |
    219 |
  • Sleep Duration, Stress Level, and Physical Activity are the most indicative features for predicting sleep quality.
  • 220 |
221 |
222 | 223 |
224 |

Improvements in understanding machine learning concepts

225 |
    226 |
  • Learned and implemented preprocessing techniques like encoding categorical variables and handling imbalanced datasets.
  • 227 |
  • Gained insights into deploying a machine learning model using Flask for real-world use cases.
  • 228 |
229 |
230 | 231 |
232 |

Challenges faced and how they were overcome

233 |
    234 |
  • Managing imbalanced classes: Overcame this by using SMOTE for oversampling the minority class.
  • 235 |
  • Choosing a simple yet effective model: Selected Decision Tree for its interpretability and ease of deployment.
  • 236 |
237 |
238 | 239 | --- 240 | 241 |

USE CASES OF THIS MODEL

242 | 243 |
244 |

Application 1

245 |

246 | A health tracker app can integrate this model to assess and suggest improvements in sleep quality based on user inputs. 247 |

248 |
249 | 250 |
251 |

Application 2

252 |

253 | Healthcare providers can use this tool to make preliminary assessments of patients' sleep health, enabling timely interventions. 254 |

255 |
256 | 257 | --- 258 | 259 |

FEATURES PLANNED BUT NOT IMPLEMENTED

260 | 261 |
262 |

Feature 1

263 |

264 | Advanced models such as Random Forest, AdaBoost, and Gradient Boosting were not implemented due to the project's focus on simplicity and interpretability. 265 |

266 |
267 | 268 |
269 |

Feature 2

270 |

271 | Integration with wearable device data for real-time predictions was not explored but remains a potential enhancement for future work. 272 |

273 |
274 | 275 | 276 | 277 | -------------------------------------------------------------------------------- /docs/machine-learning/used-cars-price-prediction.md: -------------------------------------------------------------------------------- 1 | # Used Cars Price Prediction 2 | 3 | ### AIM 4 | 5 | Predicting the prices of used cars based on their configuration and previous usage. 6 | 7 | ### DATASET LINK 8 | 9 | [https://www.kaggle.com/datasets/avikasliwal/used-cars-price-prediction](https://www.kaggle.com/datasets/avikasliwal/used-cars-price-prediction) 10 | 11 | ### MY NOTEBOOK LINK 12 | 13 | [https://www.kaggle.com/code/sid4ds/used-cars-price-prediction/](https://www.kaggle.com/code/sid4ds/used-cars-price-prediction/) 14 | 15 | ### LIBRARIES NEEDED 16 | 17 | ??? quote "LIBRARIES USED" 18 | 19 | - pandas 20 | - numpy 21 | - scikit-learn (>=1.5.0 required for Target Encoding) 22 | - xgboost 23 | - catboost 24 | - matplotlib 25 | - seaborn 26 | 27 | --- 28 | 29 | ### DESCRIPTION 30 | 31 | !!! info "Why is it necessary?" 32 | - This project aims to predict the prices of used cars listed on an online marketplace based on their features and usage by previous owners. This model can be used by sellers to estimate an approximate price for their cars when they list them on the marketplace. Buyers can use the model to check if the listed price is fair when they decide to buy a used vehicle. 33 | 34 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 35 | - Researching previous projects and articles related to the problem. 36 | - Data exploration to understand the features. 37 | - Identifying different preprocessing strategies for different feature types. 38 | - Choosing key metrics for the problem - Root Mean Squared Error (for error estimation), R2-Score (for model explainability) 39 | 40 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 41 | - [Dealing with features that have high cardinality](https://towardsdatascience.com/dealing-with-features-that-have-high-cardinality-1c9212d7ff1b) 42 | - [Target-encoding Categorical Variables](https://towardsdatascience.com/dealing-with-categorical-variables-by-using-target-encoder-a0f1733a4c69) 43 | - [Cars Price Prediction](https://www.kaggle.com/code/khotijahs1/cars-price-prediction) 44 | 45 | --- 46 | 47 | ### EXPLANATION 48 | 49 | #### DETAILS OF THE DIFFERENT FEATURES 50 | 51 | | **Feature Name** | **Description** | **Type** | **Values/Range** | 52 | |------------------|-----------------|----------|------------------| 53 | | Name | Car model | Categorical | Names of car models | 54 | | Location | City where the car is listed for sale | Categorical | Names of cities| 55 | | Year | Year of original purchase of car | Numerical | Years (e.g., 2010, 2015, etc.) | 56 | | Kilometers_Driven | Odometer reading of the car | Numerical | Measured in kilometers | 57 | | Fuel_Type| Fuel type of the car | Categorical | [Petrol, Diesel, CNG, Electric, etc.] 
| 
58 | | Transmission | Transmission type of the car | Categorical | [Automatic, Manual] | 
59 | | Owner_Type | Number of previous owners of the car | Numerical | Whole numbers | 
60 | | Mileage | Current mileage provided by the car | Numerical | Measured in km/l or equivalent | 
61 | | Engine | Engine capacity of the car | Numerical | Measured in CC (Cubic Centimeters) | 
62 | | Power | Engine power output of the car | Numerical | Measured in BHP (Brake Horsepower) | 
63 | | Seats | Seating capacity of the car | Numerical | Whole numbers | 
64 | | New_Price | Original price of the car at the time of purchase | Numerical | Measured in currency | 
65 | 
66 | --- 
67 | 
68 | #### WHAT I HAVE DONE 
69 | 
70 | === "Step 1" 
71 | 
72 | Exploratory Data Analysis 
73 | 
74 | - Summary statistics 
75 | - Data visualization for numerical feature distributions 
76 | - Target splits for categorical features 
77 | 
78 | === "Step 2" 
79 | 
80 | Data cleaning and preprocessing 
81 | 
82 | - Removing rare categories of brands 
83 | - Removing outliers for numerical features and target 
84 | - Categorical feature encoding for low-cardinality features 
85 | - Target encoding for high-cardinality categorical features (in model pipeline) 
86 | 
87 | === "Step 3" 
88 | 
89 | Feature engineering and selection 
90 | 
91 | - Extracting brand name from model name for a lower-cardinality feature. 
92 | - Converting categorical Owner_Type to numerical Num_Previous_Owners. 
93 | - Feature selection based on model-based feature importances and statistical tests. 
94 | 
95 | === "Step 4" 
96 | 
97 | Modeling 
98 | 
99 | - Holdout dataset created for model testing 
100 | - Setting up a framework for easier testing of multiple models. 
101 | - Models trained: Linear Regression, K-Nearest Neighbors, Decision Tree, Random Forest, AdaBoost, Multi-Layer Perceptron, XGBoost and CatBoost. 
102 | - Models were ensembled using Simple and Weighted averaging. 
103 | 
104 | === "Step 5" 
105 | 
106 | Result analysis 
107 | 
108 | - Predictions made on holdout test set 
109 | - Models compared based on chosen metrics: RMSE and R2-Score. 
110 | - Visualized predicted prices vs. actual prices to analyze errors. 
111 | 
112 | --- 
113 | 
114 | #### PROJECT TRADE-OFFS AND SOLUTIONS 
115 | 
116 | === "Trade Off 1" 
117 | 
118 | **Training time & Model complexity vs Reducing error** 
119 | 
120 | - **Solution:** Limiting depth and number of estimators for tree-based models. Overfitting detection and early stopping mechanism for neural network training. 
121 | 
122 | --- 
123 | 
124 | #### SCREENSHOTS 
125 | 
126 | !!! success "Project workflow" 
127 | 
128 | ``` mermaid 
129 | graph LR 
130 | A[Start] --> B{Error?}; 
131 | B -->|Yes| C[Hmm...]; 
132 | C --> D[Debug]; 
133 | D --> B; 
134 | B ---->|No| E[Yay!]; 
135 | ``` 
136 | 
137 | ???
tip "Data Exploration" 138 | 139 | === "Price" 140 | ![target_dist](https://github.com/user-attachments/assets/066a9cf6-5a03-49d3-a5a4-68bb1f8e07e4) 141 | 142 | === "Year" 143 | ![featdist_year](https://github.com/user-attachments/assets/594127f7-2a0d-405c-ba00-68aa71711c4b) 144 | 145 | === "KM Driven" 146 | ![featdist_kmdriven](https://github.com/user-attachments/assets/6ae7c0d4-8247-4d8e-bf24-e7c209910b59) 147 | 148 | === "Engine" 149 | ![featdist_engine](https://github.com/user-attachments/assets/683dfcaa-6464-4486-88eb-ea2c4e954730) 150 | 151 | === "Power" 152 | ![featdist_power](https://github.com/user-attachments/assets/76bfaef7-9c2c-46aa-81d8-7b9ae44f1dbe) 153 | 154 | === "Mileage" 155 | ![featdist_mileage](https://github.com/user-attachments/assets/cf4c2840-e116-4e87-b5ec-60db8a1b259a) 156 | 157 | === "Seats" 158 | ![featdist_seats](https://github.com/user-attachments/assets/7d5ff47b-20f1-42b5-a4f3-53a8dcabcca7) 159 | 160 | ??? tip "Feature Selection" 161 | 162 | === "Feature Correlation" 163 | ![featselect_corrfeatures](https://github.com/user-attachments/assets/b0368243-8b87-4158-b527-657cb27d39e7) 164 | 165 | === "Target Correlation" 166 | ![featselect_corrtarget](https://github.com/user-attachments/assets/858ce60b-4bde-4e78-b132-5c17d92c5111) 167 | 168 | === "Mutual Information" 169 | ![featselect_mutualinfo](https://github.com/user-attachments/assets/420a81a5-9a16-42a4-99cc-62db49cb5dd6) 170 | 171 | --- 172 | 173 | #### MODELS USED AND THEIR PERFORMANCE 174 | 175 | | Model | RMSE | R2-Score 176 | |:-----|:-----:|:-----: 177 | | Linear Regression | 3.5803 | 0.7915 | 178 | | K-Nearest Neighbors | 2.8261 | 0.8701 | 179 | | Decision Tree | 2.6790 | 0.8833 | 180 | | Random Forest | 2.4619 | 0.9014 | 181 | | AdaBoost | 2.3629 | 0.9092 | 182 | | Multi-layer Perceptron | 2.6255 | 0.8879 | 183 | | XGBoost w/o preprocessing | 2.1649 | 0.9238 | 184 | | **XGBoost with preprocessing** | **2.0987** | **0.9284** | 185 | | CatBoost w/o preprocessing | 2.1734 | 0.9232 | 186 | | Simple average ensemble | 2.2804 | 0.9154 | 187 | | Weighted average ensemble | 2.1296 | 0.9262 | 188 | 189 | --- 190 | 191 | ### CONCLUSION 192 | 193 | #### WHAT YOU HAVE LEARNED 194 | 195 | !!! tip "Insights gained from the data" 196 | 1. Features related to car configuration such as Power, Engine and Transmission are some of the most informative features. Usage-related features such as Year and current Mileage are also important. 197 | 2. Seating capacity and Number of previous owners had relatively less predictive power. However, none of the features were candidates for removal. 198 | 199 | ??? tip "Improvements in understanding machine learning concepts" 200 | 1. Implemented target-encoding for high-cardinality categorical features. 201 | 2. Designed pipelines to avoid data leakage. 202 | 3. Ensembling models using prediction averaging. 203 | 204 | ??? tip "Challenges faced and how they were overcome" 205 | 1. Handling mixed feature types in preprocessing pipelines. 206 | 2. Regularization and overfitting detection to reduce training time while maintaining performance. 207 | 208 | --- 209 | 210 | #### USE CASES OF THIS MODEL 211 | 212 | === "Application 1" 213 | 214 | - Sellers can use the model to estimate an approximate price for their cars when they list them on the marketplace. 215 | 216 | === "Application 2" 217 | 218 | - Buyers can use the model to check if the listed price is fair when they decide to buy a used vehicle. 
219 | 220 | --- 221 | 222 | #### FEATURES PLANNED BUT NOT IMPLEMENTED 223 | 224 | === "Feature 1" 225 | 226 | - Complex model-ensembling through stacking or hill-climbing was not implemented due to significantly longer training time. 227 | 228 | -------------------------------------------------------------------------------- /docs/natural-language-processing/chatbot-implementation.md: -------------------------------------------------------------------------------- 1 | 2 | # Chatbot Implementation Project 3 | 4 | ## AIM 5 | To develop a chatbot using Natural Language Processing (NLP) and a Naive Bayes classifier for intent classification. The chatbot takes user input, predicts the intent, and generates an appropriate response based on predefined intents and responses stored in a CSV file. 6 | 7 | 8 | ### DATASET LINK 9 | [https://drive.google.com/file/d/1J7mGS16EkgCEtN7UJtBlJACeqoDbdS4F/view?usp=drive_link](https://drive.google.com/file/d/1J7mGS16EkgCEtN7UJtBlJACeqoDbdS4F/view?usp=drive_link) 10 | 11 | 12 | ### NOTEBOOK LINK 13 | [https://colab.research.google.com/drive/1L2LKfbVv4pb4yzczcRnnU4AkEW-kCZSZ?usp=sharing](https://colab.research.google.com/drive/1L2LKfbVv4pb4yzczcRnnU4AkEW-kCZSZ?usp=sharing) 14 | 15 | 16 | 17 | ## LIBRARIES NEEDED 18 | 19 | ??? quote "LIBRARIES USED" 20 | - nltk 21 | - scikit-learn 22 | - numpy 23 | - pickle 24 | 25 | 26 | --- 27 | 28 | 29 | ### DESCRIPTION 30 | 31 | ??? info "What is the Requirement of the Project?" 32 | -A chatbot is required to automate conversations and provide immediate responses to user queries. It can be used to answer FAQs, provide customer support, and improve user interaction. 33 | 34 | ??? info "Why is it Necessary?" 35 | - Chatbots are essential for improving user engagement and providing 24/7 service. 36 | - They automate responses, saving time and providing immediate help. 37 | 38 | ??? info "How is it Beneficial and Used?" 39 | - Chatbots can be used for: 40 | 1. Customer service automation. 41 | 2. Answering user questions. 42 | 3. Guiding users through processes on websites or apps. 43 | 44 | 45 | #### "Initial Thoughts and Planning" 46 | - **Intents and Responses**: Gathered and stored in CSV format. 47 | - **Preprocessing**: Tokenized and lemmatized text to prepare for model training. 48 | - **Model**: Built a Naive Bayes classifier to predict intents. 49 | - **Deployment**: Deployed the model to predict user queries and return appropriate responses. 50 | 51 | --- 52 | 53 | #### Additional Resources Used 54 | - [Scikit-learn Documentation](https://scikit-learn.org) 55 | - **Tutorial**: Building Chatbots with NLP and Machine Learning 56 | 57 | --- 58 | 59 | ### FEATURES IN THE DATASET 60 | 61 | | Feature | Description | 62 | |-----------|---------------------------------------------------| 63 | | `intents` | User query categories like greetings, farewells. | 64 | | `responses` | Predefined chatbot responses for each intent. | 65 | 66 | --- 67 | 68 | #### STEPS AND IMPLEMENTATION 69 | 70 | === "step 1" 71 | Data Preprocessing 72 | - Loaded the intents from CSV files. 73 | - Cleaned data by removing duplicates and handling null values. 74 | 75 | 76 | 77 | === "step 2" 78 | Vectorization 79 | - Used `TfidfVectorizer` to convert text into vectors. 80 | - Split data into training and testing sets. 81 | 82 | 83 | === "step 3" 84 | Model Training 85 | - Trained a Naive Bayes classifier on the preprocessed data. 86 | - Saved the model for future use with `pickle`. 87 | - Created an intent-response mapping. 
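    For illustration, a minimal sketch of this step with toy data (in the project, the patterns and intents come from the preprocessed CSV):

    ```py
    import pickle
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.naive_bayes import MultinomialNB

    # Toy examples; the real pattern/intent pairs are loaded from the intents CSV
    patterns = ["hello", "hi there", "bye", "see you later"]
    intents = ["greeting", "greeting", "farewell", "farewell"]

    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(patterns)   # text -> TF-IDF vectors
    clf = MultinomialNB().fit(X, intents)    # Naive Bayes intent classifier

    # Persist the fitted vectorizer and model for later use
    with open("chatbot_model.pkl", "wb") as f:
        pickle.dump((vectorizer, clf), f)
    ```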
88 | 89 | === "step 4" 90 | Prediction and Response Generation** 91 | - The chatbot predicts the intent based on user input. 92 | - Fetches and returns the appropriate response. 93 | 94 | === "step 5" 95 | Testing 96 | - Conducted live interaction tests with the chatbot. 97 | 98 | 99 | --- 100 | 101 | ### Features Not Implemented Yet 102 | - Integration of a deep learning model (e.g., RNN or LSTM) for better context handling. 103 | 104 | --- 105 | 106 | 107 | ### Flow Chart 108 | 109 | ```mermaid 110 | graph TD 111 | A[Data Preprocessing] --> B[Vectorization] 112 | B --> C[Model Training] 113 | C --> D[Prediction and Response Generation] 114 | D --> E[Testing the Chatbot] 115 | 116 | A1[Load intents from CSV] --> A2[Clean data: remove duplicates and handle nulls] 117 | A --> A1 118 | A --> A2 119 | 120 | B1[Use TfidfVectorizer to convert text into vectors] --> B2[Split data into training and testing sets] 121 | B --> B1 122 | B --> B2 123 | 124 | C1[Train Naive Bayes classifier] --> C2[Save model with pickle] --> C3[Create intent-response mapping] 125 | C --> C1 126 | C --> C2 127 | C --> C3 128 | 129 | D1[Chatbot predicts intent] --> D2[Fetch appropriate response based on intent] --> D3[Return response to user] 130 | D --> D1 131 | D --> D2 132 | D --> D3 133 | 134 | E1[Live interaction with chatbot] --> E2[Test accuracy and responses] 135 | E --> E1 136 | E --> E2 137 | ``` 138 | ``` 139 | 140 | #### Example Chatbot Interaction: 141 | 142 | ```text 143 | You: Hello 144 | Bot: Hi, How can I assist you? 145 | ``` 146 | 147 | --- 148 | 149 | ## MODELS AND EVALUATION METRICS 150 | 151 | | Model | Accuracy | Precision | Recall | 152 | |------------------|----------|-----------|--------| 153 | | Naive Bayes | 92% | 91% | 90% | 154 | 155 | --- 156 | 157 | ### CONCLUSION 158 | 159 | ??? "What Have You Learned?" 160 | - Building a chatbot using NLP techniques can automate interactions and provide user-friendly interfaces for businesses. 161 | -The Naive Bayes classifier is an effective yet simple model for intent prediction. 162 | 163 | --- 164 | 165 | #### USE CASES 166 | 167 | === "Application 1" 168 | 169 | **Customer Support Automation** 170 | 171 | -Provide 24/7 automated support for customers. 172 | 173 | 174 | === "Application 2" 175 | 176 | **FAQ Automation** 177 | 178 | - Automatically respond to frequently asked questions on websites or apps. 179 | 180 | 181 | -------------------------------------------------------------------------------- /docs/natural-language-processing/email-spam-detection.md: -------------------------------------------------------------------------------- 1 | # 🌟 Email Spam Detection 2 | 3 |
4 | 5 |
6 | 7 | ## 🎯 AIM 8 | To classify emails as spam or ham using machine learning models, ensuring better email filtering and security. 9 | 10 | ## 📊 DATASET LINK 11 | [Email Spam Detection Dataset](https://www.kaggle.com/datasets/shantanudhakadd/email-spam-detection-dataset-classification) 12 | 13 | ## 📚 KAGGLE NOTEBOOK 14 | [Notebook Link](https://www.kaggle.com/code/thatarguy/email-spam-classifier?kernelSessionId=224262023) 15 | 16 | ??? Abstract "Kaggle Notebook" 17 | 18 | 19 | 20 | ## ⚙️ TECH STACK 21 | 22 | | **Category** | **Technologies** | 23 | |--------------------------|---------------------------------------------| 24 | | **Languages** | Python | 25 | | **Libraries/Frameworks** | Scikit-learn, NumPy, Pandas, Matplotlib, Seaborn | 26 | | **Databases** | NOT USED | 27 | | **Tools** | Kaggle, Jupyter Notebook | 28 | | **Deployment** | NOT USED | 29 | 30 | --- 31 | 32 | ## 📝 DESCRIPTION 33 | !!! info "What is the requirement of the project?" 34 | - To efficiently classify emails as spam or ham. 35 | - To improve email security by filtering out spam messages. 36 | 37 | ??? info "How is it beneficial and used?" 38 | - Helps in reducing unwanted spam emails in user inboxes. 39 | - Enhances productivity by filtering out irrelevant emails. 40 | - Can be integrated into email service providers for automatic filtering. 41 | 42 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 43 | - Collected and preprocessed the dataset. 44 | - Explored various machine learning models. 45 | - Evaluated models based on performance metrics. 46 | - Visualized results for better understanding. 47 | 48 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 49 | - Scikit-learn documentation. 50 | - Various Kaggle notebooks related to spam detection. 51 | 52 | --- 53 | 54 | ## 🔍 PROJECT EXPLANATION 55 | 56 | ### 🧩 DATASET OVERVIEW & FEATURE DETAILS 57 | 58 | ??? example "📂 spam.csv" 59 | 60 | - The dataset contains the following features: 61 | 62 | | Feature Name | Description | Datatype | 63 | |--------------|-------------|:------------:| 64 | | Category | Spam or Ham | object | 65 | | Text | Email text | object | 66 | | Length | Length of email | int64 | 67 | 68 | ??? example "🛠 Developed Features from spam.csv" 69 | 70 | | Feature Name | Description | Reason | Datatype | 71 | |--------------|-------------|----------|:------------:| 72 | | Length | Email text length | Helps in spam detection | int64 | 73 | 74 | --- 75 | 76 | ### 🛤 PROJECT WORKFLOW 77 | 78 | !!! success "Project workflow" 79 | 80 | ``` mermaid 81 | graph LR 82 | A[Start] --> B[Load Dataset] 83 | B --> C[Preprocess Data] 84 | C --> D[Vectorize Text] 85 | D --> E[Train Models] 86 | E --> F[Evaluate Models] 87 | F --> G[Visualize Results] 88 | ``` 89 | 90 | === "Step 1" 91 | - Load the dataset and clean unnecessary columns. 92 | 93 | === "Step 2" 94 | - Preprocess text and convert categorical labels. 95 | 96 | === "Step 3" 97 | - Convert text into numerical features using CountVectorizer. 98 | 99 | === "Step 4" 100 | - Train machine learning models. 101 | 102 | === "Step 5" 103 | - Evaluate models using accuracy, precision, recall, and F1 score. 104 | 105 | === "Step 6" 106 | - Visualize performance using confusion matrices and heatmaps. 107 | 108 | --- 109 | 110 | ### 🖥 CODE EXPLANATION 111 | 112 | === "Section 1" 113 | - Data loading and preprocessing. 114 | 115 | === "Section 2" 116 | - Text vectorization using CountVectorizer. 
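    For illustration, a minimal sketch of this step (the file and column names follow the dataset overview above; the label values are assumed to be "ham"/"spam"):

    ```py
    import pandas as pd
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.model_selection import train_test_split

    df = pd.read_csv("spam.csv")                     # columns: Category, Text
    y = df["Category"].map({"ham": 0, "spam": 1})    # assumed label values

    vectorizer = CountVectorizer(stop_words="english")
    X = vectorizer.fit_transform(df["Text"])         # bag-of-words feature matrix

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    ```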
117 | 118 | === "Section 3" 119 | - Training models (MLP Classifier, MultinomialNB, BernoulliNB). 120 | 121 | === "Section 4" 122 | - Evaluating models using various metrics. 123 | 124 | === "Section 5" 125 | - Visualizing confusion matrices and metric comparisons. 126 | 127 | --- 128 | 129 | ### ⚖️ PROJECT TRADE-OFFS AND SOLUTIONS 130 | 131 | === "Trade Off 1" 132 | - Balancing accuracy and computational efficiency. 133 | - Used Naive Bayes for speed and MLP for improved accuracy. 134 | 135 | === "Trade Off 2" 136 | - Handling false positives vs. false negatives. 137 | - Tuned models to improve precision for spam detection. 138 | 139 | --- 140 | 141 | ## 🎮 SCREENSHOTS 142 | 143 | !!! tip "Visualizations and EDA of different features" 144 | 145 | === "Confusion Matrix comparision" 146 | ![img](https://github.com/user-attachments/assets/94a3b2d8-c7e5-41a5-bba7-8ba4cb1435a7) 147 | 148 | 149 | ??? example "Model performance graphs" 150 | 151 | === "Meteric comparison" 152 | ![img](https://github.com/user-attachments/assets/c2be4340-89c9-4aee-9a27-8c40bf2c0066) 153 | 154 | 155 | --- 156 | 157 | ## 📉 MODELS USED AND THEIR EVALUATION METRICS 158 | 159 | | Model | Accuracy | Precision | Recall | F1 Score | 160 | |------------|----------|------------|--------|----------| 161 | | MLP Classifier | 95% | 0.94 | 0.90 | 0.92 | 162 | | Multinomial NB | 93% | 0.91 | 0.88 | 0.89 | 163 | | Bernoulli NB | 92% | 0.89 | 0.85 | 0.87 | 164 | 165 | --- 166 | 167 | ## ✅ CONCLUSION 168 | 169 | ### 🔑 KEY LEARNINGS 170 | 171 | !!! tip "Insights gained from the data" 172 | - Text length plays a role in spam detection. 173 | - Certain words appear more frequently in spam emails. 174 | 175 | ??? tip "Improvements in understanding machine learning concepts" 176 | - Gained insights into text vectorization techniques. 177 | - Understood trade-offs between different classification models. 178 | 179 | --- 180 | 181 | ### 🌍 USE CASES 182 | 183 | === "Email Filtering Systems" 184 | - Can be integrated into email services like Gmail and Outlook. 185 | 186 | === "SMS Spam Detection" 187 | - Used in mobile networks to block spam messages. 188 | 189 | -------------------------------------------------------------------------------- /docs/natural-language-processing/index.md: -------------------------------------------------------------------------------- 1 | # Natural Language Processing 🗣️ 2 | 3 |
4 | 5 | 6 |
7 | 8 | Chatbot Implementation 9 |
10 |

Chatbot Implementation

11 |

Modern Chatbot System Using NLP & AI

12 |

📅 2025-01-21 | ⏱️ 15 mins

13 |
14 |
15 |
16 | 17 | 18 |
19 | 20 | Twitter Sentiment Analysis 21 |
22 |

Twitter Sentiment Analysis

23 |

Analyzing Sentiment in Twitter Data

24 |

📅 2025-01-21 | ⏱️ 12 mins

25 |
26 |
27 |
28 | 29 | 30 |
31 | 32 | Email Spam Detection 33 |
34 |

Email Spam Detection

35 |

ML-Based Email Spam Classification

36 |

📅 2025-01-21 | ⏱️ 10 mins

37 |
38 |
39 |
40 | 41 | 42 |
43 | 44 | Next Word Prediction 45 |
46 |

Next Word Prediction

47 |

LSTM-Based Word Prediction System

48 |

📅 2025-01-21 | ⏱️ 8 mins

49 |
50 |
51 |
52 | 53 | 54 |
55 | 56 | Named Entity Recognition 57 |
58 |

Named Entity Recognition

59 |

Identifying & Classifying Named Entities

60 |

📅 2025-01-21 | ⏱️ 7 mins

61 |
62 |
63 |
64 | 65 | 66 |
67 | 68 | Text Summarization 69 |
70 |

Text Summarization

71 |

Summarizing Long Articles Concisely

72 |

📅 2025-01-21 | ⏱️ 11 mins

73 |
74 |
75 |
76 | 77 |
78 | -------------------------------------------------------------------------------- /docs/natural-language-processing/name-entity-recognition.md: -------------------------------------------------------------------------------- 1 | 2 | # Name Entity Recognition (NER) Project 3 | 4 | ## AIM 5 | To develop a system that identifies and classifies named entities (such as persons, organizations, locations, dates, etc.) in text using Named Entity Recognition (NER) with SpaCy. 6 | 7 | ## DATASET LINK 8 | N/A (This project uses text input for NER analysis, not a specific dataset) 9 | - It uses real time data as input . 10 | 11 | ## NOTEBOOK LINK 12 | [https://colab.research.google.com/drive/1pBIEFA4a9LzyZKUFQMCypQ22M6bDbXM3?usp=sharing](https://colab.research.google.com/drive/1pBIEFA4a9LzyZKUFQMCypQ22M6bDbXM3?usp=sharing) 13 | 14 | ## LIBRARIES NEEDED 15 | - SpaCy 16 | 17 | 18 | ## DESCRIPTION 19 | 20 | !!! info "What is the requirement of the project?" 21 | - Named Entity Recognition (NER) is essential to automatically extract and classify key entities from text, such as persons, organizations, locations, and more. 22 | - This helps in analyzing and organizing data efficiently, enabling various NLP applications like document analysis and information retrieval. 23 | 24 | ??? info "Why is it necessary?" 25 | - NER is used for understanding and structuring unstructured text, which is widely applied in industries such as healthcare, finance, and e-commerce. 26 | - It allows users to extract actionable insights from large volumes of text data 27 | 28 | ??? info "How is it beneficial and used?" 29 | - NER plays a key role in tasks such as document summarization, information retrieval. 30 | - It automates the extraction of relevant entities, which reduces manual effort and improves efficiency. 31 | 32 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 33 | - The project leverages SpaCy's pre-trained NER models, enabling easy text analysis without the need for training custom models. 34 | 35 | ### Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.) 36 | - SpaCy Documentation: [SpaCy NER](https://spacy.io/usage/linguistic-features#named-entities) 37 | - NLP in Python by Steven Bird et al. 38 | 39 | ## EXPLANATION 40 | 41 | ### DETAILS OF THE DIFFERENT ENTITY TYPES 42 | 43 | The system extracts the following entity types: 44 | 45 | | Entity Type | Description | 46 | |-------------|-------------| 47 | | PERSON | Names of people (e.g., "Anuska") | 48 | | ORG | Organizations (e.g., "Google", "Tesla") | 49 | | LOC | Locations (e.g., "New York", "Mount Everest") | 50 | | DATE | Dates (e.g., "January 1st, 2025") | 51 | | GPE | Geopolitical entities (e.g., "India", "California") | 52 | 53 | ## WHAT I HAVE DONE 54 | 55 | ### Step 1: Data collection and preparation 56 | - Gathered sample text for analysis (provided by users in the app). 57 | - Explored the text structure and identified entity types. 58 | 59 | ### Step 2: NER model implementation 60 | - Integrated SpaCy's pre-trained NER model (`en_core_web_sm`). 61 | - Extracted named entities and visualized them with labels and color coding. 62 | 63 | ### Step 3: Testing and validation 64 | - Validated results with multiple test cases to ensure entity accuracy. 65 | - Allowed users to input custom text for NER analysis in real-time. 66 | 67 | ## PROJECT TRADE-OFFS AND SOLUTIONS 68 | 69 | ### Trade Off 1: Pre-trained model vs. 
custom model 70 | - **Pre-trained models** provide quick results but may lack accuracy for domain-specific entities. 71 | - **Custom models** can improve accuracy but require additional data and training time. 72 | 73 | ### Trade Off 2: Real-time analysis vs. batch processing 74 | - **Real-time analysis** in a web app enhances user interaction but might slow down with large text inputs. 75 | - **Batch processing** could be more efficient for larger datasets. 76 | 77 | ## SCREENSHOTS 78 | 79 | ### NER Example 80 | ``` mermaid 81 | graph LR 82 | A[Start] --> B[Text Input]; 83 | B --> C[NER Analysis]; 84 | C --> D{Entities Extracted}; 85 | D -->|Person| E[Anuska]; 86 | D -->|Location| F[New York]; 87 | D -->|Organization| G[Google]; 88 | D -->|Date| H[January 1st, 2025]; 89 | -------------------------------------------------------------------------------- /docs/natural-language-processing/next-word-pred.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Next Word Prediction using LSTM 4 | 5 | ### AIM 6 | To predict the next word using LSTM. 7 | 8 | 9 | ### DATASET LINK 10 | [Dataset](https://www.kaggle.com/datasets/muhammadbilalhaneef/sherlock-holmes-next-word-prediction-corpus) 11 | 12 | ### NOTEBOOK LINK 13 | [Code](https://colab.research.google.com/drive/1Y1icIR8ZViZzRn6LV-ZSuGvXHde8T7yA) 14 | 15 | 16 | ### LIBRARIES NEEDED 17 | 18 | ??? quote "LIBRARIES USED" 19 | 20 | - pandas 21 | - numpy 22 | - scikit-learn 23 | - matplotlib 24 | - seaborn 25 | - tensorflow 26 | - keras 27 | 28 | --- 29 | 30 | ### DESCRIPTION 31 | 32 | 33 | !!! info "What is the requirement of the project?" 34 | - To create an intelligent system capable of predicting the next word in a sentence based on its context. 35 | - The need for such a system arises in applications like autocomplete, chatbots, and virtual assistants. 36 | 37 | ??? info "Why is it necessary?" 38 | - Enhances user experience in text-based applications by offering accurate suggestions. 39 | - Reduces typing effort, especially in mobile applications. 40 | 41 | ??? info "How is it beneficial and used?" 42 | - Improves productivity: By predicting words, users can complete sentences faster. 43 | - Supports accessibility: Assists individuals with disabilities in typing. 44 | - Boosts efficiency: Helps in real-time text generation in NLP applications like chatbots and email composition. 45 | 46 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 47 | - Studied LSTM architecture and its suitability for sequential data. 48 | - Explored similar projects and research papers to understand data preprocessing techniques. 49 | - Experimented with tokenization, padding, and sequence generation for the dataset. 50 | 51 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 52 | - Blogs on LSTM from Towards Data Science. 53 | - TensorFlow and Keras official documentation. 54 | 55 | 56 | --- 57 | 58 | ### EXPLANATION 59 | 60 | #### DETAILS OF THE DIFFERENT FEATURES 61 | --- 62 | 63 | #### PROJECT WORKFLOW 64 | === "Step 1" 65 | 66 | Initial data exploration and understanding: 67 | 68 | - Gathered text data from open-source datasets. 69 | - Analyzed the structure of the data. 70 | - Performed basic text statistics to understand word frequency and distribution. 71 | 72 | === "Step 2" 73 | 74 | Data cleaning and preprocessing 75 | 76 | - Removed punctuation and convert text to lowercase. 
77 | - Tokenized text into sequences and pad them to uniform length. 78 | 79 | === "Step 3" 80 | Feature engineering and selection 81 | 82 | - Created input-output pairs for next-word prediction using sliding window techniques on tokenized sequences. 83 | 84 | === "Step 4" 85 | Model training and evaluation: 86 | 87 | - Used an embedding layer to represent words in a dense vector space. 88 | - Implemented LSTM-based sequential models to learn context and dependencies in text. 89 | - Experimented with hyperparameters like sequence length, LSTM units, learning rate, and batch size. 90 | 91 | === "Step 5" 92 | Model optimization and fine-tuning 93 | 94 | - Adjusted hyperparameters like embedding size, LSTM units, and learning rate. 95 | 96 | === "Step 6" 97 | Validation and testing 98 | 99 | - Used metrics like accuracy and perplexity to assess prediction quality. 100 | - Validated the model on unseen data to test generalization. 101 | 102 | --- 103 | 104 | #### PROJECT TRADE-OFFS AND SOLUTIONS 105 | 106 | === "Trade-Off 1" 107 | Accuracy vs Training Time: 108 | 109 | - **Solution**: Balanced by reducing the model's complexity and using an efficient optimizer. 110 | 111 | === "Trade-Off 2" 112 | Model complexity vs. Overfitting: 113 | 114 | - **Solution**: Implemented dropout layers and monitored validation loss during training. 115 | 116 | --- 117 | 118 | ### SCREENSHOTS 119 | 120 | 121 | !!! success "Project workflow" 122 | 123 | ``` mermaid 124 | graph LR 125 | A[Start] --> B{Data Preprocessed?}; 126 | B -->|No| C[Clean and Tokenize]; 127 | C --> D[Create Sequences]; 128 | D --> B; 129 | B -->|Yes| E[Model Designed?]; 130 | E -->|No| F[Build LSTM/Transformer]; 131 | F --> E; 132 | E -->|Yes| G[Train Model]; 133 | G --> H{Performant?}; 134 | H -->|No| I[Optimize Hyperparameters]; 135 | I --> G; 136 | H -->|Yes| J[Deploy Model]; 137 | J --> K[End]; 138 | ``` 139 | 140 | 141 | --- 142 | 143 | ### MODELS USED AND THEIR EVALUATION METRICS 144 | 145 | 146 | | Model | Accuracy | MSE | R2 Score | 147 | |------------|----------|-------|----------| 148 | | LSTM | 72% | - | - | 149 | 150 | --- 151 | #### MODELS COMPARISON GRAPHS 152 | 153 | !!! tip "Models Comparison Graphs" 154 | 155 | === "LSTM Loss" 156 | ![model perf](https://github.com/user-attachments/assets/db3a6d81-96fa-46d6-84b4-6395d46221d6) 157 | 158 | --- 159 | ### CONCLUSION 160 | 161 | #### KEY LEARNINGS 162 | 163 | 164 | !!! tip "Insights gained from the data" 165 | 166 | - The importance of preprocessing for NLP tasks. 167 | - How padding and embeddings improve the model’s ability to generalize. 168 | 169 | ??? tip "Improvements in understanding machine learning concepts" 170 | 171 | - Learned how LSTMs handle sequential dependencies. 172 | - Understood the role of softmax activation in predicting word probabilities. 173 | 174 | ??? tip "Challenges faced and how they were overcome" 175 | 176 | - Challenge: Large vocabulary size causing high memory usage. 177 | - Solution: Limited vocabulary to the top frequent words. 178 | 179 | --- 180 | 181 | #### USE CASES 182 | 183 | === "Application 1" 184 | 185 | **Text Autocompletion** 186 | 187 | - Used in applications like Gmail and search engines to enhance typing speed. 188 | 189 | === "Application 2" 190 | 191 | **Virtual Assistants** 192 | 193 | - Enables better conversational capabilities in chatbots and AI assistants. 
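
For reference, a minimal Keras sketch of the architecture described in the workflow above. The vocabulary size, sequence length, and layer sizes are illustrative assumptions, not the exact values used in the notebook:

```py
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, LSTM, Dropout, Dense

vocab_size = 5000   # assumed: only the most frequent words are kept
seq_length = 20     # assumed length of the padded input sequences

model = Sequential([
    Input(shape=(seq_length,)),
    Embedding(input_dim=vocab_size, output_dim=100),  # dense word vectors
    LSTM(128),                                        # learns sequential context
    Dropout(0.2),                                     # regularization against overfitting
    Dense(vocab_size, activation="softmax"),          # probability distribution over the next word
])
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
```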
194 | -------------------------------------------------------------------------------- /docs/natural-language-processing/text-summarization.md: -------------------------------------------------------------------------------- 1 | 2 | # 📜Text Summarization 3 | 4 | ### 🎯 AIM 5 | Develop a model to summarize long articles into short, concise summaries. 6 | 7 | ### 📊 DATASET LINK 8 | [CNN DailyMail News Dataset](https://www.kaggle.com/datasets/gowrishankarp/newspaper-text-summarization-cnn-dailymail/) 9 | 10 | ### 📓 NOTEBOOK LINK 11 | ??? Abstract "Kaggle Notebook" 12 | 13 | 14 | ### ⚙️ LIBRARIES NEEDED 15 | ??? quote "LIBRARIES USED" 16 | 17 | - pandas 18 | - numpy 19 | - scikit-learn 20 | - matplotlib 21 | - keras 22 | - tensorflow 23 | - spacy 24 | - pytextrank 25 | - TfidfVectorizer 26 | - Transformer (Bart) 27 | --- 28 | 29 | ### 📝 DESCRIPTION 30 | 31 | ??? info "What is the requirement of the project?" 32 | - A robust system to summarize text efficiently is essential for handling large volumes of information. 33 | - It helps users quickly grasp key insights without reading lengthy documents. 34 | 35 | ??? info "Why is it necessary?" 36 | - Large amounts of text can be overwhelming and time-consuming to process. 37 | - Automated summarization improves productivity and aids decision-making in various fields like journalism, research, and customer support. 38 | 39 | ??? info "How is it beneficial and used?" 40 | - Provides a concise summary while preserving essential information. 41 | - Used in news aggregation, academic research, and AI-powered assistants for quick content consumption. 42 | 43 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 44 | - Explored different text summarization techniques, including extractive and abstractive methods. 45 | - Implemented models like TextRank, BART, and T5 to compare their effectiveness. 46 | 47 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 48 | - Documentation from Hugging Face Transformers 49 | - Research Paper: "Text Summarization using Deep Learning" 50 | - Blog: "Introduction to NLP-based Summarization Techniques" 51 | 52 | --- 53 | ## 🔍 EXPLANATION 54 | 55 | #### 🧩 DETAILS OF THE DIFFERENT FEATURES 56 | 57 | #### 📂 dataset.csv 58 | 59 | The dataset contains features like sentence importance, word frequency, and linguistic structures that help in generating meaningful summaries. 60 | 61 | | Feature Name | Description | 62 | |--------------|-------------| 63 | | Id | A unique Id for each row | 64 | | Article | Entire article written on CNN Daily mail | 65 | | Highlights | Key Notes of the article | 66 | 67 | #### 🛠 Developed Features 68 | 69 | | Feature | Description | 70 | |----------------------|-------------------------------------------------| 71 | | `sentence_rank` | Rank of a sentence based on importance using TextRank | 72 | | `word_freq` | Frequency of key terms in the document | 73 | | `tf-idf_score` | Term Frequency-Inverse Document Frequency for words | 74 | | `summary_length` | Desired length of the summary | 75 | | `generated_summary` | AI-generated condensed version of the original text | 76 | 77 | --- 78 | ### 🛤 PROJECT WORKFLOW 79 | !!! 
success "Project flowchart" 80 | 81 | ``` mermaid 82 | graph LR 83 | A[Start] --> B[Load Dataset] 84 | B --> C[Preprocessing] 85 | C --> D[TextRank + TF-IDF / Transformer Models] 86 | D --> E{Compare Performance} 87 | E -->|Best Model| F[Deploy] 88 | E -->|Retry| C; 89 | ``` 90 | 91 | #### PROCEDURE 92 | 93 | === "Step 1" 94 | 95 | Exploratory Data Analysis: 96 | 97 | - Loaded the CNN/DailyMail dataset using pandas. 98 | - Explored dataset features like article and highlights, ensuring the correct format for summarization. 99 | - Analyzed the distribution of articles and their corresponding summaries. 100 | 101 | === "Step 2" 102 | 103 | Data cleaning and preprocessing: 104 | 105 | - Removed unnecessary columns (like id) and checked for missing values. 106 | - Tokenized articles into sentences and words, removing stopwords and special characters. 107 | - Preprocessed the text using basic NLP techniques such as lowercasing, lemmatization, and removing non-alphanumeric characters. 108 | 109 | === "Step 3" 110 | 111 | Feature engineering and selection: 112 | 113 | - For TextRank-based summarization, calculated sentence similarity using TF-IDF (Term Frequency-Inverse Document Frequency) and Cosine Similarity. 114 | - Selected top-ranked sentences based on their importance and relevance to the article. 115 | - Applied transformers-based models like BART and T5 for abstractive summarization. 116 | - Applied transformers-based models like BART and T5 for abstractive summarization. 117 | 118 | === "Step 4" 119 | 120 | Model training and evaluation: 121 | 122 | - For the TextRank summarization approach, created a similarity matrix based on TF-IDF and Cosine Similarity. 123 | - For transformer-based methods, used Hugging Face's BART and T5 models, summarizing articles with their pre-trained weights. 124 | - Evaluated the summarization models based on BLEU, ROUGE, and Cosine Similarity metrics. 125 | 126 | === "Step 5" 127 | 128 | Validation and testing: 129 | 130 | - Tested both extractive and abstractive summarization models on unseen data to ensure generalizability. 131 | - Plotted confusion matrices to visualize True Positives, False Positives, and False Negatives, ensuring effective model performance. 132 | --- 133 | 134 | ### 🖥 CODE EXPLANATION 135 | 136 | 137 | 138 | === "TextRank algorithm" 139 | 140 | Important Function: 141 | 142 | graph = nx.from_numpy_array(similarity_matrix) 143 | scores = nx.pagerank(graph) 144 | 145 | Example Input: 146 | similarity_matrix = np.array([ 147 | [0.0, 0.2, 0.1], # Sentence 1 148 | [0.2, 0.0, 0.3], # Sentence 2 149 | [0.1, 0.3, 0.0]]) # Sentence 3 150 | 151 | graph = nx.from_numpy_array(similarity_matrix) 152 | scores = nx.pagerank(graph) 153 | 154 | Output: 155 | {0: 0.25, 1: 0.45, 2: 0.30} #That means sentence 2(0.45) has more importance than others 156 | 157 | 158 | 159 | === "Transformers" 160 | 161 | Important Function: 162 | 163 | pipeline("summarization") - Initializes a pre-trained transformer model for summarization. 164 | generated_summary = summarization_pipeline(article, max_length=150, min_length=50, do_sample=False) 165 | This Generates a summary using a transformer model. 166 | 167 | Example Input: 168 | article = "The Apollo program was a NASA initiative that landed humans on the Moon between 1969 and 1972, 169 | with Apollo 11 being the first mission." 170 | 171 | Output: 172 | The Apollo program was a NASA initiative that landed humans on the Moon between 1969 and 1972. 173 | Apollo 11 was the first mission. 
174 | 
175 | 
176 | 
177 | 
178 | === "TF-IDF Algorithm"
179 | 
180 |     Important Function:
181 | 
182 |     vectorizer = TfidfVectorizer()
183 |     tfidf_matrix = vectorizer.fit_transform(processed_sentences)
184 | 
185 |     Example Input:
186 |     processed_sentences = [
187 |         "apollo program nasa initiative landed humans moon 1969 1972",
188 |         "apollo 11 first mission land moon neil armstrong buzz aldrin walked surface",
189 |         "apollo program significant achievement space exploration cold war space race"]
190 | 
191 |     Output (vocabulary learned by the vectorizer):
192 |     ['1969', '1972', 'achievement', 'aldrin', 'apollo', 'armstrong', 'buzz', 'cold', 'exploration',
193 |      'first', 'humans', 'initiative', 'land', 'landed', 'moon', 'nasa', 'neil', 'program', 'race',
194 |      'significant', 'space', 'surface', 'walked', 'war']
195 | 
196 | ---
197 | 
198 | #### ⚖️ PROJECT TRADE-OFFS AND SOLUTIONS
199 | 
200 | === "Trade-off 1"
201 | 
202 |     The training dataset is over 1.2 GB, which is too large to process comfortably on a local machine.
203 | 
204 |     - **Solution**: Instead of training on the full training split, used the smaller test split for training and validation.
205 | 
206 | === "Trade-off 2"
207 | 
208 |     Transformer models (BART/T5) required high computational resources and long inference times for summarizing large articles.
209 | 
210 |     - **Solution**: Model pruning: used smaller, distilled versions of the transformer models (e.g., distilBART or distilT5) to reduce the computational load without compromising much on performance.
211 | 
212 | === "Trade-off 3"
213 | 
214 |     TextRank summaries might miss nuances and context, leading to less accurate or overly simplistic outputs compared to transformer-based models.
215 | 
216 |     - **Solution**: Combined TextRank and transformer-based summarization in a hybrid approach to leverage the best of both worlds: speed from TextRank and accuracy from transformers.
217 | 
218 | 
219 | ---
220 | 
221 | ### 🖼 SCREENSHOTS
222 | 
223 | ??? example "Confusion Matrix"
224 | 
225 |     === "TF-IDF Confusion Matrix"
226 |         ![tfidf](https://github.com/user-attachments/assets/28f257e1-2529-48f1-81e5-e058a50fb351)
227 | 
228 |     === "TextRank Confusion Matrix"
229 |         ![textrank](https://github.com/user-attachments/assets/cb748eff-e4f3-4096-ab2b-cf2e4b40186f)
230 | 
231 |     === "Transformers Confusion Matrix"
232 |         ![trans](https://github.com/user-attachments/assets/7e99887b-e225-4dd0-802d-f1c2b0e89bef)
233 | 
234 | 
235 | ### ✅ CONCLUSION
236 | 
237 | #### 🔑 KEY LEARNINGS
238 | 
239 | !!! tip "Insights gained from the data"
240 |     - Data Complexity: News articles vary in length and structure, requiring different summarization techniques.
241 |     - Text Preprocessing: Cleaning text (e.g., stopword removal, tokenization) significantly improves summarization quality.
242 |     - Feature Extraction: Techniques like TF-IDF, TextRank, and Transformer embeddings help in effective text representation for summarization models.
243 | 
244 | ??? tip "Improvements in understanding machine learning concepts"
245 |     - Model Selection: Comparing extractive (TextRank, TF-IDF) and abstractive (Transformer) models to determine the best summarization approach.
246 | 
247 | ??? tip "Challenges faced and how they were overcome"
248 |     - Long Text Processing: Splitting lengthy articles into manageable sections before summarization.
249 |     - Computational Efficiency: Used batch processing and model optimization to handle large datasets efficiently.
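    As a rough illustration of the long-text handling mentioned above, here is a minimal chunking sketch (the word limit, helper names, and summary lengths are assumptions, not values taken from the notebook):

    ```python
    # Sketch: split a long article into word-bounded chunks so each chunk fits the
    # summarizer's input limit, then stitch the partial summaries together.
    def chunk_text(text: str, max_words: int = 400):
        words = text.split()
        for start in range(0, len(words), max_words):
            yield " ".join(words[start:start + max_words])

    def summarize_long_article(article: str, summarizer) -> str:
        # `summarizer` can be any callable such as the Hugging Face pipeline shown earlier.
        partial_summaries = [
            summarizer(chunk, max_length=120, min_length=30, do_sample=False)[0]["summary_text"]
            for chunk in chunk_text(article)
        ]
        return " ".join(partial_summaries)
    ```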
250 | 251 | --- 252 | 253 | #### 🌍 USE CASES 254 | 255 | === "Application 1" 256 | 257 | **News Aggregation & Personalized Summaries** 258 | 259 | - Automating news summarization helps users quickly grasp key events without reading lengthy articles. 260 | - Used in news apps, digital assistants, and content curation platforms. 261 | 262 | === "Application 2" 263 | 264 | **Legal & Academic Document Summarization** 265 | 266 | - Helps professionals extract critical insights from lengthy legal or research documents. 267 | - Reduces the time needed for manual reading and analysis. 268 | -------------------------------------------------------------------------------- /docs/natural-language-processing/twitter-sentiment-analysis.md: -------------------------------------------------------------------------------- 1 | # Twitter Sentiment Analysis 2 | 3 | ### AIM 4 | To analyze sentiment in Twitter data using natural language processing techniques. 5 | 6 | ### DATASET LINK 7 | [https://www.kaggle.com/datasets/kazanova/sentiment140](https://www.kaggle.com/datasets/kazanova/sentiment140) 8 | 9 | ### NOTEBOOK LINK 10 | 11 | [https://drive.google.com/drive/folders/1F6BLxvp6qIAgGZOZ2rC370EmKhj5W1FC?usp=sharing](https://drive.google.com/drive/folders/1F6BLxvp6qIAgGZOZ2rC370EmKhj5W1FC?usp=sharing) 12 | 13 | ### LIBRARIES NEEDED 14 | 15 | ??? quote "LIBRARIES USED" 16 | 17 | - pandas 18 | - numpy 19 | - scikit-learn 20 | - seaborn 21 | - matplotlib 22 | - tensorflow 23 | - keras 24 | - nltk 25 | - multiprocessing 26 | - tqdm 27 | - os 28 | 29 | --- 30 | 31 | ### DESCRIPTION 32 | 33 | !!! info "What is the requirement of the project?" 34 | - The project aims to perform sentiment analysis on Twitter data. 35 | - This involves extracting tweets related to specific topics or keywords, processing these tweets using natural language processing (NLP) techniques to determine the sentiment (positive or negative), and presenting insights derived from the analysis. 36 | 37 | 38 | ??? info "Why is it necessary?" 39 | - Twitter is a rich source of real-time public opinion and sentiment. Analyzing tweets can provide valuable insights into public perception of events, products, brands, or topics of interest. 40 | - This information is crucial for businesses, governments, and researchers to make informed decisions, understand public sentiment trends, and gauge the success of marketing campaigns or policy changes. 41 | 42 | ??? info "How is it beneficial and used?" 43 | - Business Insights: Companies can understand customer feedback and sentiments towards their products or services. 44 | - Brand Management: Monitor brand sentiment and respond to customer concerns or issues in real-time. 45 | - Market Research: Identify trends and sentiments related to specific topics or industries. 46 | - Social Listening: Understand public opinion on current events, policies, or social issues. 47 | - Customer Service Improvement: Improve customer service by analyzing sentiment towards customer interactions. 48 | 49 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 50 | - Choose appropriate NLP techniques for sentiment analysis, such as: 51 | 52 | - Bag-of-Words (BoW) and TF-IDF: Represent tweets as numerical vectors. 53 | - Sentiment Lexicons: Use dictionaries of words annotated with sentiment scores (e.g., Vader sentiment lexicon). 54 | - Machine Learning Models: Train supervised classifiers (e.g., Naive Bayes, SVM, or neural networks) on labeled data for sentiment prediction. 
55 | 56 | - Model Evaluation: Evaluate the performance of the sentiment analysis model using metrics like accuracy. Cross-validation techniques can be used to ensure robustness. 57 | 58 | - Visualization and Insights: Visualize sentiment trends over time or across different categories using charts (e.g., line plots, bar charts). Generate insights based on the analysis results. 59 | 60 | - Deployment: Deploy the sentiment analysis system as a standalone application or integrate it into existing systems for real-time monitoring and analysis. 61 | 62 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)" 63 | - [GeeksforGeeks Twitter Sentiment Analysis](https://www.geeksforgeeks.org/twitter-sentiment-analysis-using-python/) 64 | - [YouTube Video](https://youtu.be/4YGkfAd2iXM?si=_COXzhlQG5a0z7PH) 65 | 66 | --- 67 | 68 | ### EXPLANATION 69 | 70 | #### DETAILS OF THE DIFFERENT FEATURES 71 | 72 | 73 | --- 74 | 75 | #### WHAT I HAVE DONE 76 | 77 | === "Step 1" 78 | 79 | Initial data exploration and understanding: 80 | 81 | - Gathered Twitter data using pre-existing datasets (Kaggle). 82 | - Understand the structure of the data (e.g., tweet text, metadata like timestamps, user information). 83 | - Explore basic statistics and distributions of data features. 84 | 85 | === "Step 2" 86 | 87 | Data cleaning and preprocessing: 88 | 89 | - Remove or handle noisy data such as URLs, special characters, and emojis. 90 | - Tokenize tweets into individual words or tokens. 91 | - Remove stopwords (commonly used words that do not carry significant meaning). 92 | - Normalize text through techniques like stemming to reduce variations of words. 93 | 94 | === "Step 3" 95 | 96 | Feature engineering and selection: 97 | 98 | - Convert text data into numerical representations suitable for machine learning models (e.g., Bag-of-Words, TF-IDF). 99 | - Select relevant features that contribute most to the sentiment analysis task. 100 | 101 | === "Step 4" 102 | 103 | Model training and evaluation: 104 | 105 | - Split the dataset into training and testing sets. 106 | - Choose appropriate machine learning models (e.g., Naive Bayes, RNN LSTM, logistic regression) for sentiment analysis. 107 | - Train the models on the training data and evaluate their performance using metrics like accuracy. 108 | 109 | === "Step 5" 110 | 111 | Model optimization and fine-tuning: 112 | 113 | - Fine-tune the hyperparameters of the selected models to improve performance. 114 | - Consider techniques like grid search or random search to find optimal parameters. 115 | - Experiment with different models or combinations of models to achieve better results. 116 | 117 | === "Step 6" 118 | 119 | Validation and testing: 120 | 121 | - Validate the trained models on a separate validation set to ensure generalizability. 122 | - Test the final model on unseen data (testing set or new tweets) to assess its performance in real-world scenarios. 123 | - Iterate on the model and preprocessing steps based on validation results to improve accuracy and robustness. 124 | 125 | --- 126 | 127 | #### PROJECT TRADE-OFFS AND SOLUTIONS 128 | 129 | === "Trade-off 1" 130 | 131 | Stemming process took a lot of computational time to process over 1.6 million datapoints. 132 | 133 | - **Solution**: Divided the data into batches and applied parallel processing. 134 | 135 | === "Trade-off 2" 136 | 137 | In RNN based LSTM, overfitting problem occurred. 
138 | 139 | - **Solution**: Tried to fix it using Dropout layer, early stopping criteria. 140 | 141 | --- 142 | 143 | ### SCREENSHOTS 144 | 145 | !!! success "Project structure or tree diagram" 146 | 147 | ``` mermaid 148 | graph LR 149 | A[Start] --> B{Error?}; 150 | B -->|Yes| C[Hmm...]; 151 | C --> D[Debug]; 152 | D --> B; 153 | B ---->|No| E[Yay!]; 154 | ``` 155 | 156 | ??? tip "Visualizations and EDA of different features" 157 | 158 | === "Sentiment Distribution" 159 | ![sentiment_distribution](https://github.com/user-attachments/assets/02d957bf-ff37-462d-bd11-b8e363e86f87) 160 | 161 | ??? example "Model performance graphs" 162 | 163 | === "LR Confusion Matrix" 164 | ![confusion_matrix_logistic_regression](https://github.com/user-attachments/assets/748f359a-e74c-4156-9f4d-7c67bf4a828b) 165 | 166 | === "LR ROC Curve" 167 | ![roc_curve_logistic_regression](https://github.com/user-attachments/assets/1ffc044f-9c41-43ea-a4fd-2f05fcbcb771) 168 | 169 | === "Naive Bayes Confusion Matrix" 170 | ![confusion_matrix_naive_bayes](https://github.com/user-attachments/assets/233b1dc6-0177-4e2a-ab9f-d0e400e91a5f) 171 | 172 | === "Naive Bayes ROC Curve" 173 | ![roc_curve_naive_bayes](https://github.com/user-attachments/assets/55cdefd9-7ae6-4234-aa1c-f790e3c49f44) 174 | 175 | --- 176 | 177 | ### MODELS USED AND THEIR EVALUATION METRICS 178 | 179 | | Model | Accuracy | MSE | R2 Score | 180 | |-------|----------|-----|----------| 181 | | Logistic Regression | 77% | 0.1531724703945824 | 0.3873101184216704 | 182 | | Naive Bayes | 75% | 0.17476773790874897 | 0.3009290483650041 | 183 | | RNN LSTM | 77.84% | - | - | 184 | 185 | --- 186 | 187 | #### MODELS COMPARISON GRAPHS 188 | 189 | !!! tip "Models Comparison Graphs" 190 | 191 | === "LSTM Accuracy" 192 | ![lstm_accuracy](https://github.com/user-attachments/assets/54619fbd-0f8c-4543-8b7f-7eb419be9659) 193 | === "LSTM Loss" 194 | ![lstm_loss](https://github.com/user-attachments/assets/af2e1c78-2488-425f-ac01-8d24061a2650) 195 | 196 | --- 197 | 198 | ### CONCLUSION 199 | 200 | #### KEY LEARNINGS 201 | 202 | !!! tip "Insights gained from the data" 203 | - Data Variety: Twitter data provides a rich source of real-time, diverse opinions and sentiments. 204 | - Text Preprocessing: Importance of cleaning and preprocessing text data (e.g., removing stopwords, stemming/lemmatization) for better analysis. 205 | - Feature Extraction: Techniques like TF-IDF (Term Frequency-Inverse Document Frequency) and word embeddings (e.g., Word2Vec, GloVe) to represent text numerically for machine learning models. 206 | 207 | ??? tip "Improvements in understanding machine learning concepts" 208 | - Model Selection: Experimenting with various algorithms to find the most suitable for sentiment classification (e.g., logistic regression, naive bayes, neural networks). 209 | 210 | ??? tip "Challenges faced and how they were overcome" 211 | - Noise in Data: Dealing with noise from hashtags, emojis, and slang in tweets through effective preprocessing techniques. 212 | - Computational Resources: Managing large volumes of data and resource-intensive computations by optimizing code and leveraging cloud computing platforms if necessary. 
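    As an illustration of the batch-plus-parallel-processing approach described in Trade-off 1, here is a minimal stemming sketch with `multiprocessing` (the batch count, worker count, and column name are assumptions, not values from the notebook):

    ```python
    # Sketch: stem a large corpus of tweets in parallel batches to cut preprocessing time.
    # Assumes a pandas Series of cleaned tweet texts; wrap in `if __name__ == "__main__":` on Windows.
    from multiprocessing import Pool

    import numpy as np
    import pandas as pd
    from nltk.stem import PorterStemmer

    stemmer = PorterStemmer()

    def stem_batch(texts: pd.Series) -> pd.Series:
        # Stem every word of every tweet in one batch.
        return texts.apply(lambda t: " ".join(stemmer.stem(w) for w in t.split()))

    def parallel_stem(texts: pd.Series, n_workers: int = 4) -> pd.Series:
        batches = np.array_split(texts, n_workers * 8)  # smaller batches keep workers busy
        with Pool(n_workers) as pool:
            return pd.concat(pool.map(stem_batch, batches))

    # Usage (assuming df["text"] holds cleaned tweets):
    # df["stemmed"] = parallel_stem(df["text"])
    ```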
213 | 214 | --- 215 | 216 | #### USE CASES 217 | 218 | === "Application 1" 219 | 220 | **Brand Monitoring and Customer Feedback Analysis** 221 | 222 | - This application allows businesses to leverage Twitter sentiment analysis as a valuable tool for customer relationship management, brand reputation management, and strategic decision-making based on real-time customer feedback and sentiment analysis. 223 | 224 | === "Application 2" 225 | 226 | **Financial Market Analysis and Investment Decisions** 227 | 228 | - This application showcases how Twitter sentiment analysis can be leveraged in the financial sector to gain competitive advantages, improve investment strategies, and manage risks effectively based on public sentiment towards financial markets and specific stocks. 229 | 230 | -------------------------------------------------------------------------------- /docs/project-readme-template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | # 📜 Project Title 6 | 7 | 8 |
9 | 10 |
11 | 12 | ## 🎯 AIM 13 | 14 | 15 | 16 | ## 📊 DATASET LINK 17 | 18 | [https://www.google.com](https://www.google.com) 19 | 20 | 21 | ## 📓 KAGGLE NOTEBOOK 22 | 23 | [https://www.google.com](https://www.google.com) 24 | 25 | ??? Abstract "Kaggle Notebook" 26 | 27 | 35 | 36 | ## ⚙️ TECH STACK 37 | 38 | | **Category** | **Technologies** | 39 | |--------------------------|---------------------------------------------| 40 | | **Languages** | Python, JavaScript | 41 | | **Libraries/Frameworks** | TensorFlow, Keras, Flask | 42 | | **Databases** | MongoDB, PostgreSQL | 43 | | **Tools** | Docker, Git, Jupyter, VS Code | 44 | | **Deployment** | AWS, Heroku | 45 | 46 | 47 | --- 48 | 49 | ## 📝 DESCRIPTION 50 | 57 | 58 | 59 | 60 | 61 | 62 | !!! info "What is the requirement of the project?" 63 | - Write the answer here in simple bullet points. 64 | 65 | 66 | ??? info "How is it beneficial and used?" 67 | - Write the answer here in simple bullet points. 68 | 69 | 70 | ??? info "How did you start approaching this project? (Initial thoughts and planning)" 71 | - Write the answer here in simple bullet points. 72 | 73 | 74 | ??? info "Mention any additional resources used (blogs, books, chapters, articles, research papers, etc.)." 75 | - Write the answer here in simple bullet points. 76 | 77 | 78 | --- 79 | 80 | ## 🔍 PROJECT EXPLANATION 81 | 82 | ### 🧩 DATASET OVERVIEW & FEATURE DETAILS 83 | 84 | 85 | 86 | 87 | ??? example "📂 dataset.csv" 88 | 89 | - There are X features in the dataset.csv 90 | 91 | | Feature Name | Description | Datatype | 92 | |--------------|-------------|:------------:| 93 | | feature 1 | explain 1 | int64/object | 94 | 95 | 96 | 97 | ??? example "🛠 Developed Features from dataset.csv" 98 | 99 | | Feature Name | Description | Reason | Datatype | 100 | |--------------|-------------|----------|:------------:| 101 | | feature 1 | explain 1 | reason 1 | int64/object | 102 | 103 | 104 | --- 105 | 106 | ### 🛤 PROJECT WORKFLOW 107 | 108 | 109 | !!! success "Project workflow" 110 | 111 | ``` mermaid 112 | graph LR 113 | A[Start] --> B{Error?}; 114 | B -->|Yes| C[Hmm...]; 115 | C --> D[Debug]; 116 | D --> B; 117 | B ---->|No| E[Yay!]; 118 | ``` 119 | 120 | 121 | === "Step 1" 122 | - Explanation 123 | 124 | === "Step 2" 125 | - Explanation 126 | 127 | === "Step 3" 128 | - Explanation 129 | 130 | === "Step 4" 131 | - Explanation 132 | 133 | === "Step 5" 134 | - Explanation 135 | 136 | === "Step 6" 137 | - Explanation 138 | 139 | --- 140 | 141 | ### 🖥 CODE EXPLANATION 142 | 143 | 144 | 145 | === "Section 1" 146 | - Explanation 147 | 148 | --- 149 | 150 | ### ⚖️ PROJECT TRADE-OFFS AND SOLUTIONS 151 | 152 | 153 | === "Trade Off 1" 154 | - Describe the trade-off encountered (e.g., accuracy vs. computational efficiency). 155 | - Explain how you addressed this trade-off (e.g., by optimizing hyperparameters, using a more efficient algorithm, etc.). 156 | 157 | === "Trade Off 2" 158 | - Describe another trade-off (e.g., model complexity vs. interpretability). 159 | - Explain the solution (e.g., by selecting a model that balances both aspects effectively). 160 | 161 | --- 162 | 163 | ## 🖼 SCREENSHOTS 164 | 165 | 166 | !!! tip "Visualizations and EDA of different features" 167 | 168 | === "Image Topic" 169 | ![img](https://assets.ltkcontent.com/images/103034/line-graph-example_27c5571306.jpg) 170 | 171 | ??? 
example "Model performance graphs" 172 | 173 | === "Image Topic" 174 | ![img](https://assets.ltkcontent.com/images/103029/bar-graph-example_27c5571306.jpg) 175 | 176 | --- 177 | 178 | ## 📉 MODELS USED AND THEIR EVALUATION METRICS 179 | 180 | 181 | | Model | Accuracy | MSE | R2 Score | 182 | |------------|----------|-------|----------| 183 | | Model Name | 95% | 0.022 | 0.90 | 184 | | Model Name | 93% | 0.033 | 0.88 | 185 | 186 | --- 187 | 188 | ## ✅ CONCLUSION 189 | 190 | ### 🔑 KEY LEARNINGS 191 | 192 | 193 | !!! tip "Insights gained from the data" 194 | - Write from here in bullet points 195 | 196 | ??? tip "Improvements in understanding machine learning concepts" 197 | - Write from here in bullet points 198 | 199 | --- 200 | 201 | ### 🌍 USE CASES 202 | 203 | 204 | === "Headline 1" 205 | - Explain your application 206 | 207 | === "Headline 2" 208 | - Explain your application 209 | 210 | ### 🔗 USEFUL LINKS 211 | 212 | 213 | === "Deployed Model" 214 | - [https://www.google.com](https://www.google.com) 215 | 216 | 217 | === "GitHub Repository" 218 | - [https://www.google.com](https://www.google.com) 219 | 220 | 221 | === "Binary Model File" 222 | - [https://www.google.com](https://www.google.com) 223 | 224 | 225 | 226 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: AI Code 2 | site_url: https://avdhesh-varshney.github.io/AI-Code/ 3 | 4 | nav: 5 | - Overview: index.md 6 | - Projects: 7 | - 📊 Data Insights: data-visualization/index.md 8 | - 📈 ML Models: machine-learning/index.md 9 | - 🧠 Neural Networks: deep-learning/index.md 10 | - 📷 Vision Systems: computer-vision/index.md 11 | - 🗣️ NLP Tasks: natural-language-processing/index.md 12 | - 🌀 GANs: generative-adversarial-networks/index.md 13 | - 📚 LLMs: large-language-models/index.md 14 | - Get Involved: 15 | - ✍️ How to Contribute: contribute.md 16 | - 📄 Template Guide: project-readme-template.md 17 | 18 | theme: 19 | name: material 20 | font: 21 | text: IBM Plex Mono 22 | code: Red Hat Mono 23 | features: 24 | - navigation.sections 25 | - navigation.footer 26 | - content.code.copy 27 | palette: 28 | # Dark Mode 29 | - scheme: slate 30 | toggle: 31 | icon: material/weather-sunny 32 | name: Dark mode 33 | accent: blue grey 34 | 35 | # Light Mode 36 | - scheme: default 37 | toggle: 38 | icon: material/weather-night 39 | name: Light mode 40 | accent: grey 41 | 42 | markdown_extensions: 43 | - attr_list 44 | - md_in_html 45 | - pymdownx.emoji: 46 | emoji_index: !!python/name:material.extensions.emoji.twemoji 47 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 48 | - pymdownx.highlight: 49 | anchor_linenums: true 50 | line_spans: __span 51 | pygments_lang_class: true 52 | - pymdownx.inlinehilite 53 | - pymdownx.snippets 54 | - pymdownx.superfences: 55 | custom_fences: 56 | - name: mermaid 57 | class: mermaid 58 | format: !!python/name:pymdownx.superfences.fence_code_format 59 | - pymdownx.tabbed: 60 | alternate_style: true 61 | - admonition 62 | - pymdownx.details 63 | - pymdownx.blocks.caption 64 | - pymdownx.arithmatex: 65 | generic: true 66 | 67 | extra: 68 | social: 69 | - icon: simple/github 70 | link: https://github.com/Avdhesh-Varshney 71 | - icon: simple/x 72 | link: https://twitter.com/__Avdhesh__ 73 | - icon: simple/youtube 74 | link: https://youtube.com/@Code_A2Z 75 | - icon: simple/discord 76 | link: https://discord.gg/tSqtvHUJzE 77 | 78 | extra_javascript: 79 | - 
https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML 80 | 81 | copyright: Copyright © 2025 Avdhesh Varshney 82 | --------------------------------------------------------------------------------