├── Apache Airflow ├── Welcome_DAG.py ├── dag_preprocessing.py ├── population_dag.py └── wiki_dag.py ├── Classification Prediction Scikit Learn ├── Bank Customer Churn Prediction.ipynb ├── Bank Customer Churn.ipynb ├── Bank Loan Approval Exploratory Data Analysis.ipynb ├── Bank Turnover Prediction.ipynb ├── Customer Churn Prediction.ipynb ├── Customer Credit Risk Prediction.ipynb ├── Customer Personality Prediction to Boost Marketing Campaign.ipynb ├── Customer Satisfaction in Airline Prediction.ipynb ├── Fraudulent Claim on Cars Physical Damage Prediction.ipynb ├── Loan Default Prediction.ipynb ├── Loan Defaults Prediction.ipynb ├── Loan Prediction Analytics Vidhya Competition.ipynb ├── Loan Prediction Analytics Vidhya.ipynb ├── Loan Prediction Based on Customer Behavior.ipynb ├── Loan Prediction.ipynb ├── Predict CLTV of a customer.ipynb ├── Predict Customer Clicked Ads Classification.ipynb ├── Predict if a client will subscribe to a term deposit.ipynb └── Travel Insurance Prediction.ipynb ├── End to End Data Science Project ├── End to End Brazilian E-Commerce Analysis.ipynb └── End to End Customer Churn and Sales Analysis.ipynb ├── Flourish └── README.md ├── KNIME Project ├── Knime Simple Data Preprocessing.JPG └── README.md ├── Langchain LLM ├── LangChain_Chroma.ipynb ├── Langchain_Analyze_CSV.ipynb ├── Langchain_Analyze_PDF.ipynb ├── README.md ├── gemini_web_langchain.py ├── langchain_complete │ ├── file.csv │ ├── file.docx │ ├── file.pdf │ ├── file.pptx │ ├── file.xlsx │ └── langchain_streamlit.py └── langchain_youtube.py ├── LlamaIndex └── llamastreamlit.py ├── MySQL ├── Data Science Salary Query.sql ├── INNER JOIN COMBINATION.sql ├── README.md ├── SQL JOIN.sql ├── STUDENTS PERFORMANCE.sql ├── SUPERSTORE DATA ANALYSIS.sql ├── Sample - Superstore - Wanda.xlsx - Orders.csv ├── Students_Performance_mv.csv ├── VIRTUAL INTERNSHIP QUERIES.sql ├── ds_salaries.csv ├── exam score analysis.sql ├── excercise 1.sql ├── sakila-dvd-rental.sql └── yellow_tlc_apr2022_1k.csv ├── Natural Language Processing ├── Anies_Sentiment_Analysis.ipynb ├── RUU_DPR_2020_2024.ipynb ├── Sentiment_Analisis_Prabowo.ipynb └── emotion_streamlit.py ├── Power BI ├── pbi1.JPG └── pbi2.JPG ├── PySpark ├── Insurance_Claim_Pyspark.ipynb └── PySpark_Data_Preprocessing.ipynb ├── R Language ├── calculate.R ├── coba.R └── portfolio.R ├── README.md ├── Regression Prediction Scikit Learn ├── Ford Car Price Prediction.ipynb ├── Honda Price Prediction.ipynb ├── House Price Prediction for Kaggle Competition.ipynb ├── House Rent Price Prediction.ipynb ├── Media Campaign Cost Prediction.ipynb ├── Medical Insurance Cost Prediction.ipynb ├── Melbourne Housing Price Prediction.ipynb ├── NY Rental Pricing Prediction.ipynb ├── Rain Prediction in Australian Coursera.ipynb ├── Salary Prediction.ipynb ├── Salary prediction based on country and race.ipynb ├── Software Industry Salary Prediction.ipynb ├── Sport Car Price Prediction.ipynb ├── USA Real Estate Price Prediction.ipynb └── Used Vehicle Price Prediction.ipynb ├── Snowflake Cloud ├── README.md ├── Snowflake_Python_Connector.ipynb ├── Snowflake_Snowpark_Session.ipynb └── Snowpark_Data_Pipeline_and_Transformation_Covid.ipynb ├── Streamlit-Web-Application-main ├── README.md ├── __pycache__ │ ├── flask.cpython-311.pyc │ └── pandasai.cpython-311.pyc ├── auto_sentiment_analysis_twitter.py ├── chat_with_your_csv.py ├── cheatgpt.py ├── compare.py ├── complete_pack.py ├── diagnosis.py ├── ecommerce_clustering_llm.py ├── fraud_analysis_llm.py ├── indonesia-bert-sentiment-classification │ └── 
config.json ├── llmpandas.py ├── pdf_comparer.py ├── pdf_document_analysis.py ├── table_scraper_analysis.py └── web_scrape.py ├── Tableau └── Dashboard 1.png ├── Tensorflow ├── ANTAM_Stock_Price_Prediction.ipynb ├── Classify_Mineral_Stone.ipynb ├── GOTO_Stock_Price.ipynb ├── Insurance_Claim_Fraud_with_GAN.ipynb └── Insurance_Claim_Tensorflow.ipynb └── site └── en └── gemini-api └── docs └── model-tuning └── python.ipynb /Apache Airflow/Welcome_DAG.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from airflow import DAG 4 | from airflow.operators.empty import EmptyOperator 5 | 6 | my_dag = DAG( 7 | dag_id="my_dag_name", 8 | start_date=datetime.datetime(2021, 1, 1), 9 | schedule="@daily", 10 | ) 11 | EmptyOperator(task_id="task", dag=my_dag) -------------------------------------------------------------------------------- /Apache Airflow/dag_preprocessing.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.providers.mysql.hooks.mysql import MySqlHook 3 | from datetime import datetime 4 | from airflow.operators.python import PythonOperator 5 | 6 | # Replace with your actual connection ID 7 | connection_id = 'mysql' 8 | 9 | def test_mysql_connection(): 10 | try: 11 | # Get the connection from Airflow 12 | mysql_hook = MySqlHook(mysql_conn_id=connection_id) 13 | 14 | # Attempt a simple connection test (e.g., ping the server) 15 | with mysql_hook.get_conn() as conn: 16 | cursor = conn.cursor() 17 | cursor.execute("SELECT * FROM marketing.customer;") 18 | result = cursor.fetchone() 19 | 20 | if result: 21 | print("Connection to MySQL successful!") 22 | else: 23 | print("Connection test failed!") 24 | 25 | except Exception as e: 26 | print(f"Error connecting to MySQL: {e}") 27 | 28 | with DAG(dag_id='test_mysql_connection', 29 | start_date=datetime(2024, 4, 15), 30 | schedule_interval=None) as dag: 31 | 32 | test_connection_task = PythonOperator( 33 | task_id='test_connection', 34 | python_callable=test_mysql_connection 35 | ) 36 | -------------------------------------------------------------------------------- /Apache Airflow/population_dag.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.utils.dates import days_ago 3 | from airflow.operators.python_operator import PythonOperator 4 | from bs4 import BeautifulSoup # For web scraping 5 | import requests 6 | 7 | # Define default arguments 8 | default_args = { 9 | 'owner': 'airflow', 10 | 'start_date': days_ago(1), # Start yesterday 11 | 'schedule_interval': '@daily', # Run daily 12 | } 13 | 14 | 15 | def scrape_worldometer(ti): # Inject the TaskInstance object 16 | """ 17 | Scrapes Worldometer website for population data and stores in XCom. 
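A downstream task in this DAG could read these values back from XCom with xcom_pull; the sketch below is illustrative only (the print_population_stats callable and any task wired to it are assumptions, not part of this file), though the task_id and key match the ones used here:

        def print_population_stats(ti):
            # Pull the dict pushed by the 'scrape_worldometer' task under key 'worldometer_data'
            data = ti.xcom_pull(task_ids='scrape_worldometer', key='worldometer_data')
            print(f"Births today: {data['births_today']}, deaths today: {data['deaths_today']}")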
18 | """ 19 | url = 'https://www.worldometers.info/world-population/' 20 | response = requests.get(url) 21 | soup = BeautifulSoup(response.content, 'html.parser') 22 | 23 | # Target elements using updated selectors 24 | births_today = soup.find('span', class_='rts-counter', rel='births_today').text.strip() 25 | deaths_today = soup.find('span', class_='rts-counter', rel='dth1s_today').text.strip() 26 | 27 | # Store data in XCom for retrieval by downstream tasks 28 | ti.xcom_push( 29 | key='worldometer_data', 30 | value={ 31 | 'births_today': births_today, 32 | 'deaths_today': deaths_today 33 | } 34 | ) 35 | 36 | # Define the DAG 37 | with DAG( 38 | dag_id='worldometer_scraper', 39 | default_args=default_args, 40 | ) as dag: 41 | 42 | # Scrape data task 43 | scrape_task = PythonOperator( 44 | task_id='scrape_worldometer', 45 | python_callable=scrape_worldometer, # Pass the function with TaskInstance injection 46 | ) 47 | -------------------------------------------------------------------------------- /Apache Airflow/wiki_dag.py: -------------------------------------------------------------------------------- 1 | from airflow import DAG 2 | from airflow.utils.dates import days_ago 3 | from airflow.operators.python_operator import PythonOperator 4 | from bs4 import BeautifulSoup 5 | import requests 6 | 7 | # Define default arguments 8 | default_args = { 9 | 'owner': 'airflow', 10 | 'start_date': days_ago(1), # Start yesterday 11 | 'schedule_interval': '@daily', # Run daily 12 | } 13 | 14 | 15 | def scrape_wiki_content(ti): 16 | """ 17 | Scrapes content from Albert Einstein's Wikipedia page and stores it in XCom. 18 | """ 19 | url = 'https://en.wikipedia.org/wiki/Albert_Einstein' 20 | response = requests.get(url) 21 | soup = BeautifulSoup(response.content, 'html.parser') 22 | 23 | # Target all paragraphs within the main content section (can be adjusted) 24 | content_elements = soup.find_all('p', class_=None) # Find all paragraphs without a class 25 | 26 | # Combine the text content of all paragraphs 27 | content_text = '\n'.join([p.get_text(strip=True) for p in content_elements]) 28 | 29 | # Store the content in XCom for retrieval by downstream tasks 30 | ti.xcom_push( 31 | key='einstein_wiki_content', 32 | value=content_text 33 | ) 34 | 35 | 36 | # Define the DAG 37 | with DAG( 38 | dag_id='wiki_einstein_scraper', 39 | default_args=default_args, 40 | ) as dag: 41 | 42 | # Scrape data task 43 | scrape_task = PythonOperator( 44 | task_id='scrape_wiki_content', 45 | python_callable=scrape_wiki_content, # Pass the function with TaskInstance injection 46 | ) 47 | -------------------------------------------------------------------------------- /Flourish/README.md: -------------------------------------------------------------------------------- 1 | # Link for Flourish Visualization 2 | 3 | 1. Loan Default Analysis : https://public.flourish.studio/story/2119154/ 4 | 2. 
Superstore Sales Analysis : https://public.flourish.studio/story/2117963/ 5 | -------------------------------------------------------------------------------- /KNIME Project/Knime Simple Data Preprocessing.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/KNIME Project/Knime Simple Data Preprocessing.JPG -------------------------------------------------------------------------------- /KNIME Project/README.md: -------------------------------------------------------------------------------- 1 | # KNIME Project 2 | 3 | 1. Simple Data Preprocessing explanation : https://www.linkedin.com/posts/michael-wiryaseputra_knime-datapreprocessing-datascience-activity-7184053754592129024-eqzs?utm_source=share&utm_medium=member_desktop 4 | -------------------------------------------------------------------------------- /Langchain LLM/README.md: -------------------------------------------------------------------------------- 1 | # Langchain LLM 2 | This repository is for all of my Langchain projects; modify this code if you want to use it in a real-time project 3 | 4 | ## 1. Langchain Analyze CSV 5 | This project analyzes a CSV file with the Langchain CSV Agent. The user can ask anything about the CSV dataset, and Langchain will query it and answer based on the question 6 | 7 | ## 2. Langchain Analyze PDF 8 | This project analyzes the content of a PDF file with Langchain and then answers based on the user's question 9 | 10 | ## 3. Langchain Analyze Youtube Video 11 | This project analyzes the content of a YouTube video with Langchain and then answers based on the user's question 12 | 13 | ## 4. Langchain Analyze Website 14 | This project analyzes the content of a website with Langchain and then answers based on the user's question 15 | -------------------------------------------------------------------------------- /Langchain LLM/gemini_web_langchain.py: -------------------------------------------------------------------------------- 1 | from langchain_google_genai import ChatGoogleGenerativeAI 2 | from langchain_google_genai import GoogleGenerativeAIEmbeddings 3 | from langchain_community.document_loaders import WebBaseLoader 4 | from langchain.chains import StuffDocumentsChain 5 | from langchain.chains.llm import LLMChain 6 | from langchain.prompts import PromptTemplate 7 | import google.generativeai as genai 8 | 9 | #genai.configure(api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 10 | 11 | #Initialize Model 12 | llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 13 | 14 | #Load the blog 15 | loader = WebBaseLoader("https://thenewstack.io/the-building-blocks-of-llms-vectors-tokens-and-embeddings/") 16 | docs = loader.load() 17 | 18 | #Define the Summarize Chain 19 | template = """Write a concise summary of the following: 20 | "{text}" 21 | CONCISE SUMMARY:""" 22 | 23 | prompt = PromptTemplate.from_template(template) 24 | 25 | llm_chain = LLMChain(llm=llm, prompt=prompt) 26 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 27 | 28 | #Invoke Chain 29 | response=stuff_chain.invoke(docs) 30 | print(response["output_text"]) -------------------------------------------------------------------------------- /Langchain LLM/langchain_complete/file.docx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Langchain LLM/langchain_complete/file.docx -------------------------------------------------------------------------------- /Langchain LLM/langchain_complete/file.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Langchain LLM/langchain_complete/file.pdf -------------------------------------------------------------------------------- /Langchain LLM/langchain_complete/file.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Langchain LLM/langchain_complete/file.pptx -------------------------------------------------------------------------------- /Langchain LLM/langchain_complete/file.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Langchain LLM/langchain_complete/file.xlsx -------------------------------------------------------------------------------- /Langchain LLM/langchain_complete/langchain_streamlit.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | # Define functions for each page 4 | def langchain_pdf(): 5 | st.title("Langchain PDF Text Analysis") 6 | from langchain_google_genai import ChatGoogleGenerativeAI 7 | from langchain_community.document_loaders import PyPDFLoader 8 | from langchain.chains import StuffDocumentsChain 9 | from langchain.chains.llm import LLMChain 10 | from langchain.prompts import PromptTemplate 11 | import asyncio 12 | import nest_asyncio 13 | nest_asyncio.apply() 14 | 15 | # Initialize Model 16 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key="AIzaSyAQLXJ6ROBzMycImPVp2jTlbB3zIpEWmhM") 17 | 18 | # Input for PDF file 19 | uploaded_file = st.file_uploader("Upload PDF", type=["pdf"]) 20 | 21 | # Input for the question 22 | question = st.text_input("Enter your question:") 23 | 24 | if st.button("Analyze"): 25 | if uploaded_file is not None: 26 | # Save the uploaded PDF file with the name "file.pdf" 27 | with open("file.pdf", "wb") as f: 28 | f.write(uploaded_file.getvalue()) 29 | 30 | # Load the PDF file 31 | loader = PyPDFLoader("file.pdf") 32 | docs = loader.load_and_split() 33 | 34 | # Define the Summarize Chain 35 | template = question + """ Write a concise summary of the following: 36 | "{text}" 37 | CONCISE SUMMARY:""" 38 | 39 | prompt = PromptTemplate.from_template(template) 40 | 41 | llm_chain = LLMChain(llm=llm, prompt=prompt) 42 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 43 | 44 | # Invoke Chain 45 | response = stuff_chain.invoke(docs) 46 | summary = response["output_text"] 47 | 48 | # Display the summary 49 | st.header("Summary:") 50 | st.write(summary) 51 | else: 52 | st.error("Please upload a PDF file.") 53 | 54 | 55 | def langchain_doc(): 56 | st.title("Langchain Microsoft Word File Analysis") 57 | from langchain_google_genai import ChatGoogleGenerativeAI 58 | from langchain_community.document_loaders import Docx2txtLoader 59 | from langchain.chains import StuffDocumentsChain 60 | from langchain.chains.llm import LLMChain 61 | from langchain.prompts 
import PromptTemplate 62 | import asyncio 63 | import nest_asyncio 64 | nest_asyncio.apply() 65 | 66 | # Initialize Model 67 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key="AIzaSyAQLXJ6ROBzMycImPVp2jTlbB3zIpEWmhM") 68 | 69 | # Input for PDF file 70 | uploaded_file = st.file_uploader("Upload PDF", type=["docx"]) 71 | 72 | # Input for the question 73 | question = st.text_input("Enter your question:") 74 | 75 | if st.button("Analyze"): 76 | if uploaded_file is not None: 77 | # Save the uploaded PDF file with the name "file.pdf" 78 | with open("file.docx", "wb") as f: 79 | f.write(uploaded_file.getvalue()) 80 | 81 | # Load the PDF file 82 | loader = Docx2txtLoader("file.docx") 83 | docs = loader.load_and_split() 84 | 85 | # Define the Summarize Chain 86 | template = question + """ Write a concise summary of the following: 87 | "{text}" 88 | CONCISE SUMMARY:""" 89 | 90 | prompt = PromptTemplate.from_template(template) 91 | 92 | llm_chain = LLMChain(llm=llm, prompt=prompt) 93 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 94 | 95 | # Invoke Chain 96 | response = stuff_chain.invoke(docs) 97 | summary = response["output_text"] 98 | 99 | # Display the summary 100 | st.header("Summary:") 101 | st.write(summary) 102 | else: 103 | st.error("Please upload a Micosoft Word file.") 104 | 105 | 106 | def langchain_excel(): 107 | st.title("Langchain Microsoft Excel File Analysis") 108 | from langchain_google_genai import ChatGoogleGenerativeAI 109 | from langchain_community.document_loaders import UnstructuredExcelLoader 110 | from langchain.chains import StuffDocumentsChain 111 | from langchain.chains.llm import LLMChain 112 | from langchain.prompts import PromptTemplate 113 | import asyncio 114 | import nest_asyncio 115 | nest_asyncio.apply() 116 | 117 | # Initialize Model 118 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key="AIzaSyAQLXJ6ROBzMycImPVp2jTlbB3zIpEWmhM") 119 | 120 | # Input for PDF file 121 | uploaded_file = st.file_uploader("Upload PDF", type=["xlsx"]) 122 | 123 | # Input for the question 124 | question = st.text_input("Enter your question:") 125 | 126 | if st.button("Analyze"): 127 | if uploaded_file is not None: 128 | # Save the uploaded PDF file with the name "file.pdf" 129 | with open("file.xlsx", "wb") as f: 130 | f.write(uploaded_file.getvalue()) 131 | 132 | # Load the PDF file 133 | loader = UnstructuredExcelLoader("file.xlsx", mode="elements") 134 | docs = loader.load() 135 | 136 | # Define the Summarize Chain 137 | template = question + """ Write a concise summary of the following: 138 | "{text}" 139 | CONCISE SUMMARY:""" 140 | 141 | prompt = PromptTemplate.from_template(template) 142 | 143 | llm_chain = LLMChain(llm=llm, prompt=prompt) 144 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 145 | 146 | # Invoke Chain 147 | response = stuff_chain.invoke(docs) 148 | summary = response["output_text"] 149 | 150 | # Display the summary 151 | st.header("Summary:") 152 | st.write(summary) 153 | else: 154 | st.error("Please upload a Excel file.") 155 | 156 | def langchain_ppt(): 157 | st.title("Langchain Microsoft Power Point File Analysis") 158 | from langchain_google_genai import ChatGoogleGenerativeAI 159 | from langchain_community.document_loaders import UnstructuredPowerPointLoader 160 | from langchain.chains import StuffDocumentsChain 161 | from langchain.chains.llm import LLMChain 162 | from langchain.prompts import PromptTemplate 163 | import 
asyncio 164 | import nest_asyncio 165 | nest_asyncio.apply() 166 | 167 | # Initialize Model 168 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key="AIzaSyAQLXJ6ROBzMycImPVp2jTlbB3zIpEWmhM") 169 | 170 | # Input for PDF file 171 | uploaded_file = st.file_uploader("Upload PDF", type=["pptx"]) 172 | 173 | # Input for the question 174 | question = st.text_input("Enter your question:") 175 | 176 | if st.button("Analyze"): 177 | if uploaded_file is not None: 178 | # Save the uploaded PDF file with the name "file.pdf" 179 | with open("file.pptx", "wb") as f: 180 | f.write(uploaded_file.getvalue()) 181 | 182 | # Load the PDF file 183 | loader = UnstructuredPowerPointLoader("file.pptx", mode="elements") 184 | docs = loader.load_and_split() 185 | 186 | # Define the Summarize Chain 187 | template = question + """ Write a concise summary of the following: 188 | "{text}" 189 | CONCISE SUMMARY:""" 190 | 191 | prompt = PromptTemplate.from_template(template) 192 | 193 | llm_chain = LLMChain(llm=llm, prompt=prompt) 194 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 195 | 196 | # Invoke Chain 197 | response = stuff_chain.invoke(docs) 198 | summary = response["output_text"] 199 | 200 | # Display the summary 201 | st.header("Summary:") 202 | st.write(summary) 203 | else: 204 | st.error("Please upload a Excel file.") 205 | 206 | def langchain_csv(): 207 | st.title("Langchain CSV File Analysis") 208 | from langchain_google_genai import ChatGoogleGenerativeAI 209 | from langchain_community.document_loaders.csv_loader import CSVLoader 210 | from langchain.chains import StuffDocumentsChain 211 | from langchain.chains.llm import LLMChain 212 | from langchain.prompts import PromptTemplate 213 | import asyncio 214 | import nest_asyncio 215 | nest_asyncio.apply() 216 | 217 | # Initialize Model 218 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key="AIzaSyAQLXJ6ROBzMycImPVp2jTlbB3zIpEWmhM") 219 | 220 | # Input for PDF file 221 | uploaded_file = st.file_uploader("Upload PDF", type=["csv"]) 222 | 223 | # Input for the question 224 | question = st.text_input("Enter your question:") 225 | 226 | if st.button("Analyze"): 227 | if uploaded_file is not None: 228 | # Save the uploaded PDF file with the name "file.pdf" 229 | with open("file.csv", "wb") as f: 230 | f.write(uploaded_file.getvalue()) 231 | 232 | # Load the PDF file 233 | loader = CSVLoader(file_path="file.csv") 234 | docs = loader.load() 235 | 236 | # Define the Summarize Chain 237 | template = question + """ Write a concise summary of the following: 238 | "{text}" 239 | CONCISE SUMMARY:""" 240 | 241 | prompt = PromptTemplate.from_template(template) 242 | 243 | llm_chain = LLMChain(llm=llm, prompt=prompt) 244 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 245 | 246 | # Invoke Chain 247 | response = stuff_chain.invoke(docs) 248 | summary = response["output_text"] 249 | 250 | # Display the summary 251 | st.header("Summary:") 252 | st.write(summary) 253 | else: 254 | st.error("Please upload a CSV file.") 255 | 256 | def langchain_web(): 257 | st.title("Langchain Web Content Analysis") 258 | from langchain_google_genai import ChatGoogleGenerativeAI 259 | from langchain_community.document_loaders import WebBaseLoader 260 | from langchain.chains import StuffDocumentsChain 261 | from langchain.chains.llm import LLMChain 262 | from langchain.prompts import PromptTemplate 263 | import asyncio 264 | import nest_asyncio 265 | 
nest_asyncio.apply() 266 | 267 | # Initialize Model 268 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key="AIzaSyAQLXJ6ROBzMycImPVp2jTlbB3zIpEWmhM") 269 | 270 | # Input for article link 271 | article_link = st.text_input("Enter the link to the article:") 272 | 273 | # Input for the question 274 | question = st.text_input("Enter your question:") 275 | 276 | if st.button("Analyze"): 277 | if article_link.strip() == "": 278 | st.error("Please enter a link to the article.") 279 | else: 280 | # Load the article content 281 | loader = WebBaseLoader(article_link) 282 | docs = loader.load() 283 | 284 | # Define the Summarize Chain 285 | template = question + """ Write a concise summary of the following: 286 | "{text}" 287 | CONCISE SUMMARY:""" 288 | 289 | prompt = PromptTemplate.from_template(template) 290 | 291 | llm_chain = LLMChain(llm=llm, prompt=prompt) 292 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 293 | 294 | # Invoke Chain 295 | response = stuff_chain.invoke(docs) 296 | summary = response["output_text"] 297 | 298 | # Display the summary 299 | st.header("Summary:") 300 | st.write(summary) 301 | 302 | def langchain_youtube(): 303 | st.title("Langchain Youtube Video Analysis") 304 | from langchain_google_genai import ChatGoogleGenerativeAI 305 | from langchain_community.document_loaders import YoutubeLoader 306 | from langchain.chains import StuffDocumentsChain 307 | from langchain.chains.llm import LLMChain 308 | from langchain.prompts import PromptTemplate 309 | import asyncio 310 | import nest_asyncio 311 | nest_asyncio.apply() 312 | 313 | # Initialize Model 314 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key="AIzaSyAQLXJ6ROBzMycImPVp2jTlbB3zIpEWmhM") 315 | 316 | # Input for article link 317 | youtube_link = st.text_input("Enter the YouTube link:") 318 | 319 | # Input for the question 320 | question = st.text_input("Enter your question:") 321 | 322 | if st.button("Analyze"): 323 | if youtube_link.strip() == "": 324 | st.error("Please enter a link to the article.") 325 | else: 326 | # Load the article content 327 | loader = YoutubeLoader.from_youtube_url( 328 | youtube_link, 329 | add_video_info=True, 330 | language=["en", "id"], 331 | translation="en", 332 | ) 333 | docs = loader.load() 334 | 335 | # Define the Summarize Chain 336 | template = question + """ Write a concise summary of the following: 337 | "{text}" 338 | CONCISE SUMMARY:""" 339 | 340 | prompt = PromptTemplate.from_template(template) 341 | 342 | llm_chain = LLMChain(llm=llm, prompt=prompt) 343 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 344 | 345 | # Invoke Chain 346 | response = stuff_chain.invoke(docs) 347 | summary = response["output_text"] 348 | 349 | # Display the summary 350 | st.header("Summary:") 351 | st.write(summary) 352 | 353 | # Set CSS to arrange buttons horizontally 354 | st.markdown( 355 | """ 356 | 363 | """, 364 | unsafe_allow_html=True, 365 | ) 366 | 367 | # Get the selected page 368 | selected_page = st.sidebar.radio( 369 | "Select Page", 370 | ("Langchain PDF Text Analysis", 371 | "Langchain Microsoft Word File Analysis", 372 | "Langchain Microsoft Excel File Analysis", 373 | "Langchain Microsoft Power Point File Analysis", 374 | "Langchain CSV File Analysis", 375 | "Langchain Web Content Analysis", 376 | "Langchain Youtube Video Analysis") 377 | ) 378 | 379 | if selected_page == "Langchain PDF Text Analysis": 380 | langchain_pdf() 381 | elif selected_page 
== "Langchain Microsoft Word File Analysis": 382 | langchain_doc() 383 | elif selected_page == "Langchain Microsoft Excel File Analysis": 384 | langchain_excel() 385 | elif selected_page == "Langchain Microsoft Power Point File Analysis": 386 | langchain_ppt() 387 | elif selected_page == "Langchain CSV File Analysis": 388 | langchain_csv() 389 | elif selected_page == "Langchain Web Content Analysis": 390 | langchain_web() 391 | elif selected_page == "Langchain Youtube Video Analysis": 392 | langchain_youtube() 393 | -------------------------------------------------------------------------------- /Langchain LLM/langchain_youtube.py: -------------------------------------------------------------------------------- 1 | from langchain_google_genai import ChatGoogleGenerativeAI 2 | from langchain_google_genai import GoogleGenerativeAIEmbeddings 3 | from langchain_community.document_loaders import YoutubeLoader 4 | from langchain.chains import StuffDocumentsChain 5 | from langchain.chains.llm import LLMChain 6 | from langchain.prompts import PromptTemplate 7 | import google.generativeai as genai 8 | 9 | #genai.configure(api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 10 | 11 | #Initialize Model 12 | llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 13 | 14 | #Load the blog 15 | loader = YoutubeLoader.from_youtube_url( 16 | "https://www.youtube.com/watch?v=bT8_sZlgOSI", 17 | add_video_info=True, 18 | language=["en", "id"], 19 | translation="en", 20 | ) 21 | docs = loader.load() 22 | 23 | #Define the Summarize Chain 24 | template = """Write a concise summary of the following: 25 | "{text}" 26 | CONCISE SUMMARY:""" 27 | 28 | prompt = PromptTemplate.from_template(template) 29 | 30 | llm_chain = LLMChain(llm=llm, prompt=prompt) 31 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 32 | 33 | #Invoke Chain 34 | response=stuff_chain.invoke(docs) 35 | print(response["output_text"]) -------------------------------------------------------------------------------- /LlamaIndex/llamastreamlit.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from bs4 import BeautifulSoup 3 | from llama_index.core import Document, Settings, SimpleDirectoryReader, StorageContext, VectorStoreIndex 4 | from llama_index.readers.web import SimpleWebPageReader 5 | from llama_index.vector_stores.chroma import ChromaVectorStore 6 | from llama_index.embeddings.gemini import GeminiEmbedding 7 | from llama_index.llms.gemini import Gemini 8 | from llama_index.core import PromptTemplate 9 | import chromadb 10 | 11 | # Set up Streamlit page title and instructions 12 | st.title("LlamaIndex + Google Gemini Web Article Question Answering") 13 | st.write("Please input the URL of the webpage you'd like to analyze, and ask your question about it.") 14 | 15 | # Input for the webpage URL 16 | url = st.text_input("Enter URL:") 17 | 18 | # Input for the question 19 | question = st.text_input("Ask your question:") 20 | 21 | # If both URL and question are provided, execute the code 22 | if url and question: 23 | # Load webpage content 24 | web_documents = SimpleWebPageReader().load_data([url]) 25 | html_content = web_documents[0].text 26 | 27 | # Parse HTML content 28 | soup = BeautifulSoup(html_content, 'html.parser') 29 | p_tags = soup.findAll('p') 30 | text_content = "" 31 | for each in p_tags: 32 | text_content += each.text + "\n" 33 | 34 | # Convert to Document format 35 | documents = 
[Document(text=text_content)] 36 | 37 | # Initialize Gemini embedding model and LLAMA model 38 | gemini_api_key = "AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA" 39 | gemini_embedding_model = GeminiEmbedding(api_key=gemini_api_key, model_name="models/embedding-001") 40 | llm = Gemini(api_key=gemini_api_key, model_name="models/gemini-pro") 41 | 42 | # Create a client and a new collection 43 | client = chromadb.PersistentClient(path="./chroma_db") 44 | chroma_collection = client.get_or_create_collection("quickstart") 45 | 46 | # Create a vector store 47 | vector_store = ChromaVectorStore(chroma_collection=chroma_collection) 48 | 49 | # Create a storage context 50 | storage_context = StorageContext.from_defaults(vector_store=vector_store) 51 | 52 | # Set Global settings 53 | Settings.llm = llm 54 | Settings.embed_model = gemini_embedding_model 55 | 56 | # Create an index from the documents 57 | index = VectorStoreIndex.from_documents(documents, storage_context=storage_context) 58 | 59 | # Define LLAMA prompt template 60 | template = ( 61 | """ You are an assistant for question-answering tasks. 62 | Use the following context to answer the question. 63 | If you don't know the answer, just say that you don't know. 64 | Use five sentences maximum and keep the answer concise.\n 65 | Question: {query_str} \nContext: {context_str} \nAnswer:""" 66 | ) 67 | llm_prompt = PromptTemplate(template) 68 | 69 | # Query data from the persisted index 70 | query_engine = index.as_query_engine(text_qa_template=llm_prompt) 71 | response = query_engine.query(question) 72 | 73 | # Display just the response text 74 | st.write("Answer:", response.response) 75 | 76 | -------------------------------------------------------------------------------- /MySQL/Data Science Salary Query.sql: -------------------------------------------------------------------------------- 1 | /* SELECT DATA WHERE SALARY > 100000 */ 2 | SELECT * FROM new_schema.ds_salaries 3 | WHERE salary > 100000; 4 | 5 | /* SELECT DATA WHERE SALARY > 100000, Company location in US, Order the salary from the largest */ 6 | SELECT MyUnknownColumn, job_title, salary_in_usd, company_location FROM new_schema.ds_salaries 7 | WHERE salary_in_usd > 100000 8 | AND company_location = 'US' 9 | ORDER BY salary_in_usd DESC; 10 | 11 | /* Count the average Average Salary in USD group by job title and sort from the largest */ 12 | SELECT AVG(salary_in_usd) AS AVERAGE_SALARY_IN_USD, job_title FROM new_schema.ds_salaries 13 | GROUP BY job_title 14 | ORDER BY AVERAGE_SALARY_IN_USD DESC; -------------------------------------------------------------------------------- /MySQL/INNER JOIN COMBINATION.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT Z.customer_id, X.email, CONCAT (X.first_name,' ', X.last_name) AS full_name , Z.inventory_id 2 | FROM sakila.rental Z 3 | LEFT JOIN sakila.customer X 4 | ON Z.customer_id = X.customer_id 5 | ORDER BY inventory_id ASC; 6 | 7 | SELECT A.city_id, A.city, B.country 8 | FROM sakila.city A 9 | INNER JOIN sakila.country B 10 | ON A.city_id = B.country_id; 11 | 12 | SELECT A.film_id, A.actor_id, B.category_id 13 | FROM sakila.film_actor A 14 | RIGHT JOIN sakila.film_category B 15 | ON A.film_id = B.category_id; 16 | -------------------------------------------------------------------------------- /MySQL/README.md: -------------------------------------------------------------------------------- 1 | # SQL Portofolio 2 | Here all of my SQL Portofolio created using MYSQL Workbench 3 | 
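
These scripts are written to be run in MySQL Workbench, but the same queries can also be executed from Python. Below is a minimal sketch using the mysql-connector-python package; the connection details (host, user, password) are placeholder assumptions rather than values from this repository, while the query itself comes from "Data Science Salary Query.sql" in this folder.

```python
import mysql.connector

# Placeholder connection details -- replace with your own MySQL server credentials
conn = mysql.connector.connect(
    host="localhost",
    user="root",
    password="your_password",
    database="new_schema",
)
cursor = conn.cursor()

# Average salary (in USD) per job title, sorted from the highest average
cursor.execute(
    "SELECT AVG(salary_in_usd) AS AVERAGE_SALARY_IN_USD, job_title "
    "FROM new_schema.ds_salaries "
    "GROUP BY job_title "
    "ORDER BY AVERAGE_SALARY_IN_USD DESC"
)
for avg_salary, job_title in cursor.fetchall():
    print(f"{job_title}: {round(avg_salary, 2)}")

cursor.close()
conn.close()
```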
-------------------------------------------------------------------------------- /MySQL/SQL JOIN.sql: -------------------------------------------------------------------------------- 1 | SELECT C.city_id, C.city_name, S.country_name 2 | FROM new_schema.`city` C 3 | JOIN new_schema.`country` S 4 | ON C.city_id = S.country_id; 5 | 6 | SELECT C.city_id, C.city_name, S.country_name 7 | FROM new_schema.`city` C 8 | LEFT JOIN new_schema.`country` S 9 | ON C.city_id = S.country_id; 10 | 11 | SELECT C.city_id, C.city_name, S.country_name 12 | FROM new_schema.`city` C 13 | RIGHT JOIN new_schema.`country` S 14 | ON C.city_id = S.country_id; -------------------------------------------------------------------------------- /MySQL/STUDENTS PERFORMANCE.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM new_schema.students_performance_mv; 2 | 3 | /* COUNT RACE ETHNICITY WHERE test preparation course is completed and ORDER BY ASCENDING */ 4 | SELECT race_ethnicity, COUNT(race_ethnicity) AS TOTAL FROM new_schema.students_performance_mv 5 | WHERE test_preparation_course = 'completed' 6 | GROUP BY race_ethnicity 7 | ORDER BY TOTAL; 8 | 9 | /* COUNT THE TOTAL SCORE EACH STUDENT AND THEN RANK THEM FROM HIGHEST*/ 10 | SELECT gender, race_ethnicity, test_preparation_course, math_score + reading_score + writing_score AS TOTAL_SCORE 11 | FROM new_schema.students_performance_mv 12 | ORDER BY TOTAL_SCORE DESC; 13 | 14 | /* COUNT THE AVERAGE SCORE OF 3 TEST THEN COUNT THE AVERAGE AGAIN GROUP BY RACE ETHNICITY THEN ELIMINATE NULL VALUE AND test preparation course is completed */ 15 | SELECT race_ethnicity, (AVG(math_score + reading_score + writing_score)/3) AS NILAI_3_PELAJARAN_RATA_RATA 16 | FROM new_schema.students_performance_mv 17 | WHERE test_preparation_course = 'completed' 18 | AND NOT race_ethnicity ='' 19 | GROUP BY race_ethnicity 20 | ORDER BY NILAI_3_PELAJARAN_RATA_RATA DESC 21 | -------------------------------------------------------------------------------- /MySQL/SUPERSTORE DATA ANALYSIS.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM new_schema.`sample - superstore - wanda.xlsx - orders`; 2 | 3 | /* SELECT AMOUNT OF CUSTOMER EACH REGION */ 4 | SELECT Region, COUNT(Region) AS TOTAL_CUSTOMER FROM new_schema.`sample - superstore - wanda.xlsx - orders` 5 | GROUP BY Region 6 | ORDER BY TOTAL_CUSTOMER DESC; 7 | 8 | /* COUNT THE QUANTITY EACH REGION */ 9 | SELECT Region, SUM(Quantity) AS TOTAL_QUANTITY FROM new_schema.`sample - superstore - wanda.xlsx - orders` 10 | GROUP BY Region 11 | ORDER BY TOTAL_QUANTITY DESC; 12 | 13 | /* COUNT SALES EACH REGION */ 14 | SELECT Region, ROUND(SUM(Sales),2) AS TOTAL_SALES FROM new_schema.`sample - superstore - wanda.xlsx - orders` 15 | GROUP BY Region 16 | ORDER BY TOTAL_SALES DESC; 17 | 18 | /* FIRST BUY EACH REGION */ 19 | SELECT Region, MIN(Order_Date) AS FIRST_BUYER_DATE FROM new_schema.`sample - superstore - wanda.xlsx - orders` 20 | GROUP BY Region 21 | ORDER BY FIRST_BUYER_DATE; -------------------------------------------------------------------------------- /MySQL/VIRTUAL INTERNSHIP QUERIES.sql: -------------------------------------------------------------------------------- 1 | SELECT DISTINCT Z.SK_ID_CURR, X.SK_ID_CURR, Z.CODE_GENDER, X.NAME_CONTRACT_STATUS 2 | FROM vix.hci_application AS Z 3 | INNER JOIN vix.hci_previous AS X 4 | ON Z.SK_ID_CURR = X.SK_ID_CURR 5 | WHERE Z.TARGET = 0 6 | AND Z.CODE_GENDER = 'F' 7 | AND NOT Z.CNT_CHILDREN = 0 8 | AND 
X.NAME_CONTRACT_STATUS = 'Approved' 9 | ORDER BY X.SK_ID_CURR; 10 | 11 | SELECT DISTINCT Z.SK_ID_CURR, X.SK_ID_CURR, Z.CODE_GENDER, Z.CNT_CHILDREN, Z.TARGET, X.NAME_CONTRACT_TYPE, COUNT(*) AS TOTAL 12 | FROM vix.hci_application AS Z 13 | INNER JOIN vix.hci_previous AS X 14 | ON Z.SK_ID_CURR = X.SK_ID_CURR 15 | GROUP BY X.NAME_CONTRACT_TYPE 16 | /*HAVING Z.CODE_GENDER = 'F' 17 | AND Z.TARGET = 0 18 | AND NOT Z.CNT_CHILDREN = 0*/ 19 | ORDER BY TOTAL DESC; 20 | 21 | SELECT DISTINCT Z.SK_ID_CURR, X.SK_ID_CURR, Z.CODE_GENDER, X.NAME_CONTRACT_STATUS 22 | FROM vix.hci_application AS Z 23 | INNER JOIN vix.hci_previous AS X 24 | ON Z.SK_ID_CURR = X.SK_ID_CURR 25 | WHERE Z.TARGET = 0 26 | AND Z.CODE_GENDER = 'F' 27 | AND NOT Z.CNT_CHILDREN = 0 28 | AND X.NAME_CONTRACT_STATUS = 'Approved'; 29 | -------------------------------------------------------------------------------- /MySQL/exam score analysis.sql: -------------------------------------------------------------------------------- 1 | SELECT gender, race_ethnicity, math_score, reading_score, writing_score, (math_score + reading_score + writing_score) AS total 2 | FROM exam.exams 3 | HAVING total > 200 4 | ORDER BY total DESC; 5 | 6 | SELECT race_ethnicity, AVG(math_score + reading_score + writing_score) AS AVERAGE 7 | FROM exam.exams 8 | GROUP BY race_ethnicity 9 | ORDER BY AVERAGE DESC; 10 | 11 | SELECT race_ethnicity, ROUND(AVG((math_score + reading_score + writing_score)/3),2) AS AVERAGE_SCORE 12 | FROM exam.exams 13 | GROUP BY race_ethnicity 14 | ORDER BY AVERAGE_SCORE DESC; 15 | 16 | SELECT race_ethnicity, ROUND(AVG(math_score),2) AS AVERAGE_MATH, ROUND(AVG(reading_score),2) AS AVERAGE_READING, ROUND(AVG(writing_score),2) AS AVERAGE_WRITING 17 | FROM exam.exams 18 | WHERE math_score > 70 19 | AND reading_score > 70 20 | AND writing_score > 70 21 | GROUP BY race_ethnicity; 22 | 23 | -------------------------------------------------------------------------------- /MySQL/excercise 1.sql: -------------------------------------------------------------------------------- 1 | /* no 1 */ 2 | SELECT VendorID, passenger_count, trip_distance, payment_type FROM new_schema.yellow_tlc_apr2022_1k 3 | WHERE trip_distance < 3 4 | AND payment_type = 3; 5 | 6 | /* no 2 */ 7 | SELECT VendorID, passenger_count, trip_distance, payment_type FROM new_schema.yellow_tlc_apr2022_1k 8 | WHERE trip_distance < 3; 9 | 10 | /* no 3 */ 11 | SELECT VendorID, passenger_count, trip_distance, payment_type FROM new_schema.yellow_tlc_apr2022_1k 12 | WHERE trip_distance < 3 13 | AND passenger_count = 1; 14 | 15 | /* no 4 */ 16 | SELECT VendorID, passenger_count, trip_distance, payment_type FROM new_schema.yellow_tlc_apr2022_1k 17 | WHERE trip_distance 18 | BETWEEN 1.50 AND 1.60; 19 | -------------------------------------------------------------------------------- /MySQL/sakila-dvd-rental.sql: -------------------------------------------------------------------------------- 1 | /* CATEGORIZE FILM WITH ACTOR NAME AND FILM CATEGORY */ 2 | SELECT Z.actor_id, 3 | CONCAT(Z.first_name," ",Z.last_name) AS actor_name, 4 | X.film_id, C.title AS film_title, 5 | B.name AS category 6 | FROM sakila.actor Z 7 | INNER JOIN sakila.film_actor X 8 | ON Z.actor_id = X.actor_id 9 | INNER JOIN sakila.film_text C 10 | ON X.film_id = C.film_id 11 | INNER JOIN sakila.film_category V 12 | ON C.film_id = V.film_id 13 | INNER JOIN sakila.category B 14 | ON V.category_id = B.category_id 15 | WHERE B.name = 'Action'; 16 | 17 | /* CUSTOMER PAYMENT DATA WITH PRICE AND FILM TITLE */ 18 | SELECT 
CONCAT(Z.first_name," ",Z.last_name) AS customer_name, 19 | X.amount, X.payment_date, 20 | C.inventory_id, C.rental_id, 21 | V.film_id, B.title 22 | FROM sakila.customer Z 23 | INNER JOIN sakila.payment X 24 | ON Z.customer_id = X.customer_id 25 | INNER JOIN sakila.rental C 26 | ON X.customer_id = C.customer_id 27 | INNER JOIN sakila.inventory V 28 | ON C.inventory_id = V.inventory_id 29 | INNER JOIN sakila.film_text B 30 | ON V.film_id = B.film_id; 31 | 32 | /* CUSTOMER ADDRESS AND IDENTITY */ 33 | SELECT CONCAT(Z.first_name, " ", Z.last_name) AS name, 34 | Z.email, Z.address_id, 35 | X.address 36 | FROM sakila.customer Z 37 | INNER JOIN sakila.address X 38 | ON Z.address_id = X.address_id -------------------------------------------------------------------------------- /Natural Language Processing/emotion_streamlit.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | import re 6 | from Sastrawi.Stemmer.StemmerFactory import StemmerFactory 7 | from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory 8 | import collections 9 | import pathlib 10 | import textwrap 11 | import google.generativeai as genai 12 | from IPython.display import display 13 | from IPython.display import Markdown 14 | 15 | # Create stemmer 16 | factory = StemmerFactory() 17 | stemmer = factory.create_stemmer() 18 | 19 | # Create stopword remover 20 | stop_factory = StopWordRemoverFactory() 21 | more_stopword = ['dengan', 'ia', 'bahwa', 'oleh', 'rp', 'undang', 'pasal', 'ayat', 'bab'] 22 | data = stop_factory.get_stop_words() + more_stopword 23 | 24 | # Define patterns for removal 25 | hyperlink_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') 26 | number_pattern = re.compile(r'\b\d+\b') 27 | emoticon_pattern = re.compile(u'(' 28 | u'\ud83c[\udf00-\udfff]|' 29 | u'\ud83d[\udc00-\ude4f\ude80-\udeff]|' 30 | u'[\u2600-\u26FF\u2700-\u27BF])+', 31 | re.UNICODE) 32 | 33 | st.title('Sentiment Analysis') 34 | 35 | uploaded_file = st.file_uploader("Upload a CSV file", type="csv") 36 | custom_stopwords = st.text_input('Custom Stopwords (comma-separated)', '') 37 | 38 | if uploaded_file is not None and custom_stopwords: 39 | if st.button('Analyze'): 40 | df = pd.read_csv(uploaded_file) 41 | custom_stopword_list = [word.strip() for word in custom_stopwords.split(',')] 42 | all_stopwords = data + custom_stopword_list 43 | 44 | df['cleaned_text'] = df['full_text'].str.replace(hyperlink_pattern, '') 45 | df['cleaned_text'] = df['cleaned_text'].str.replace(emoticon_pattern, '') 46 | df['cleaned_text'] = df['cleaned_text'].str.replace(number_pattern, '') 47 | 48 | for stopword in custom_stopword_list: 49 | df['cleaned_text'] = df['cleaned_text'].str.replace(stopword, '') 50 | 51 | df['cleaned_text'] = df['cleaned_text'].apply(lambda x: ' '.join( 52 | [stemmer.stem(word) for word in stop_factory.create_stop_word_remover().remove(x).split() 53 | if word.lower() not in all_stopwords] 54 | )) 55 | 56 | from transformers import BertForSequenceClassification, BertTokenizer, BertConfig 57 | 58 | tokenizer = BertTokenizer.from_pretrained("indobert-emotion-classification") 59 | config = BertConfig.from_pretrained("indobert-emotion-classification") 60 | model = BertForSequenceClassification.from_pretrained("indobert-emotion-classification", config=config) 61 | from transformers import pipeline 62 | 63 | nlp = 
pipeline("text-classification", model="indobert-emotion-classification") 64 | results = df['cleaned_text'].apply(lambda x: nlp(x)[0]) 65 | df['label'] = [res['label'] for res in results] 66 | df['score'] = [res['score'] for res in results] 67 | 68 | sentiment_counts = df['label'].value_counts() 69 | 70 | st.write("### Sentiment Distribution") 71 | st.bar_chart(sentiment_counts) 72 | 73 | st.write("### Analysis Results") 74 | st.write(df) 75 | 76 | anger_text = ' '.join(df[df['label'] == 'Anger']['cleaned_text']) 77 | happy_text = ' '.join(df[df['label'] == 'Happy']['cleaned_text']) 78 | neutral_text = ' '.join(df[df['label'] == 'Neutral']['cleaned_text']) 79 | fear_text = ' '.join(df[df['label'] == 'Fear']['cleaned_text']) 80 | sadness_text = ' '.join(df[df['label'] == 'Sadness']['cleaned_text']) 81 | love_text = ' '.join(df[df['label'] == 'Love']['cleaned_text']) 82 | 83 | # Bigrams Anger Sentiment 84 | words1 = anger_text.split() 85 | # Get bigrams 86 | bigrams = list(zip(words1, words1[1:])) 87 | 88 | # Count bigrams 89 | bigram_counts = collections.Counter(bigrams) 90 | 91 | # Get top 10 bigram counts 92 | top_bigrams = dict(bigram_counts.most_common(10)) 93 | 94 | # Create bar chart 95 | plt.figure(figsize=(10, 7)) 96 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 97 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 98 | plt.xlabel('Bigram Words') 99 | plt.ylabel('Count') 100 | plt.title(f"Top 10 Bigram for Anger Sentiment") 101 | # Save the entire plot as a PNG 102 | plt.tight_layout() 103 | plt.savefig("bigram_anger.png") 104 | st.subheader("Bigram for Anger Sentiment") 105 | st.image("bigram_anger.png") 106 | 107 | def to_markdown(text): 108 | text = text.replace('•', ' *') 109 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 110 | 111 | genai.configure(api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 112 | 113 | import PIL.Image 114 | 115 | img = PIL.Image.open("bigram_anger.png") 116 | model = genai.GenerativeModel('gemini-pro-vision') 117 | response = model.generate_content(img) 118 | 119 | response = model.generate_content(["As a marketing consulant, I want to understand consumer insighst based on the chart and the market context so I can use the key findings to formulate actionable insights", img]) 120 | response.resolve() 121 | st.write("**Google Gemini Response About Data**") 122 | st.write(response.text) 123 | 124 | 125 | 126 | 127 | # Bigrams Happy Sentiment 128 | words1 = happy_text.split() 129 | # Get bigrams 130 | bigrams = list(zip(words1, words1[1:])) 131 | 132 | # Count bigrams 133 | bigram_counts = collections.Counter(bigrams) 134 | 135 | # Get top 10 bigram counts 136 | top_bigrams = dict(bigram_counts.most_common(10)) 137 | 138 | # Create bar chart 139 | plt.figure(figsize=(10, 7)) 140 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 141 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 142 | plt.xlabel('Bigram Words') 143 | plt.ylabel('Count') 144 | plt.title(f"Top 10 Bigram for Happy Sentiment") 145 | # Save the entire plot as a PNG 146 | plt.tight_layout() 147 | plt.savefig("bigram_happy.png") 148 | st.subheader("Bigram for Happy Sentiment") 149 | st.image("bigram_happy.png") 150 | 151 | def to_markdown(text): 152 | text = text.replace('•', ' *') 153 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 154 | 155 | genai.configure(api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 156 | 157 | import 
PIL.Image 158 | 159 | img = PIL.Image.open("bigram_happy.png") 160 | model = genai.GenerativeModel('gemini-pro-vision') 161 | response = model.generate_content(img) 162 | 163 | response = model.generate_content(["As a marketing consulant, I want to understand consumer insighst based on the chart and the market context so I can use the key findings to formulate actionable insights", img]) 164 | response.resolve() 165 | st.write("**Google Gemini Response About Data**") 166 | st.write(response.text) 167 | 168 | 169 | 170 | 171 | # Bigrams Neutral Sentiment 172 | words1 = neutral_text.split() 173 | # Get bigrams 174 | bigrams = list(zip(words1, words1[1:])) 175 | 176 | # Count bigrams 177 | bigram_counts = collections.Counter(bigrams) 178 | 179 | # Get top 10 bigram counts 180 | top_bigrams = dict(bigram_counts.most_common(10)) 181 | 182 | # Create bar chart 183 | plt.figure(figsize=(10, 7)) 184 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 185 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 186 | plt.xlabel('Bigram Words') 187 | plt.ylabel('Count') 188 | plt.title(f"Top 10 Bigram for Neutral Sentiment") 189 | # Save the entire plot as a PNG 190 | plt.tight_layout() 191 | plt.savefig("bigram_neutral.png") 192 | st.subheader("Bigram for Neutral Sentiment") 193 | st.image("bigram_neutral.png") 194 | 195 | def to_markdown(text): 196 | text = text.replace('•', ' *') 197 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 198 | 199 | genai.configure(api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 200 | 201 | import PIL.Image 202 | 203 | img = PIL.Image.open("bigram_neutral.png") 204 | model = genai.GenerativeModel('gemini-pro-vision') 205 | response = model.generate_content(img) 206 | 207 | response = model.generate_content(["As a marketing consulant, I want to understand consumer insighst based on the chart and the market context so I can use the key findings to formulate actionable insights", img]) 208 | response.resolve() 209 | st.write("**Google Gemini Response About Data**") 210 | st.write(response.text) 211 | 212 | 213 | 214 | 215 | # Bigrams Fear Sentiment 216 | words1 = fear_text.split() 217 | # Get bigrams 218 | bigrams = list(zip(words1, words1[1:])) 219 | 220 | # Count bigrams 221 | bigram_counts = collections.Counter(bigrams) 222 | 223 | # Get top 10 bigram counts 224 | top_bigrams = dict(bigram_counts.most_common(10)) 225 | 226 | # Create bar chart 227 | plt.figure(figsize=(10, 7)) 228 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 229 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 230 | plt.xlabel('Bigram Words') 231 | plt.ylabel('Count') 232 | plt.title(f"Top 10 Bigram for Fear Sentiment") 233 | # Save the entire plot as a PNG 234 | plt.tight_layout() 235 | plt.savefig("bigram_fear.png") 236 | st.subheader("Bigram for Fear Sentiment") 237 | st.image("bigram_fear.png") 238 | 239 | def to_markdown(text): 240 | text = text.replace('•', ' *') 241 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 242 | 243 | genai.configure(api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 244 | 245 | import PIL.Image 246 | 247 | img = PIL.Image.open("bigram_fear.png") 248 | model = genai.GenerativeModel('gemini-pro-vision') 249 | response = model.generate_content(img) 250 | 251 | response = model.generate_content(["As a marketing consulant, I want to understand consumer insighst based on the chart and the market context so I can use 
the key findings to formulate actionable insights", img]) 252 | response.resolve() 253 | st.write("**Google Gemini Response About Data**") 254 | st.write(response.text) 255 | 256 | 257 | 258 | 259 | # Bigrams Sadness Sentiment 260 | words1 = sadness_text.split() 261 | # Get bigrams 262 | bigrams = list(zip(words1, words1[1:])) 263 | 264 | # Count bigrams 265 | bigram_counts = collections.Counter(bigrams) 266 | 267 | # Get top 10 bigram counts 268 | top_bigrams = dict(bigram_counts.most_common(10)) 269 | 270 | # Create bar chart 271 | plt.figure(figsize=(10, 7)) 272 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 273 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 274 | plt.xlabel('Bigram Words') 275 | plt.ylabel('Count') 276 | plt.title(f"Top 10 Bigram for Sadness Sentiment") 277 | # Save the entire plot as a PNG 278 | plt.tight_layout() 279 | plt.savefig("bigram_sadness.png") 280 | st.subheader("Bigram for Sadness Sentiment") 281 | st.image("bigram_sadness.png") 282 | 283 | def to_markdown(text): 284 | text = text.replace('•', ' *') 285 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 286 | 287 | genai.configure(api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 288 | 289 | import PIL.Image 290 | 291 | img = PIL.Image.open("bigram_sadness.png") 292 | model = genai.GenerativeModel('gemini-pro-vision') 293 | response = model.generate_content(img) 294 | 295 | response = model.generate_content(["As a marketing consulant, I want to understand consumer insighst based on the chart and the market context so I can use the key findings to formulate actionable insights", img]) 296 | response.resolve() 297 | st.write("**Google Gemini Response About Data**") 298 | st.write(response.text) 299 | 300 | 301 | 302 | 303 | # Bigrams Love Sentiment 304 | words1 = love_text.split() 305 | # Get bigrams 306 | bigrams = list(zip(words1, words1[1:])) 307 | 308 | # Count bigrams 309 | bigram_counts = collections.Counter(bigrams) 310 | 311 | # Get top 10 bigram counts 312 | top_bigrams = dict(bigram_counts.most_common(10)) 313 | 314 | # Create bar chart 315 | plt.figure(figsize=(10, 7)) 316 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 317 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 318 | plt.xlabel('Bigram Words') 319 | plt.ylabel('Count') 320 | plt.title(f"Top 10 Bigram for Love Sentiment") 321 | # Save the entire plot as a PNG 322 | plt.tight_layout() 323 | plt.savefig("bigram_love.png") 324 | st.subheader("Bigram for Love Sentiment") 325 | st.image("bigram_love.png") 326 | 327 | def to_markdown(text): 328 | text = text.replace('•', ' *') 329 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 330 | 331 | genai.configure(api_key="AIzaSyC0HGxZs1MI5Nfc_9v9C9b5b7vTSMSlITc") 332 | 333 | import PIL.Image 334 | 335 | img = PIL.Image.open("bigram_love.png") 336 | model = genai.GenerativeModel('gemini-pro-vision') 337 | response = model.generate_content(img) 338 | 339 | response = model.generate_content(["As a marketing consulant, I want to understand consumer insighst based on the chart and the market context so I can use the key findings to formulate actionable insights", img]) 340 | response.resolve() 341 | st.write("**Google Gemini Response About Data**") 342 | st.write(response.text) -------------------------------------------------------------------------------- /Power BI/pbi1.JPG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Power BI/pbi1.JPG -------------------------------------------------------------------------------- /Power BI/pbi2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Power BI/pbi2.JPG -------------------------------------------------------------------------------- /R Language/calculate.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(dplyr) 3 | library(forcats) 4 | library(quantmod) 5 | library(zoo) 6 | library(plotly) 7 | 8 | setwd("D:/RStudio/dataset") 9 | data3 <- read.csv("portfolio_data.csv") 10 | 11 | # Convert the "Date" column to the desired format "2013-05-01" 12 | data3$Date <- as.Date(data3$Date, format = "%m/%d/%Y") 13 | 14 | # Convert the data to an xts object using only the numeric columns 15 | prices_xts <- xts(data3[, -1], order.by = data3$Date) 16 | 17 | # Calculate the returns for each asset 18 | returns_xts <- Return.calculate(prices_xts) 19 | 20 | # Convert the "Date" column to the desired format "2013-05-01" 21 | data3$Date <- as.Date(data3$Date, format = "%m/%d/%Y") 22 | 23 | # Convert the data to an xts object using only the numeric columns 24 | prices_xts <- xts(data3[, -1], order.by = data3$Date) 25 | 26 | # Calculate the returns for each asset 27 | returns_xts <- Return.calculate(prices_xts) 28 | 29 | # Convert the returns back to a data frame 30 | returns_df <- data.frame(Date = index(returns_xts), coredata(returns_xts)) 31 | 32 | # Create an interactive line chart for each asset's returns 33 | plot_ly(data = returns_df, x = ~Date) %>% 34 | add_lines(y = ~AMZN, name = "AMZN", line = list(color = "blue")) %>% 35 | add_lines(y = ~DPZ, name = "DPZ", line = list(color = "green")) %>% 36 | add_lines(y = ~BTC, name = "BTC", line = list(color = "orange")) %>% 37 | add_lines(y = ~NFLX, name = "NFLX", line = list(color = "red")) %>% 38 | layout(title = "Asset Returns Over Time", 39 | xaxis = list(title = "Date"), 40 | yaxis = list(title = "Returns"), 41 | showlegend = TRUE) 42 | -------------------------------------------------------------------------------- /R Language/coba.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(dplyr) 3 | library(forcats) 4 | 5 | setwd("D:/RStudio/dataset") 6 | data <- read.csv("ruu_sql2.csv") 7 | 8 | # Create the countplot 9 | ggplot(data, aes(x = fct_infreq(sponsor))) + 10 | geom_bar(stat = "count") 11 | 12 | # Customized countplot 13 | ggplot(data, aes(x = fct_infreq(sponsor), fill = sponsor)) + 14 | geom_bar() + 15 | labs(title = "Countplot of Sponsor", 16 | x = "Sponsor", 17 | y = "Count") + 18 | theme_minimal() + 19 | theme(axis.text.x = element_text(angle = 45, hjust = 1)) 20 | 21 | 22 | -------------------------------------------------------------------------------- /R Language/portfolio.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(dplyr) 3 | library(forcats) 4 | library(quantmod) 5 | library(zoo) 6 | 7 | setwd("D:/RStudio/dataset") 8 | data1 <- read.csv("portfolio_data.csv") 9 | 10 | # Convert the "Date" column to the desired format "2013-05-01" 11 | data1$Date <- as.Date(data1$Date, format = 
"%m/%d/%Y") 12 | 13 | returns1 <- Return.portfolio(data1) 14 | 15 | 16 | # Convert the xts object to a dataframe and extract the Date column 17 | returns_df <- data.frame(Date = index(returns1), portfolio.returns = coredata(returns1)) 18 | 19 | # Create a line chart 20 | ggplot(data = returns_df, aes(x = Date, y = portfolio.returns)) + 21 | geom_line() + 22 | labs(title = "Portfolio Returns Over Time", 23 | x = "Date", 24 | y = "Portfolio Returns") 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # All-of-Data-Science-Project 2 | This repository is for all of my Data Science Project and Portfolio 3 | -------------------------------------------------------------------------------- /Snowflake Cloud/README.md: -------------------------------------------------------------------------------- 1 | # Snowflake Project 2 | This repository is for all of my Snowflake Cloud that created by myself. Use this code for your reference only, modify this code if you want to use the real time poject 3 | 4 | ## 1. Snowflake Python Connector 5 | This project is about connecting the Database from Snowflake into Jupyter Notebook with Snowflake Python connector. Then we can analyze the data using Python 6 | 7 | ## 2. Snowflake Snowpark Session 8 | This project is about to process data directly within the Snowflake cloud platform, allows the user to build data pipelines and applications for Snowflake in Python, Scala, or Java, allows the user to simplify data preprocessing tasks in Snowflake using familiar programming languages. 9 | -------------------------------------------------------------------------------- /Snowflake Cloud/Snowflake_Python_Connector.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Install Required Library" 21 | ], 22 | "metadata": { 23 | "id": "Q5-cg7fitgua" 24 | } 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "colab": { 31 | "base_uri": "https://localhost:8080/" 32 | }, 33 | "id": "vvcfAobHpIau", 34 | "outputId": "2bf0572d-1fc5-46de-e1e2-6ff217deb515" 35 | }, 36 | "outputs": [ 37 | { 38 | "output_type": "stream", 39 | "name": "stdout", 40 | "text": [ 41 | "Collecting snowflake-connector-python\n", 42 | " Downloading snowflake_connector_python-3.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6 MB)\n", 43 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 44 | "\u001b[?25hCollecting asn1crypto<2.0.0,>0.24.0 (from snowflake-connector-python)\n", 45 | " Downloading asn1crypto-1.5.1-py2.py3-none-any.whl (105 kB)\n", 46 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.0/105.0 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 47 | "\u001b[?25hRequirement already satisfied: cffi<2.0.0,>=1.9 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (1.16.0)\n", 48 | "Requirement already satisfied: cryptography<43.0.0,>=3.1.0 in 
/usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (42.0.5)\n", 49 | "Requirement already satisfied: pyOpenSSL<25.0.0,>=16.2.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (24.1.0)\n", 50 | "Requirement already satisfied: pyjwt<3.0.0 in /usr/lib/python3/dist-packages (from snowflake-connector-python) (2.3.0)\n", 51 | "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (2023.4)\n", 52 | "Requirement already satisfied: requests<3.0.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (2.31.0)\n", 53 | "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (24.0)\n", 54 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (3.3.2)\n", 55 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (3.6)\n", 56 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (2024.2.2)\n", 57 | "Requirement already satisfied: typing-extensions<5,>=4.3 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (4.11.0)\n", 58 | "Requirement already satisfied: filelock<4,>=3.5 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (3.13.4)\n", 59 | "Requirement already satisfied: sortedcontainers>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (2.4.0)\n", 60 | "Requirement already satisfied: platformdirs<5.0.0,>=2.6.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python) (4.2.0)\n", 61 | "Collecting tomlkit (from snowflake-connector-python)\n", 62 | " Downloading tomlkit-0.12.4-py3-none-any.whl (37 kB)\n", 63 | "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi<2.0.0,>=1.9->snowflake-connector-python) (2.22)\n", 64 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0->snowflake-connector-python) (2.0.7)\n", 65 | "Installing collected packages: asn1crypto, tomlkit, snowflake-connector-python\n", 66 | "Successfully installed asn1crypto-1.5.1 snowflake-connector-python-3.8.1 tomlkit-0.12.4\n", 67 | "Collecting snowflake-sqlalchemy\n", 68 | " Downloading snowflake_sqlalchemy-1.5.2-py3-none-any.whl (42 kB)\n", 69 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.4/42.4 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 70 | "\u001b[?25hRequirement already satisfied: snowflake-connector-python in /usr/local/lib/python3.10/dist-packages (from snowflake-sqlalchemy) (3.8.1)\n", 71 | "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.10/dist-packages (from snowflake-sqlalchemy) (2.0.29)\n", 72 | "Requirement already satisfied: asn1crypto<2.0.0,>0.24.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (1.5.1)\n", 73 | "Requirement already satisfied: cffi<2.0.0,>=1.9 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (1.16.0)\n", 74 | "Requirement already satisfied: cryptography<43.0.0,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (42.0.5)\n", 75 | "Requirement 
already satisfied: pyOpenSSL<25.0.0,>=16.2.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (24.1.0)\n", 76 | "Requirement already satisfied: pyjwt<3.0.0 in /usr/lib/python3/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (2.3.0)\n", 77 | "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (2023.4)\n", 78 | "Requirement already satisfied: requests<3.0.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (2.31.0)\n", 79 | "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (24.0)\n", 80 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (3.3.2)\n", 81 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (3.6)\n", 82 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (2024.2.2)\n", 83 | "Requirement already satisfied: typing-extensions<5,>=4.3 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (4.11.0)\n", 84 | "Requirement already satisfied: filelock<4,>=3.5 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (3.13.4)\n", 85 | "Requirement already satisfied: sortedcontainers>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (2.4.0)\n", 86 | "Requirement already satisfied: platformdirs<5.0.0,>=2.6.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (4.2.0)\n", 87 | "Requirement already satisfied: tomlkit in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python->snowflake-sqlalchemy) (0.12.4)\n", 88 | "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy->snowflake-sqlalchemy) (3.0.3)\n", 89 | "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi<2.0.0,>=1.9->snowflake-connector-python->snowflake-sqlalchemy) (2.22)\n", 90 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0->snowflake-connector-python->snowflake-sqlalchemy) (2.0.7)\n", 91 | "Installing collected packages: snowflake-sqlalchemy\n", 92 | "Successfully installed snowflake-sqlalchemy-1.5.2\n", 93 | "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.10/dist-packages (2.0.29)\n", 94 | "Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy) (4.11.0)\n", 95 | "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy) (3.0.3)\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "!pip install snowflake-connector-python\n", 101 | "!pip install snowflake-sqlalchemy\n", 102 | "!pip install sqlalchemy" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "source": [ 108 | "# Make a Connection into your Snowflake account" 109 | ], 110 | "metadata": { 111 | "id": "HsKtW8CatmJl" 112 | } 113 | }, 114 | { 115 | "cell_type": "code", 116 | "source": [ 117 | 
"import snowflake.connector\n", 118 | "from sqlalchemy import create_engine" 119 | ], 120 | "metadata": { 121 | "id": "NM4SW0NspLPo" 122 | }, 123 | "execution_count": null, 124 | "outputs": [] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "source": [ 129 | "# Snowflake connection parameters\n", 130 | "username = 'MAGICDASH91'\n", 131 | "password = '*************'\n", 132 | "account = 'tk11073.europe-west4.gcp'\n", 133 | "warehouse = 'COMPUTE_WH'\n", 134 | "database = 'DATABASE'\n", 135 | "schema = 'PUBLIC'\n", 136 | "\n", 137 | "# Establishing connection\n", 138 | "conn = snowflake.connector.connect(\n", 139 | " user=username,\n", 140 | " password=password,\n", 141 | " account=account,\n", 142 | " warehouse=warehouse,\n", 143 | " database=database,\n", 144 | " schema=schema\n", 145 | ")\n", 146 | "\n", 147 | "# Creating a cursor object\n", 148 | "cur = conn.cursor()" 149 | ], 150 | "metadata": { 151 | "id": "xFqBAYWWpNON" 152 | }, 153 | "execution_count": null, 154 | "outputs": [] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "source": [ 159 | "import pandas as pd\n", 160 | "\n", 161 | "# Executing a modified query\n", 162 | "cur.execute(\"\"\"\n", 163 | " SELECT\n", 164 | " NAME,\n", 165 | " YEAR,\n", 166 | " SELLING_PRICE,\n", 167 | " CASE\n", 168 | " WHEN SELLING_PRICE < 100000 THEN 'CHEAP'\n", 169 | " WHEN SELLING_PRICE >= 100000 AND SELLING_PRICE <= 200000 THEN 'NORMAL'\n", 170 | " ELSE 'EXPENSIVE'\n", 171 | " END AS SELLING_PRICE_LABEL\n", 172 | " FROM\n", 173 | " BIKE_DETAILS\n", 174 | "\"\"\")\n", 175 | "\n", 176 | "# Fetching results\n", 177 | "rows = cur.fetchall()\n", 178 | "\n", 179 | "# Creating a Pandas DataFrame\n", 180 | "df = pd.DataFrame(rows, columns=['Name', 'Year', 'Selling_Price', 'Selling_Price_Label'])\n", 181 | "\n", 182 | "# Displaying the DataFrame\n", 183 | "df" 184 | ], 185 | "metadata": { 186 | "colab": { 187 | "base_uri": "https://localhost:8080/", 188 | "height": 424 189 | }, 190 | "id": "kAL7Zo4-tA1w", 191 | "outputId": "2f46ddc6-e33f-441d-9822-5960ef26bf27" 192 | }, 193 | "execution_count": null, 194 | "outputs": [ 195 | { 196 | "output_type": "execute_result", 197 | "data": { 198 | "text/plain": [ 199 | " Name Year Selling_Price \\\n", 200 | "0 Royal Enfield Classic 350 2019 175000 \n", 201 | "1 Honda Dio 2017 45000 \n", 202 | "2 Royal Enfield Classic Gunmetal Grey 2018 150000 \n", 203 | "3 Yamaha Fazer FI V 2.0 [2016-2018] 2015 65000 \n", 204 | "4 Yamaha SZ [2013-2014] 2011 20000 \n", 205 | "... ... ... ... \n", 206 | "1056 Activa 3g 2010 17000 \n", 207 | "1057 Honda CB twister 2012 16000 \n", 208 | "1058 Bajaj Discover 125 2013 15000 \n", 209 | "1059 Honda CB Shine 2009 12000 \n", 210 | "1060 Bajaj Pulsar 150 2008 10000 \n", 211 | "\n", 212 | " Selling_Price_Label \n", 213 | "0 NORMAL \n", 214 | "1 CHEAP \n", 215 | "2 NORMAL \n", 216 | "3 CHEAP \n", 217 | "4 CHEAP \n", 218 | "... ... \n", 219 | "1056 CHEAP \n", 220 | "1057 CHEAP \n", 221 | "1058 CHEAP \n", 222 | "1059 CHEAP \n", 223 | "1060 CHEAP \n", 224 | "\n", 225 | "[1061 rows x 4 columns]" 226 | ], 227 | "text/html": [ 228 | "\n", 229 | "
\n", 230 | "
\n", 231 | "\n", 244 | "\n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | "
NameYearSelling_PriceSelling_Price_Label
0Royal Enfield Classic 3502019175000NORMAL
1Honda Dio201745000CHEAP
2Royal Enfield Classic Gunmetal Grey2018150000NORMAL
3Yamaha Fazer FI V 2.0 [2016-2018]201565000CHEAP
4Yamaha SZ [2013-2014]201120000CHEAP
...............
1056Activa 3g201017000CHEAP
1057Honda CB twister201216000CHEAP
1058Bajaj Discover 125201315000CHEAP
1059Honda CB Shine200912000CHEAP
1060Bajaj Pulsar 150200810000CHEAP
\n", 334 | "

1061 rows × 4 columns

\n", 335 | "
\n", 336 | "
\n", 337 | "\n", 338 | "
\n", 339 | " \n", 347 | "\n", 348 | " \n", 388 | "\n", 389 | " \n", 413 | "
\n", 414 | "\n", 415 | "\n", 416 | "
\n", 417 | " \n", 428 | "\n", 429 | "\n", 518 | "\n", 519 | " \n", 541 | "
\n", 542 | "
\n", 543 | "
\n" 544 | ], 545 | "application/vnd.google.colaboratory.intrinsic+json": { 546 | "type": "dataframe", 547 | "variable_name": "df", 548 | "summary": "{\n \"name\": \"df\",\n \"rows\": 1061,\n \"fields\": [\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 276,\n \"samples\": [\n \"Hero Xtreme Sports\",\n \"Bajaj Avenger [2015]\",\n \"Bajaj Avenger Street 160\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 1988,\n \"max\": 2020,\n \"num_unique_values\": 28,\n \"samples\": [\n 2012,\n 2003,\n 2020\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Selling_Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 56304,\n \"min\": 5000,\n \"max\": 760000,\n \"num_unique_values\": 130,\n \"samples\": [\n 72000,\n 160000,\n 26000\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Selling_Price_Label\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"NORMAL\",\n \"CHEAP\",\n \"EXPENSIVE\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" 549 | } 550 | }, 551 | "metadata": {}, 552 | "execution_count": 4 553 | } 554 | ] 555 | } 556 | ] 557 | } 558 | -------------------------------------------------------------------------------- /Snowflake Cloud/Snowflake_Snowpark_Session.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Install Required Library" 21 | ], 22 | "metadata": { 23 | "id": "xe3ioIeBOEVv" 24 | } 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "colab": { 31 | "base_uri": "https://localhost:8080/" 32 | }, 33 | "id": "nUI_Fe6nLjC-", 34 | "outputId": "a9217013-5cd8-48f6-b4dd-2217f94882f3" 35 | }, 36 | "outputs": [ 37 | { 38 | "output_type": "stream", 39 | "name": "stdout", 40 | "text": [ 41 | "Requirement already satisfied: snowflake-snowpark-python in /usr/local/lib/python3.10/dist-packages (1.14.0)\n", 42 | "Requirement already satisfied: setuptools>=40.6.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-snowpark-python) (67.7.2)\n", 43 | "Requirement already satisfied: wheel in /usr/local/lib/python3.10/dist-packages (from snowflake-snowpark-python) (0.43.0)\n", 44 | "Requirement already satisfied: snowflake-connector-python<4.0.0,>=3.6.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-snowpark-python) (3.8.1)\n", 45 | "Requirement already satisfied: typing-extensions<5.0.0,>=4.1.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-snowpark-python) (4.11.0)\n", 46 | "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from snowflake-snowpark-python) (6.0.1)\n", 47 | "Requirement already satisfied: cloudpickle!=2.1.0,!=2.2.0,<=2.2.1,>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-snowpark-python) (2.2.1)\n", 48 | "Requirement already satisfied: asn1crypto<2.0.0,>0.24.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (1.5.1)\n", 
49 | "Requirement already satisfied: cffi<2.0.0,>=1.9 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (1.16.0)\n", 50 | "Requirement already satisfied: cryptography<43.0.0,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (42.0.5)\n", 51 | "Requirement already satisfied: pyOpenSSL<25.0.0,>=16.2.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (24.1.0)\n", 52 | "Requirement already satisfied: pyjwt<3.0.0 in /usr/lib/python3/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (2.3.0)\n", 53 | "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (2023.4)\n", 54 | "Requirement already satisfied: requests<3.0.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (2.31.0)\n", 55 | "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (24.0)\n", 56 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (3.3.2)\n", 57 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (3.6)\n", 58 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (2024.2.2)\n", 59 | "Requirement already satisfied: filelock<4,>=3.5 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (3.13.4)\n", 60 | "Requirement already satisfied: sortedcontainers>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (2.4.0)\n", 61 | "Requirement already satisfied: platformdirs<5.0.0,>=2.6.0 in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (4.2.0)\n", 62 | "Requirement already satisfied: tomlkit in /usr/local/lib/python3.10/dist-packages (from snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (0.12.4)\n", 63 | "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi<2.0.0,>=1.9->snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (2.22)\n", 64 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0->snowflake-connector-python<4.0.0,>=3.6.0->snowflake-snowpark-python) (2.0.7)\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "pip install snowflake-snowpark-python" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "source": [ 75 | "# Create Snowpark Session" 76 | ], 77 | "metadata": { 78 | "id": "P7tMNrSvToO9" 79 | } 80 | }, 81 | { 82 | "cell_type": "code", 83 | "source": [ 84 | "from snowflake.snowpark.session import Session\n", 85 | "\n", 86 | "username = 'MAGICDASH91'\n", 87 | "password = '*************'\n", 88 | "account = 'tk11073.europe-west4.gcp'\n", 89 | "warehouse = 'COMPUTE_WH'\n", 90 | "database = 'DATABASE'\n", 91 | "schema = 
'PUBLIC'\n", 92 | "\n", 93 | "def snowpark_session_create():\n", 94 | " connection_params = {\n", 95 | " \"user\": username,\n", 96 | " \"password\": password,\n", 97 | " \"account\": account,\n", 98 | " \"warehouse\": warehouse,\n", 99 | " \"database\": database,\n", 100 | " \"schema\": schema\n", 101 | " }\n", 102 | "\n", 103 | " # Create the session\n", 104 | " session = Session.builder.configs(connection_params).create()\n", 105 | " return session\n", 106 | "\n", 107 | "demo_session = snowpark_session_create()" 108 | ], 109 | "metadata": { 110 | "id": "_UiOBJ79Mhmb" 111 | }, 112 | "execution_count": null, 113 | "outputs": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "source": [ 118 | "# Start Querying your data" 119 | ], 120 | "metadata": { 121 | "id": "F961ofKuT0h9" 122 | } 123 | }, 124 | { 125 | "cell_type": "code", 126 | "source": [ 127 | "df = demo_session.sql('SELECT * FROM CROSS_SELL')\n", 128 | "df.show()" 129 | ], 130 | "metadata": { 131 | "colab": { 132 | "base_uri": "https://localhost:8080/" 133 | }, 134 | "id": "FlYG5jeXTrSr", 135 | "outputId": "e11a1247-a83e-4ebe-b0f6-7f53bb6d0ce2" 136 | }, 137 | "execution_count": null, 138 | "outputs": [ 139 | { 140 | "output_type": "stream", 141 | "name": "stdout", 142 | "text": [ 143 | "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 144 | "|\"ID\" |\"GENDER\" |\"AGE\" |\"DRIVING_LICENSE\" |\"REGION_CODE\" |\"PREVIOUSLY_INSURED\" |\"VEHICLE_AGE\" |\"VEHICLE_DAMAGE\" |\"ANNUAL_PREMIUM\" |\"POLICY_SALES_CHANNEL\" |\"VINTAGE\" |\"RESPONSE\" |\n", 145 | "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 146 | "|1 |Male |44 |1 |28.0 |0 |> 2 Years |True |40454.0 |26.0 |217 |1 |\n", 147 | "|2 |Male |76 |1 |3.0 |0 |1-2 Year |False |33536.0 |26.0 |183 |0 |\n", 148 | "|3 |Male |47 |1 |28.0 |0 |> 2 Years |True |38294.0 |26.0 |27 |1 |\n", 149 | "|4 |Male |21 |1 |11.0 |1 |< 1 Year |False |28619.0 |152.0 |203 |0 |\n", 150 | "|5 |Female |29 |1 |41.0 |1 |< 1 Year |False |27496.0 |152.0 |39 |0 |\n", 151 | "|6 |Female |24 |1 |33.0 |0 |< 1 Year |True |2630.0 |160.0 |176 |0 |\n", 152 | "|7 |Male |23 |1 |11.0 |0 |< 1 Year |True |23367.0 |152.0 |249 |0 |\n", 153 | "|8 |Female |56 |1 |28.0 |0 |1-2 Year |True |32031.0 |26.0 |72 |1 |\n", 154 | "|9 |Female |24 |1 |3.0 |1 |< 1 Year |False |27619.0 |152.0 |28 |0 |\n", 155 | "|10 |Female |32 |1 |6.0 |1 |< 1 Year |False |28771.0 |152.0 |80 |0 |\n", 156 | "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 157 | "\n" 158 | ] 159 | } 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "source": [ 165 | "# Snowflake Transformation" 166 | ], 167 | "metadata": { 168 | "id": "3n2-ctOPg5HG" 169 | } 170 | }, 171 | { 172 | "cell_type": "code", 173 | "source": [ 174 | "import snowflake.snowpark.functions as F" 175 | ], 176 | "metadata": { 177 | "id": "GQI9PNEDVHBl" 178 | }, 179 | "execution_count": null, 180 | "outputs": [] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "source": [ 185 | "# Show the Age where Age between 30 and 44\n", 186 | "df_age = df.filter(F.col('AGE').between(30,44))\n", 187 | "df_age.show()" 188 | ], 189 | "metadata": { 190 | "colab": { 191 | 
"base_uri": "https://localhost:8080/" 192 | }, 193 | "id": "Q6aL1zaLhpyw", 194 | "outputId": "0f315e6c-d52c-4658-bd42-a4389bbf1631" 195 | }, 196 | "execution_count": null, 197 | "outputs": [ 198 | { 199 | "output_type": "stream", 200 | "name": "stdout", 201 | "text": [ 202 | "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 203 | "|\"ID\" |\"GENDER\" |\"AGE\" |\"DRIVING_LICENSE\" |\"REGION_CODE\" |\"PREVIOUSLY_INSURED\" |\"VEHICLE_AGE\" |\"VEHICLE_DAMAGE\" |\"ANNUAL_PREMIUM\" |\"POLICY_SALES_CHANNEL\" |\"VINTAGE\" |\"RESPONSE\" |\n", 204 | "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 205 | "|1 |Male |44 |1 |28.0 |0 |> 2 Years |True |40454.0 |26.0 |217 |1 |\n", 206 | "|10 |Female |32 |1 |6.0 |1 |< 1 Year |False |28771.0 |152.0 |80 |0 |\n", 207 | "|13 |Female |41 |1 |15.0 |1 |1-2 Year |False |31409.0 |14.0 |221 |0 |\n", 208 | "|16 |Male |37 |1 |6.0 |0 |1-2 Year |True |2630.0 |156.0 |147 |1 |\n", 209 | "|19 |Male |42 |1 |28.0 |0 |1-2 Year |True |33667.0 |124.0 |158 |0 |\n", 210 | "|24 |Male |44 |1 |28.0 |0 |1-2 Year |True |41852.0 |163.0 |60 |0 |\n", 211 | "|25 |Male |34 |1 |15.0 |1 |1-2 Year |False |38111.0 |152.0 |180 |0 |\n", 212 | "|35 |Female |32 |1 |30.0 |1 |< 1 Year |False |27638.0 |152.0 |169 |0 |\n", 213 | "|36 |Male |41 |1 |36.0 |1 |1-2 Year |False |30039.0 |124.0 |88 |0 |\n", 214 | "|41 |Male |30 |1 |30.0 |0 |< 1 Year |True |24550.0 |124.0 |45 |0 |\n", 215 | "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 216 | "\n" 217 | ] 218 | } 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "source": [ 224 | "# Create an aggregation about Average ANNUAL_PREMIUM for every VEHICLE_AGE\n", 225 | "avg_ann = df.group_by('VEHICLE_AGE').agg(F.avg('ANNUAL_PREMIUM').alias('AVERAGE_ANNUAL_PREMIUM'))\n", 226 | "avg_ann.show()" 227 | ], 228 | "metadata": { 229 | "colab": { 230 | "base_uri": "https://localhost:8080/" 231 | }, 232 | "id": "KrAhFfgVkLu1", 233 | "outputId": "420c4b76-1415-49f9-ccd6-06bb968ec3b6" 234 | }, 235 | "execution_count": null, 236 | "outputs": [ 237 | { 238 | "output_type": "stream", 239 | "name": "stdout", 240 | "text": [ 241 | "--------------------------------------------\n", 242 | "|\"VEHICLE_AGE\" |\"AVERAGE_ANNUAL_PREMIUM\" |\n", 243 | "--------------------------------------------\n", 244 | "|> 2 Years |35654.4994690 |\n", 245 | "|1-2 Year |30523.5821203 |\n", 246 | "|< 1 Year |30119.5520251 |\n", 247 | "--------------------------------------------\n", 248 | "\n" 249 | ] 250 | } 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "source": [ 256 | "# Simple Multiplication\n", 257 | "mul_col = df.with_column(\"AGE & VINTAGE\", F.col('AGE') * F.col('VINTAGE'))\n", 258 | "mul_col.show()" 259 | ], 260 | "metadata": { 261 | "colab": { 262 | "base_uri": "https://localhost:8080/" 263 | }, 264 | "id": "dWGuzthgoXmu", 265 | "outputId": "8e5a7484-934c-4360-9a2d-5c74ec285fdf" 266 | }, 267 | "execution_count": null, 268 | "outputs": [ 269 | { 270 | "output_type": "stream", 271 | "name": "stdout", 272 | "text": [ 273 | 
"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 274 | "|\"ID\" |\"GENDER\" |\"AGE\" |\"DRIVING_LICENSE\" |\"REGION_CODE\" |\"PREVIOUSLY_INSURED\" |\"VEHICLE_AGE\" |\"VEHICLE_DAMAGE\" |\"ANNUAL_PREMIUM\" |\"POLICY_SALES_CHANNEL\" |\"VINTAGE\" |\"RESPONSE\" |\"AGE & VINTAGE\" |\n", 275 | "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 276 | "|1 |Male |44 |1 |28.0 |0 |> 2 Years |True |40454.0 |26.0 |217 |1 |9548 |\n", 277 | "|2 |Male |76 |1 |3.0 |0 |1-2 Year |False |33536.0 |26.0 |183 |0 |13908 |\n", 278 | "|3 |Male |47 |1 |28.0 |0 |> 2 Years |True |38294.0 |26.0 |27 |1 |1269 |\n", 279 | "|4 |Male |21 |1 |11.0 |1 |< 1 Year |False |28619.0 |152.0 |203 |0 |4263 |\n", 280 | "|5 |Female |29 |1 |41.0 |1 |< 1 Year |False |27496.0 |152.0 |39 |0 |1131 |\n", 281 | "|6 |Female |24 |1 |33.0 |0 |< 1 Year |True |2630.0 |160.0 |176 |0 |4224 |\n", 282 | "|7 |Male |23 |1 |11.0 |0 |< 1 Year |True |23367.0 |152.0 |249 |0 |5727 |\n", 283 | "|8 |Female |56 |1 |28.0 |0 |1-2 Year |True |32031.0 |26.0 |72 |1 |4032 |\n", 284 | "|9 |Female |24 |1 |3.0 |1 |< 1 Year |False |27619.0 |152.0 |28 |0 |672 |\n", 285 | "|10 |Female |32 |1 |6.0 |1 |< 1 Year |False |28771.0 |152.0 |80 |0 |2560 |\n", 286 | "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 287 | "\n" 288 | ] 289 | } 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "source": [ 295 | "# For other Snowpark Functions you can check here :\n", 296 | "\n", 297 | "https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.functions.function" 298 | ], 299 | "metadata": { 300 | "id": "5RvyahLxno7x" 301 | } 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "source": [ 306 | "# Alter (Editing) Existing Dataframe" 307 | ], 308 | "metadata": { 309 | "id": "MqHqX9p_rzs6" 310 | } 311 | }, 312 | { 313 | "cell_type": "code", 314 | "source": [ 315 | "# Rename \"AVERAGE_ANNUAL_PREMIUM\" to be \"AVERAGE_ANNUAL_PREMIUM_ALL_AGE\"\n", 316 | "avg_ann = avg_ann.with_column_renamed(F.col('AVERAGE_ANNUAL_PREMIUM'), 'AVERAGE_ANNUAL_PREMIUM_ALL_AGE')\n", 317 | "avg_ann.show()" 318 | ], 319 | "metadata": { 320 | "colab": { 321 | "base_uri": "https://localhost:8080/" 322 | }, 323 | "id": "_i19UKLOsFnp", 324 | "outputId": "896d0273-da93-4764-93c2-dd858cd3d66d" 325 | }, 326 | "execution_count": null, 327 | "outputs": [ 328 | { 329 | "output_type": "stream", 330 | "name": "stdout", 331 | "text": [ 332 | "----------------------------------------------------\n", 333 | "|\"VEHICLE_AGE\" |\"AVERAGE_ANNUAL_PREMIUM_ALL_AGE\" |\n", 334 | "----------------------------------------------------\n", 335 | "|> 2 Years |35654.4994690 |\n", 336 | "|1-2 Year |30523.5821203 |\n", 337 | "|< 1 Year |30119.5520251 |\n", 338 | "----------------------------------------------------\n", 339 | "\n" 340 | ] 341 | } 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "source": [ 347 | "# Snowflake Drop column" 348 | ], 349 | "metadata": { 350 | "id": "pCfM2Bxytgd7" 351 | } 352 | }, 353 | { 354 | "cell_type": "code", 355 | "source": [ 356 | "df.drop(\"ID\").show()" 357 
| ], 358 | "metadata": { 359 | "colab": { 360 | "base_uri": "https://localhost:8080/" 361 | }, 362 | "id": "Wv78trnDtic7", 363 | "outputId": "0879ffc1-ad0e-4622-a74a-a422da61a915" 364 | }, 365 | "execution_count": null, 366 | "outputs": [ 367 | { 368 | "output_type": "stream", 369 | "name": "stdout", 370 | "text": [ 371 | "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 372 | "|\"GENDER\" |\"AGE\" |\"DRIVING_LICENSE\" |\"REGION_CODE\" |\"PREVIOUSLY_INSURED\" |\"VEHICLE_AGE\" |\"VEHICLE_DAMAGE\" |\"ANNUAL_PREMIUM\" |\"POLICY_SALES_CHANNEL\" |\"VINTAGE\" |\"RESPONSE\" |\n", 373 | "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 374 | "|Male |44 |1 |28.0 |0 |> 2 Years |True |40454.0 |26.0 |217 |1 |\n", 375 | "|Male |76 |1 |3.0 |0 |1-2 Year |False |33536.0 |26.0 |183 |0 |\n", 376 | "|Male |47 |1 |28.0 |0 |> 2 Years |True |38294.0 |26.0 |27 |1 |\n", 377 | "|Male |21 |1 |11.0 |1 |< 1 Year |False |28619.0 |152.0 |203 |0 |\n", 378 | "|Female |29 |1 |41.0 |1 |< 1 Year |False |27496.0 |152.0 |39 |0 |\n", 379 | "|Female |24 |1 |33.0 |0 |< 1 Year |True |2630.0 |160.0 |176 |0 |\n", 380 | "|Male |23 |1 |11.0 |0 |< 1 Year |True |23367.0 |152.0 |249 |0 |\n", 381 | "|Female |56 |1 |28.0 |0 |1-2 Year |True |32031.0 |26.0 |72 |1 |\n", 382 | "|Female |24 |1 |3.0 |1 |< 1 Year |False |27619.0 |152.0 |28 |0 |\n", 383 | "|Female |32 |1 |6.0 |1 |< 1 Year |False |28771.0 |152.0 |80 |0 |\n", 384 | "---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n", 385 | "\n" 386 | ] 387 | } 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "source": [ 393 | "# Join the table" 394 | ], 395 | "metadata": { 396 | "id": "HZ39g44VtVOC" 397 | } 398 | }, 399 | { 400 | "cell_type": "code", 401 | "source": [ 402 | "# We have to make 2nd aggregation dataframe\n", 403 | "avg_ann2 = df_age.group_by('VEHICLE_AGE').agg(F.avg('ANNUAL_PREMIUM').alias('AVERAGE_ANNUAL_PREMIUM_30_TO_44'))\n", 404 | "avg_ann2.show()" 405 | ], 406 | "metadata": { 407 | "colab": { 408 | "base_uri": "https://localhost:8080/" 409 | }, 410 | "id": "FHO9Xi03tXCD", 411 | "outputId": "cf36e6a0-91fc-4458-b7fb-a421c5ac3725" 412 | }, 413 | "execution_count": null, 414 | "outputs": [ 415 | { 416 | "output_type": "stream", 417 | "name": "stdout", 418 | "text": [ 419 | "-----------------------------------------------------\n", 420 | "|\"VEHICLE_AGE\" |\"AVERAGE_ANNUAL_PREMIUM_30_TO_44\" |\n", 421 | "-----------------------------------------------------\n", 422 | "|> 2 Years |33157.8273078 |\n", 423 | "|< 1 Year |27853.8153776 |\n", 424 | "|1-2 Year |28789.0972791 |\n", 425 | "-----------------------------------------------------\n", 426 | "\n" 427 | ] 428 | } 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "source": [ 434 | "join_df = avg_ann.join(avg_ann2, \"VEHICLE_AGE\").select(avg_ann.VEHICLE_AGE.alias(\"VEHICLE_AGE\"),\n", 435 | " avg_ann.AVERAGE_ANNUAL_PREMIUM_ALL_AGE,\n", 436 | " avg_ann2.AVERAGE_ANNUAL_PREMIUM_30_TO_44)\n", 437 | "\n", 438 | "join_df.show()" 439 | ], 440 | "metadata": { 441 | "colab": { 442 | "base_uri": "https://localhost:8080/" 443 | }, 444 | "id": "zc1OkGCWt5Ie", 445 | "outputId": "1888e742-f3c3-4b14-9196-b65e76379704" 446 
| }, 447 | "execution_count": null, 448 | "outputs": [ 449 | { 450 | "output_type": "stream", 451 | "name": "stdout", 452 | "text": [ 453 | "----------------------------------------------------------------------------------------\n", 454 | "|\"VEHICLE_AGE\" |\"AVERAGE_ANNUAL_PREMIUM_ALL_AGE\" |\"AVERAGE_ANNUAL_PREMIUM_30_TO_44\" |\n", 455 | "----------------------------------------------------------------------------------------\n", 456 | "|> 2 Years |35654.4994690 |33157.8273078 |\n", 457 | "|1-2 Year |30523.5821203 |28789.0972791 |\n", 458 | "|< 1 Year |30119.5520251 |27853.8153776 |\n", 459 | "----------------------------------------------------------------------------------------\n", 460 | "\n" 461 | ] 462 | } 463 | ] 464 | } 465 | ] 466 | } 467 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/README.md: -------------------------------------------------------------------------------- 1 | # Streamlit Web Application Project with Google Gemini 2 | This repository is for all of my Streamlit Web Application that created by myself. Use this code for your reference only, modify this code if you want to use the real time poject 3 | 4 | ## 1. Auto Sentiment Analysis Twitter (Google Gemini) 5 | This project allow the user to do auto sentiment analysis from your twitter dataset and then visualize the result with Wordcloud and Bi-Gram Visualization. After that the Google Gemini will make a conclusion and actionable insight based on the visualization 6 | 7 | ## 2. Chat With Your CSV (Google Gemini) 8 | This project allow the user to analyze their own CSV dataset. The user should input the target variable and columns for analysis (attribute) for Data Visualization. There are 4 Visualization on this project : Countplot Visualization, Histoplot Visualization, Multiclass Countplot Visualization, Multiclass Histoplot Visualization. After that, the user can chat with Google Gemini about all of the visualized data 9 | 10 | ## 3. CheatGPT (Google Gemini) 11 | This project allow the user to upload their image, and then Google Gemini will answer your question based on the uploaded image. You only need to screenshot the exam question to do this 12 | 13 | ## 4. Complete Pack 14 | This project is actually the complete pack for all of the Data Science project. There are : Machine Learning Classification Prediction, Machine Learning Regresion Prediction, PDF Document Analysis, Sentiment Analysis, CSV File Analysis, Clustering, EDA With Google Gemini 15 | 16 | ## 5. E-Commerce Clustering (Google Gemini, K-Means) 17 | This project allow the user to do clustering method from their CSV File. First thing they have to do is upload a CSV File, then pick 3 numerical column for clustering. After that the user need to define how many cluster that they want. The last step is the system will give the 3D Clustering Visualization and the Google Gemini will give some response based on the 3D Clustering result 18 | 19 | ## 6. Fraud Analysis (Google Gemini) 20 | This project is actually for my Google Gen AI Hackathon (Hack2Skill). The user only need to upload their fraud csv dataset after that the user should inpput the target variable and some column for analysis (attribute). After that the Google Gemini will give 4 Visualization, they are : Countplot Visualization, Histoplot Visualization, Multiclass Countplot Visualization, Multiclass Histoplot Visualization. The Google Gemini will give some Conclusion and Actionable Insight each Visualization 21 | 22 | ## 7. 
PDF Document Analysis (Google Gemini) 23 | This project allows the user to analyze their PDF file. The user only needs to upload the PDF file and add some additional stopwords for data cleansing. After that, the system will show the Wordcloud Visualization and Bi-Gram Visualization, and lastly Google Gemini will give a conclusion and actionable insight based on each visualization 24 | 25 | ## 8. Table Scraper Analysis (Google Gemini, BeautifulSoup) 26 | This project allows the user to analyze tables from a selected website link. First, the user puts in the link to analyze, and the system shows all of the tables available on the website. After that, the user selects the columns for analysis and removes the unwanted rows. Lastly, Google Gemini analyzes the selected table and gives a conclusion and actionable insight based on the table 27 | 28 | 29 | ## 9. PDF Document Comparison (Google Gemini, Cosine Similarity) 30 | This project allows the user to compare 2 PDF document files, and the system gives the similarity percentage using cosine similarity. The system also shows a Wordcloud and Bi-Gram Visualization for each document. Lastly, Google Gemini analyzes both documents and gives a conclusion about the similarities and differences between them 31 | 32 | 33 | ## 10. CT Scan and MRI Diagnosis Explanator 34 | This web application helps doctors and medical officers analyze the result of a patient's CT scan or MRI image and points out potential abnormalities 35 | 36 | ## 11. LLM Pandas AI and Google Gemini Analysis 37 | This web application analyzes your CSV dataset and lets the user ask anything about their dataset; PandasAI then gives the answer based on the user's question (answers can be a dataframe or a visualization), and lastly Google Gemini gives an explanation if the answer is a visualization 38 | 39 | ## 12. PDF Documents Comparer 40 | This web application analyzes your PDF files with Langchain and Google Gemini. The user can upload 2 PDF files and then ask any question about both files. 
Then Google Gemini wull analyze the documents 41 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/__pycache__/flask.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Streamlit-Web-Application-main/__pycache__/flask.cpython-311.pyc -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/__pycache__/pandasai.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Streamlit-Web-Application-main/__pycache__/pandasai.cpython-311.pyc -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/auto_sentiment_analysis_twitter.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | sns.set_theme(color_codes=True) 5 | import os 6 | import pathlib 7 | import textwrap 8 | import google.generativeai as genai 9 | from IPython.display import display 10 | from IPython.display import Markdown 11 | import PIL.Image 12 | 13 | st.title("Sentiment Analysis") 14 | 15 | from Sastrawi.Stemmer.StemmerFactory import StemmerFactory 16 | from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory 17 | from wordcloud import WordCloud 18 | import PyPDF2 19 | import re 20 | from io import StringIO 21 | import plotly.express as px 22 | import pandas as pd 23 | import collections 24 | from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification 25 | 26 | # Create stemmer 27 | factory = StemmerFactory() 28 | stemmer = factory.create_stemmer() 29 | 30 | # Create stopword remover 31 | stop_factory = StopWordRemoverFactory() 32 | more_stopword = ['dengan', 'ia', 'bahwa', 'oleh', 'rp', 'undang', 'pasal', 'ayat', 'bab'] 33 | data = stop_factory.get_stop_words() + more_stopword 34 | 35 | # Upload the CSV file 36 | uploaded_file = st.file_uploader("Upload CSV file:") 37 | 38 | # User input for delimiter 39 | delimiter_option = st.radio("Select CSV delimiter:", [",", ";"], index=0) 40 | 41 | # Add custom stopwords 42 | custom_stopwords = st.text_input("Enter custom stopwords (comma-separated):") 43 | custom_stopword_list = [word.strip() for word in custom_stopwords.split(",")] if custom_stopwords else [] 44 | 45 | # Check if the file is uploaded 46 | if uploaded_file is not None: 47 | # Read the CSV file into a Pandas DataFrame 48 | if delimiter_option == ",": 49 | df = pd.read_csv(uploaded_file, delimiter=",") 50 | elif delimiter_option == ";": 51 | df = pd.read_csv(uploaded_file, delimiter=";") 52 | else: 53 | st.error("Invalid delimiter option.") 54 | 55 | # Show the DataFrame 56 | st.dataframe(df) 57 | 58 | # Select a column for sentiment analysis 59 | object_columns = df.select_dtypes(include="object").columns 60 | target_variable = st.selectbox("Choose a column for Sentiment Analysis:", object_columns) 61 | 62 | # Perform sentiment analysis on the selected column 63 | if st.button("Perform Sentiment Analysis"): 64 | # Your sentiment analysis logic goes here 65 | st.success(f"Sentiment Analysis performed on column: {target_variable}") 66 | 67 | # Show the selected column 68 
| st.write(f"Selected {target_variable} Column:") 69 | st.dataframe(df[[target_variable]]) 70 | 71 | # Create a new DataFrame with cleaned text column 72 | new_df = df.copy() 73 | 74 | # Create cleaned text column (updated to include custom stopwords) 75 | custom_stopword_list = [word.strip() for word in custom_stopwords.split(",")] if custom_stopwords else [] 76 | new_df['cleaned_text'] = new_df[target_variable].apply(lambda x: ' '.join( 77 | [stemmer.stem(word) for word in stop_factory.create_stop_word_remover().remove(x).split() 78 | if word.lower() not in data and word.lower() not in custom_stopword_list] # Exclude custom stopwords 79 | )) 80 | 81 | # Apply stemming and stopword removal to the selected column 82 | new_df['cleaned_text'] = new_df[target_variable].apply(lambda x: ' '.join([stemmer.stem(word) for word in stop_factory.create_stop_word_remover().remove(x).split() if word.lower() not in data])) 83 | 84 | # Show the cleaned text column 85 | #st.write("Cleaned Text Column:") 86 | #st.dataframe(new_df[['cleaned_text']]) 87 | 88 | # Load the sentiment analysis pipeline 89 | pretrained = "indonesia-bert-sentiment-classification" 90 | model = AutoModelForSequenceClassification.from_pretrained(pretrained) 91 | tokenizer = AutoTokenizer.from_pretrained(pretrained) 92 | sentiment_analysis = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer) 93 | label_index = {'LABEL_0': 'positive', 'LABEL_1': 'neutral', 'LABEL_2': 'negative'} 94 | 95 | # Function to apply sentiment analysis to each row in the 'cleaned_text' column 96 | def analyze_sentiment(text): 97 | result = sentiment_analysis(text) 98 | label = label_index[result[0]['label']] 99 | score = result[0]['score'] 100 | return pd.Series({'sentiment_label': label, 'sentiment_score': score}) 101 | 102 | # Apply sentiment analysis to 'cleaned_text' column 103 | new_df[['sentiment_label', 'sentiment_score']] = new_df['cleaned_text'].apply(analyze_sentiment) 104 | 105 | # Display the results 106 | st.write("Sentiment Analysis Results:") 107 | st.dataframe(new_df[['cleaned_text', 'sentiment_label', 'sentiment_score']]) 108 | 109 | # Count the occurrences of each sentiment label 110 | sentiment_counts = new_df['sentiment_label'].value_counts() 111 | 112 | # Plot a bar chart using seaborn 113 | st.set_option('deprecation.showPyplotGlobalUse', False) 114 | sns.set(style="whitegrid") 115 | plt.figure(figsize=(8, 6)) 116 | sns.barplot(x=sentiment_counts.index, y=sentiment_counts.values, palette="viridis") 117 | plt.title('Sentiment Distribution') 118 | plt.xlabel('Sentiment Label') 119 | plt.ylabel('Count') 120 | st.pyplot() 121 | 122 | # Define a dictionary to store sentiment-wise text 123 | sentiment_text = { 124 | "positive": "", 125 | "neutral": "", 126 | "negative": "" 127 | } 128 | 129 | # Loop through each sentiment label 130 | for label in sentiment_counts.index: 131 | # Filter data for the current sentiment 132 | selected_data = new_df[new_df['sentiment_label'] == label] 133 | 134 | # Include custom stopwords back into the cleaned text before concatenation 135 | selected_data['cleaned_text'] = selected_data['cleaned_text'].apply(lambda x: ' '.join([word for word in x.split() if word.lower() not in data and word.lower() not in custom_stopword_list])) # Remove only general stopwords 136 | 137 | # Concatenate cleaned text from the selected data (now including custom stopwords) 138 | sentiment_text[label] = ' '.join(selected_data['cleaned_text'].astype(str)) 139 | 140 | 141 | # Define variables for sentiment-wise text (adjust 
variable names) 142 | #positive_text = "" 143 | #neutral_text = "" 144 | #negative_text = "" 145 | 146 | 147 | # Concatenate cleaned text for each sentiment 148 | positive_text = ' '.join([word for word in new_df[new_df['sentiment_label'] == 'positive']['cleaned_text'].apply(lambda x: ' '.join([w for w in x.split() if w.lower() not in data and w.lower() not in custom_stopword_list]))]) 149 | neutral_text = ' '.join([word for word in new_df[new_df['sentiment_label'] == 'neutral']['cleaned_text'].apply(lambda x: ' '.join([w for w in x.split() if w.lower() not in data and w.lower() not in custom_stopword_list]))]) 150 | negative_text = ' '.join([word for word in new_df[new_df['sentiment_label'] == 'negative']['cleaned_text'].apply(lambda x: ' '.join([w for w in x.split() if w.lower() not in data and w.lower() not in custom_stopword_list]))]) 151 | 152 | 153 | 154 | # Generate WordCloud for positive sentiment 155 | positive_wordcloud = WordCloud( 156 | min_font_size=3, max_words=200, width=800, height=400, 157 | colormap='viridis', background_color='white' 158 | ).generate(positive_text) 159 | 160 | # Save the WordCloud image with a filename 161 | positive_wordcloud_filename = "wordcloud_positive.png" 162 | positive_wordcloud.to_file(positive_wordcloud_filename) 163 | 164 | # Display the saved WordCloud image using Streamlit 165 | st.subheader("WordCloud for Positive Sentiment") 166 | st.image(positive_wordcloud_filename) 167 | 168 | def to_markdown(text): 169 | text = text.replace('•', ' *') 170 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 171 | 172 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") 173 | 174 | import PIL.Image 175 | 176 | img = PIL.Image.open("wordcloud_positive.png") 177 | model = genai.GenerativeModel('gemini-pro-vision') 178 | response = model.generate_content(img) 179 | 180 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image about wordcloud positive sentiment", img]) 181 | response.resolve() 182 | st.write("**Google Gemini Response About Data**") 183 | st.write(response.text) 184 | 185 | 186 | # Generate WordCloud for neutral sentiment 187 | neutral_wordcloud = WordCloud( 188 | min_font_size=3, max_words=200, width=800, height=400, 189 | colormap='viridis', background_color='white' 190 | ).generate(neutral_text) 191 | 192 | # Save the WordCloud image with a filename 193 | neutral_wordcloud_filename = "wordcloud_neutral.png" 194 | neutral_wordcloud.to_file(neutral_wordcloud_filename) 195 | 196 | # Display the saved WordCloud image using Streamlit 197 | st.subheader("WordCloud for Neutral Sentiment") 198 | st.image(neutral_wordcloud_filename) 199 | 200 | def to_markdown(text): 201 | text = text.replace('•', ' *') 202 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 203 | 204 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") 205 | 206 | import PIL.Image 207 | 208 | img = PIL.Image.open("wordcloud_neutral.png") 209 | model = genai.GenerativeModel('gemini-pro-vision') 210 | response = model.generate_content(img) 211 | 212 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image about wordcloud neutral sentiment", img]) 213 | response.resolve() 214 | st.write("**Google Gemini Response About Data**") 215 | st.write(response.text) 216 | 217 | 218 | 219 | # Generate WordCloud for negative sentiment 220 | 
negative_wordcloud = WordCloud( 221 | min_font_size=3, max_words=200, width=800, height=400, 222 | colormap='viridis', background_color='white' 223 | ).generate(negative_text) 224 | 225 | # Save the WordCloud image with a filename 226 | negative_wordcloud_filename = "wordcloud_negative.png" 227 | negative_wordcloud.to_file(negative_wordcloud_filename) 228 | 229 | # Display the saved WordCloud image using Streamlit 230 | st.subheader("WordCloud for Negative Sentiment") 231 | st.image(negative_wordcloud_filename) 232 | 233 | def to_markdown(text): 234 | text = text.replace('•', ' *') 235 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 236 | 237 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") 238 | 239 | import PIL.Image 240 | 241 | img = PIL.Image.open("wordcloud_negative.png") 242 | model = genai.GenerativeModel('gemini-pro-vision') 243 | response = model.generate_content(img) 244 | 245 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image about wordcloud negative sentiment", img]) 246 | response.resolve() 247 | st.write("**Google Gemini Response About Data**") 248 | st.write(response.text) 249 | 250 | 251 | # Bigrams Positive Sentiment 252 | words1 = positive_text.split() 253 | # Get bigrams 254 | bigrams = list(zip(words1, words1[1:])) 255 | 256 | # Count bigrams 257 | bigram_counts = collections.Counter(bigrams) 258 | 259 | # Get top 10 bigram counts 260 | top_bigrams = dict(bigram_counts.most_common(10)) 261 | 262 | # Create bar chart 263 | plt.figure(figsize=(10, 7)) 264 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 265 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 266 | plt.xlabel('Bigram Words') 267 | plt.ylabel('Count') 268 | plt.title(f"Top 10 Bigram for Positive Sentiment") 269 | # Save the entire plot as a PNG 270 | plt.tight_layout() 271 | plt.savefig("bigram_positive.png") 272 | st.subheader("Bigram for Positive Sentiment") 273 | st.image("bigram_positive.png") 274 | 275 | def to_markdown(text): 276 | text = text.replace('•', ' *') 277 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 278 | 279 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") 280 | 281 | import PIL.Image 282 | 283 | img = PIL.Image.open("bigram_positive.png") 284 | model = genai.GenerativeModel('gemini-pro-vision') 285 | response = model.generate_content(img) 286 | 287 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image about bigram positive sentiment", img]) 288 | response.resolve() 289 | st.write("**Google Gemini Response About Data**") 290 | st.write(response.text) 291 | 292 | 293 | 294 | # Bigrams Neutral Sentiment 295 | words1 = neutral_text.split() 296 | # Get bigrams 297 | bigrams = list(zip(words1, words1[1:])) 298 | 299 | # Count bigrams 300 | bigram_counts = collections.Counter(bigrams) 301 | 302 | # Get top 10 bigram counts 303 | top_bigrams = dict(bigram_counts.most_common(10)) 304 | 305 | # Create bar chart 306 | plt.figure(figsize=(10, 7)) 307 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 308 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 309 | plt.xlabel('Bigram Words') 310 | plt.ylabel('Count') 311 | plt.title(f"Top 10 Bigram for Neutral Sentiment") 312 | # Save the entire plot as a PNG 313 | 
plt.tight_layout() 314 | plt.savefig("bigram_neutral.png") 315 | st.subheader("Bigram for Neutral Sentiment") 316 | st.image("bigram_neutral.png") 317 | 318 | def to_markdown(text): 319 | text = text.replace('•', ' *') 320 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 321 | 322 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") 323 | 324 | import PIL.Image 325 | 326 | img = PIL.Image.open("bigram_neutral.png") 327 | model = genai.GenerativeModel('gemini-pro-vision') 328 | response = model.generate_content(img) 329 | 330 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image about bigram neutral sentiment", img]) 331 | response.resolve() 332 | st.write("**Google Gemini Response About Data**") 333 | st.write(response.text) 334 | 335 | 336 | 337 | # Bigrams Negative Sentiment 338 | words1 = negative_text.split() 339 | # Get bigrams 340 | bigrams = list(zip(words1, words1[1:])) 341 | 342 | # Count bigrams 343 | bigram_counts = collections.Counter(bigrams) 344 | 345 | # Get top 10 bigram counts 346 | top_bigrams = dict(bigram_counts.most_common(10)) 347 | 348 | # Create bar chart 349 | plt.figure(figsize=(10, 7)) 350 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 351 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 352 | plt.xlabel('Bigram Words') 353 | plt.ylabel('Count') 354 | plt.title(f"Top 10 Bigram for negative Sentiment") 355 | # Save the entire plot as a PNG 356 | plt.tight_layout() 357 | plt.savefig("bigram_negative.png") 358 | st.subheader("Bigram for Negative Sentiment") 359 | st.image("bigram_negative.png") 360 | 361 | def to_markdown(text): 362 | text = text.replace('•', ' *') 363 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 364 | 365 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") 366 | 367 | import PIL.Image 368 | 369 | img = PIL.Image.open("bigram_negative.png") 370 | model = genai.GenerativeModel('gemini-pro-vision') 371 | response = model.generate_content(img) 372 | 373 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image about bigram negative sentiment", img]) 374 | response.resolve() 375 | st.write("**Google Gemini Response About Data**") 376 | st.write(response.text) 377 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/chat_with_your_csv.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import random 3 | import time 4 | import pandas as pd 5 | import seaborn as sns 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | from scipy import stats 9 | import warnings 10 | sns.set_theme(color_codes=True) 11 | import os 12 | import pathlib 13 | import textwrap 14 | import google.generativeai as genai 15 | from IPython.display import display 16 | from IPython.display import Markdown 17 | import time 18 | 19 | 20 | st.title("EDA with Google Gemini") 21 | 22 | # Upload the CSV file 23 | uploaded_file = st.file_uploader("Upload CSV file:") 24 | 25 | # Check if the file is uploaded 26 | if uploaded_file is not None: 27 | # Read the CSV file into a Pandas DataFrame 28 | df = pd.read_csv(uploaded_file) 29 | 30 | # Show the original DataFrame 31 | st.write("Original DataFrame:") 32 | st.dataframe(df) 33 | 34 | 35 | 36 
| st.write("**Countplot Barchart**") 37 | 38 | # Get the names of all columns with data type 'object' (categorical columns) excluding 'Country' 39 | cat_vars = [col for col in df.select_dtypes(include='object').columns if df[col].nunique() > 1 and df[col].nunique() <= 10] 40 | 41 | # Create a figure with subplots 42 | num_cols = len(cat_vars) 43 | num_rows = (num_cols + 2) // 3 44 | fig, axs = plt.subplots(nrows=num_rows, ncols=3, figsize=(15, 5*num_rows)) 45 | axs = axs.flatten() 46 | 47 | # Create a countplot for the top 10 values of each categorical variable using Seaborn 48 | for i, var in enumerate(cat_vars): 49 | top_values = df[var].value_counts().head(10).index 50 | filtered_df = df.copy() 51 | filtered_df[var] = df[var].apply(lambda x: x if x in top_values else 'Other') 52 | sns.countplot(x=var, data=filtered_df, ax=axs[i]) 53 | axs[i].set_title(var) 54 | axs[i].tick_params(axis='x', rotation=90) 55 | 56 | # Remove any extra empty subplots if needed 57 | if num_cols < len(axs): 58 | for i in range(num_cols, len(axs)): 59 | fig.delaxes(axs[i]) 60 | 61 | # Adjust spacing between subplots 62 | fig.tight_layout() 63 | 64 | # Show plots using Streamlit 65 | st.pyplot(fig) 66 | fig.savefig("plot4.png") 67 | 68 | 69 | 70 | st.write("**Histoplot**") 71 | # Get the names of all columns with data type 'int' or 'float' 72 | num_vars = [col for col in df.select_dtypes(include=['int', 'float']).columns] 73 | 74 | # Create a figure with subplots 75 | num_cols = len(num_vars) 76 | num_rows = (num_cols + 2) // 3 77 | fig, axs = plt.subplots(nrows=num_rows, ncols=3, figsize=(15, 5*num_rows)) 78 | axs = axs.flatten() 79 | 80 | # Create a histplot for each numeric variable using Seaborn 81 | for i, var in enumerate(num_vars): 82 | sns.histplot(df[var], ax=axs[i], kde=True) 83 | axs[i].set_title(var) 84 | axs[i].set_xlabel('') 85 | 86 | # Remove any extra empty subplots if needed 87 | if num_cols < len(axs): 88 | for i in range(num_cols, len(axs)): 89 | fig.delaxes(axs[i]) 90 | 91 | # Adjust spacing between subplots 92 | fig.tight_layout() 93 | 94 | # Show plots using Streamlit 95 | st.pyplot(fig) 96 | fig.savefig("plot7.png") 97 | 98 | 99 | 100 | # Select target variable 101 | target_variable = st.selectbox("Select target variable:", df.columns) 102 | 103 | # Select columns for analysis 104 | columns_for_analysis = st.multiselect("Select columns for analysis:", [col for col in df.columns if col != target_variable]) 105 | 106 | # Process button 107 | if st.button("Process"): 108 | # Select the target variable and columns for analysis from the original DataFrame 109 | target_variable_data = df[target_variable] 110 | columns_for_analysis_data = df[columns_for_analysis] 111 | 112 | # Display target variable in a dataframe 113 | target_variable_df = df[[target_variable]] 114 | st.write("Target Variable DataFrame:") 115 | st.dataframe(target_variable_df) 116 | 117 | # Display columns for analysis in a dataframe 118 | columns_for_analysis_df = df[columns_for_analysis] 119 | st.write("Columns for Analysis DataFrame:") 120 | st.dataframe(columns_for_analysis_df) 121 | 122 | # Concatenate target variable and columns for analysis into a single DataFrame 123 | df = pd.concat([target_variable_data, columns_for_analysis_data], axis=1) 124 | st.write("Columns for Analysis and Target Variable DataFrame:") 125 | st.dataframe(df) 126 | 127 | # Drop columns with null values more than 25% 128 | null_percentage = df.isnull().sum() / len(df) 129 | columns_to_drop = null_percentage[null_percentage > 0.25].index 130 | 
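The countplot loop above keeps the ten most frequent values of each categorical column and folds the rest into an 'Other' bucket, and the same pattern is repeated in several of the apps further down. A hedged sketch of a small helper that keeps this logic in one place; the name top_n_or_other is illustrative, not part of the original file.

# Sketch of a reusable bucketing helper; top_n_or_other is an illustrative name.
import pandas as pd

def top_n_or_other(series: pd.Series, n: int = 10, other_label: str = "Other") -> pd.Series:
    """Keep the n most frequent values and collapse everything else into one label."""
    top_values = series.value_counts().head(n).index
    return series.where(series.isin(top_values), other_label)

# Usage inside the plotting loop above:
# filtered_df[var] = top_n_or_other(df[var])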
df.drop(columns=columns_to_drop, inplace=True) 131 | 132 | # Fill missing values below 25% with median 133 | for col in df.columns: 134 | if df[col].isnull().sum() > 0: # Check if there are missing values 135 | if null_percentage[col] <= 0.25: 136 | if df[col].dtype in ['float64', 'int64']: # Check if missing values are below 25% 137 | median_value = df[col].median() # Calculate median for the column 138 | df[col].fillna(median_value, inplace=True) 139 | 140 | # Convert object datatype columns to lowercase 141 | for col in df.columns: 142 | if df[col].dtype == 'object': # Check if datatype is object 143 | df[col] = df[col].str.lower() # Convert values to lowercase 144 | 145 | st.write("Cleaned Dataset") 146 | st.dataframe(df) 147 | 148 | 149 | st.write("**Multiclass Barplot**") 150 | # Get the names of all columns with data type 'object' (categorical columns) 151 | cat_cols = df.columns.tolist() 152 | 153 | # Get the names of all columns with data type 'object' (categorical variables) 154 | cat_vars = df.select_dtypes(include=['object']).columns.tolist() 155 | 156 | # Exclude 'Country' from the list if it exists in cat_vars 157 | if target_variable in cat_vars: 158 | cat_vars.remove(target_variable) 159 | 160 | # Create a figure with subplots, but only include the required number of subplots 161 | num_cols = len(cat_vars) 162 | num_rows = (num_cols + 2) // 3 # To make sure there are enough rows for the subplots 163 | fig, axs = plt.subplots(nrows=num_rows, ncols=3, figsize=(15, 5*num_rows)) 164 | axs = axs.flatten() 165 | 166 | # Create a count plot for each categorical variable 167 | for i, var in enumerate(cat_vars): 168 | top_categories = df[var].value_counts().nlargest(10).index 169 | filtered_df = df[df[var].notnull() & df[var].isin(top_categories)] # Exclude rows with NaN values in the variable 170 | sns.countplot(x=var, hue=target_variable, data=filtered_df, ax=axs[i]) 171 | axs[i].set_xticklabels(axs[i].get_xticklabels(), rotation=90) 172 | 173 | # Remove any remaining blank subplots 174 | for i in range(num_cols, len(axs)): 175 | fig.delaxes(axs[i]) 176 | 177 | # Adjust spacing between subplots 178 | fig.tight_layout() 179 | 180 | # Show plot 181 | st.pyplot(fig) 182 | fig.savefig("plot2.png") 183 | 184 | 185 | 186 | 187 | st.write("**Multiclass Histplot**") 188 | # Get the names of all columns with data type 'object' (categorical columns) 189 | cat_cols = df.columns.tolist() 190 | 191 | # Get the names of all columns with data type 'int' 192 | int_vars = df.select_dtypes(include=['int', 'float']).columns.tolist() 193 | int_vars = [col for col in int_vars if col != target_variable] 194 | 195 | # Create a figure with subplots 196 | num_cols = len(int_vars) 197 | num_rows = (num_cols + 2) // 3 # To make sure there are enough rows for the subplots 198 | fig, axs = plt.subplots(nrows=num_rows, ncols=3, figsize=(15, 5*num_rows)) 199 | axs = axs.flatten() 200 | 201 | # Create a histogram for each integer variable with hue='Attrition' 202 | for i, var in enumerate(int_vars): 203 | top_categories = df[var].value_counts().nlargest(10).index 204 | filtered_df = df[df[var].notnull() & df[var].isin(top_categories)] 205 | sns.histplot(data=df, x=var, hue=target_variable, kde=True, ax=axs[i]) 206 | axs[i].set_title(var) 207 | 208 | # Remove any extra empty subplots if needed 209 | if num_cols < len(axs): 210 | for i in range(num_cols, len(axs)): 211 | fig.delaxes(axs[i]) 212 | 213 | # Adjust spacing between subplots 214 | fig.tight_layout() 215 | 216 | # Show plot 217 | st.pyplot(fig) 218 | 
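In the multiclass histplot loop just above, top_categories and filtered_df are computed but sns.histplot is then called with data=df, so the top-10 filter never takes effect (the same pattern recurs in fraud_analysis_llm.py further down). A hedged sketch of the loop with the filter wired in; it reuses the df, int_vars, target_variable and axs names from the surrounding code, and the function name is illustrative.

# Sketch: plot each numeric variable against the target using the top-10 filter that was computed.
import seaborn as sns

def plot_numeric_by_target(df, int_vars, target_variable, axs):
    for i, var in enumerate(int_vars):
        top_categories = df[var].value_counts().nlargest(10).index
        filtered_df = df[df[var].notnull() & df[var].isin(top_categories)]
        sns.histplot(data=filtered_df, x=var, hue=target_variable, kde=True, ax=axs[i])
        axs[i].set_title(var)

For genuinely continuous columns the value_counts-based filter only keeps the ten most frequent exact values, so dropping the filter entirely is also a reasonable choice.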
fig.savefig("plot3.png") 219 | 220 | 221 | # Define the paths to the saved plots 222 | plot_paths = ["plot4.png", "plot7.png", "plot2.png", "plot3.png"] 223 | 224 | # Create a new figure 225 | fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(20, 15)) 226 | 227 | # Iterate over each plot path and place it in the corresponding subplot 228 | for i, plot_path in enumerate(plot_paths): 229 | row = i // 2 230 | col = i % 2 231 | img = plt.imread(plot_path) 232 | axs[row, col].imshow(img) 233 | axs[row, col].axis('off') 234 | 235 | # Adjust spacing between subplots 236 | plt.tight_layout() 237 | 238 | # Save the merged plot 239 | fig.savefig("merged_plots.png") 240 | 241 | # Streamed response emulator 242 | 243 | def to_markdown(text): 244 | text = text.replace('•', ' *') 245 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 246 | 247 | genai.configure(api_key="AIzaSyDU0F3ZmGWBrrFpmUv21ZHuJBoTbtm4mL8") 248 | 249 | import PIL.Image 250 | 251 | img = PIL.Image.open("merged_plots.png") 252 | model = genai.GenerativeModel('gemini-pro-vision') 253 | response = model.generate_content(img) 254 | 255 | def response_generator(): 256 | response = response.text 257 | 258 | 259 | for word in response.split(): 260 | yield word + " " 261 | time.sleep(0.05) 262 | 263 | 264 | st.title("Chat with your Data") 265 | 266 | # Initialize chat history 267 | if "messages" not in st.session_state: 268 | st.session_state.messages = [] 269 | 270 | # Display chat messages from history on app rerun 271 | for message in st.session_state.messages: 272 | with st.chat_message(message["role"]): 273 | st.markdown(message["content"]) 274 | 275 | # Accept user input 276 | if prompt := st.chat_input("Ask Your Data"): 277 | # Add user message to chat history 278 | st.session_state.messages.append({"role": "user", "content": prompt}) 279 | # Display user message in chat message container 280 | with st.chat_message("user"): 281 | st.markdown(prompt) 282 | 283 | # Generate Google Gemini response based on user's question 284 | img = PIL.Image.open("merged_plots.png") 285 | model = genai.GenerativeModel('gemini-pro-vision') 286 | response = model.generate_content([prompt, img], stream=True) 287 | response.resolve() 288 | 289 | # Format and display the response 290 | response_text = response.text 291 | response_markdown = to_markdown(response_text) 292 | st.write(response.text) 293 | 294 | # Add assistant response to chat history 295 | st.session_state.messages.append({"role": "assistant", "content": response_text}) 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/cheatgpt.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from PIL import Image 3 | import io 4 | import textwrap 5 | import google.generativeai as genai 6 | from IPython.display import display 7 | from IPython.display import Markdown 8 | 9 | st.title("CheatGPT") 10 | 11 | uploaded_file = st.file_uploader("Upload your PNG or JPG image:", type=["png", "jpg"]) 12 | 13 | if uploaded_file is not None: 14 | 15 | # Validate the file extension 16 | if uploaded_file.type in ["image/png", "image/jpeg"]: 17 | # Read the image bytes 18 | img_bytes = uploaded_file.read() 19 | 20 | # Convert bytes to PIL Image object 21 | img = Image.open(io.BytesIO(img_bytes)) 22 | st.write("Image Uploaded") 23 | st.image(img) 24 | 25 | img.save("image.png") 26 | 27 | def to_markdown(text): 28 | 
text = text.replace('•', ' *') 29 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 30 | 31 | genai.configure(api_key="AIzaSyDU0F3ZmGWBrrFpmUv21ZHuJBoTbtm4mL8") 32 | 33 | import PIL.Image 34 | 35 | img1 = PIL.Image.open("image.png") 36 | model = genai.GenerativeModel('gemini-pro-vision') 37 | response = model.generate_content(img) 38 | 39 | response = model.generate_content(["Answer This Question and give the explanation", img1], stream=True) 40 | response.resolve() 41 | st.write("**Google Gemini Response About Data**") 42 | st.write(response.text) 43 | 44 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/compare.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import streamlit as st 4 | from langchain.chains import StuffDocumentsChain 5 | from langchain.chains.llm import LLMChain 6 | from langchain.prompts import PromptTemplate 7 | from langchain_community.document_loaders import PyPDFLoader 8 | from langchain_google_genai import ChatGoogleGenerativeAI 9 | 10 | # Title of the app 11 | st.title("PDF Document Comparer Analysis") 12 | 13 | # Upload the PDF files 14 | uploaded_file1 = st.file_uploader("Upload First PDF file:", type='pdf') 15 | uploaded_file2 = st.file_uploader("Upload Second PDF file:", type='pdf') 16 | question = st.text_input("Insert Question", "Put your question here about both documents") 17 | 18 | async def process_files(): 19 | if uploaded_file1 and uploaded_file2 and question: 20 | # Save the uploaded files as file1.pdf and file2.pdf 21 | file1_path = "file1.pdf" 22 | file2_path = "file2.pdf" 23 | with open(file1_path, "wb") as f1: 24 | f1.write(uploaded_file1.getbuffer()) 25 | with open(file2_path, "wb") as f2: 26 | f2.write(uploaded_file2.getbuffer()) 27 | 28 | # Initialize the LLM with the Google API key 29 | llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", google_api_key="AIzaSyCFI6cTqFdS-mpZBfi7kxwygewtnuF7PfA") 30 | 31 | # Load the PDF files 32 | loader1 = PyPDFLoader(file1_path) 33 | loader2 = PyPDFLoader(file2_path) 34 | docs1 = loader1.load() 35 | docs2 = loader2.load() 36 | docs3 = docs1 + docs2 37 | 38 | # Define the Summarize Chain 39 | template = """Write a concise summary of the following: 40 | "{text}" 41 | CONCISE SUMMARY:""" 42 | prompt = PromptTemplate.from_template(template) 43 | llm_chain = LLMChain(llm=llm, prompt=prompt) 44 | stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text") 45 | 46 | # Process both documents 47 | response1 = stuff_chain.invoke(docs1) 48 | response2 = stuff_chain.invoke(docs2) 49 | 50 | # Display the summaries 51 | st.markdown("### Summary of the First Document") 52 | st.write(response1["output_text"]) 53 | 54 | st.markdown("### Summary of the Second Document") 55 | st.write(response2["output_text"]) 56 | 57 | # Additional comparison logic can be added here based on the question 58 | comparison_template = question + """Write a concise summary of the following: 59 | "{text}" 60 | CONCISE SUMMARY:""" 61 | 62 | prompt1 = PromptTemplate.from_template(comparison_template) 63 | llm_chain1 = LLMChain(llm=llm, prompt=prompt1) 64 | stuff_chain1 = StuffDocumentsChain(llm_chain=llm_chain1, document_variable_name="text") 65 | response3 = stuff_chain1.invoke(docs3) 66 | 67 | # Display the comparison result 68 | st.markdown("### Comparison Result") 69 | st.write(response3["output_text"]) 70 | 71 | # Clean up the temporary files 72 | 
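In compare.py the uploads are written to file1.pdf and file2.pdf at the top of process_files(), but the two os.remove calls just below pass uploaded_file1.name and uploaded_file2.name, the browser-side filenames that were never written to disk. A hedged sketch of cleaning up the paths that were actually created.

# Sketch: delete the temporary files that process_files() actually wrote.
import os

for path in (file1_path, file2_path):  # "file1.pdf" and "file2.pdf" from earlier in the function
    if os.path.exists(path):
        os.remove(path)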
os.remove(uploaded_file1.name) 73 | os.remove(uploaded_file2.name) 74 | 75 | if st.button("Process"): 76 | asyncio.run(process_files()) 77 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/diagnosis.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import datetime 3 | import os 4 | import PIL.Image 5 | import google.generativeai as genai 6 | from IPython.display import Markdown 7 | import time 8 | import io 9 | from PIL import Image 10 | import textwrap 11 | 12 | # Replace with your GenerativeAI API key 13 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") 14 | 15 | st.title("CT Scan and MRI Diagnosis Explanator") 16 | 17 | # Initialize chat history 18 | if "messages" not in st.session_state: 19 | st.session_state.messages = [] 20 | 21 | # Display chat messages from history on app rerun 22 | for message in st.session_state.messages: 23 | with st.chat_message(message["role"]): 24 | st.markdown(message["content"]) 25 | 26 | # Upload an image file 27 | uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"]) 28 | 29 | if uploaded_file is not None: 30 | if uploaded_file.type in ["image/png", "image/jpeg"]: 31 | img_bytes = uploaded_file.read() 32 | img = Image.open(io.BytesIO(img_bytes)) 33 | st.write("Image Uploaded") 34 | st.image(img) 35 | 36 | img.save("image.png") 37 | 38 | def to_markdown(text): # Consider removing if formatting not needed 39 | text = text.replace('•', '  *') 40 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 41 | 42 | model = genai.GenerativeModel('gemini-pro-vision') # Check supported models 43 | response = model.generate_content(["Can you analyze this CT scan or MRI and explain any potential abnormalities?", img], stream=True) 44 | response.resolve() 45 | 46 | st.write("**Google Gemini Response About the image**") 47 | 48 | 49 | # Extract text from all candidates (GitHub solution) 50 | text_parts = [] 51 | for candidate in response.candidates: 52 | text_parts.extend([part.text for part in candidate.content.parts]) 53 | full_text = ''.join(text_parts) # Join text parts for a cohesive response 54 | 55 | st.write(full_text) # Display the combined text 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/ecommerce_clustering_llm.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import seaborn as sns 4 | import matplotlib.pyplot as plt 5 | from sklearn.cluster import KMeans 6 | from sklearn.metrics import silhouette_score 7 | from mpl_toolkits.mplot3d import Axes3D 8 | sns.set_theme(color_codes=True) 9 | import os 10 | import pathlib 11 | import textwrap 12 | import google.generativeai as genai 13 | from IPython.display import display 14 | from IPython.display import Markdown 15 | import streamlit as st 16 | 17 | st.title("Ecommerce Segmentation Analysis") 18 | 19 | # Upload the CSV file 20 | uploaded_file = st.file_uploader("Upload CSV file:") 21 | 22 | # Check if the file is uploaded 23 | if uploaded_file is not None: 24 | # Read the CSV file into a Pandas DataFrame 25 | df = pd.read_csv(uploaded_file) 26 | 27 | # Show the DataFrame 28 | st.dataframe(df) 29 | 30 | # Get numeric columns for clustering 31 | numeric_columns = df.select_dtypes(include=['int64', 'float64']).columns 32 | clustering_columns = 
st.multiselect("Select numeric columns for clustering:", numeric_columns) 33 | 34 | # Check if at least 3 columns are selected 35 | if len(clustering_columns) != 3: 36 | st.warning("Please select exactly 3 numeric columns for clustering.") 37 | else: 38 | # Display the selected columns 39 | st.subheader("Selected Columns for Clustering:") 40 | selected_data = df[clustering_columns] 41 | st.dataframe(selected_data) 42 | 43 | # Remove missing values 44 | selected_data.dropna(inplace=True) 45 | 46 | def visualize_clustering(df, selected_data): 47 | # Visualize the Elbow Method to find optimal clusters 48 | wcss = [] 49 | for i in range(1, 11): 50 | kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0) 51 | kmeans.fit(selected_data) 52 | wcss.append(kmeans.inertia_) 53 | 54 | # Plot the Elbow Method 55 | st.subheader("Elbow Method to Determine Optimal Clusters") 56 | fig, ax = plt.subplots(figsize=(8, 5)) 57 | ax.plot(range(1, 11), wcss, marker='o') 58 | ax.set_title('Elbow Method') 59 | ax.set_xlabel('Number of Clusters') 60 | ax.set_ylabel('WCSS') # Within-Cluster Sum of Squares 61 | st.pyplot(fig) 62 | 63 | # Visualize Silhouette Score for different cluster numbers 64 | silhouette_scores = [] 65 | for n_clusters in range(2, 11): 66 | kmeans = KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0) 67 | kmeans.fit(selected_data) 68 | silhouette_avg = silhouette_score(selected_data, kmeans.labels_) 69 | silhouette_scores.append(silhouette_avg) 70 | 71 | # Plot Silhouette Score 72 | st.subheader("Silhouette Score for Different Cluster Numbers") 73 | fig, ax = plt.subplots(figsize=(8, 5)) 74 | ax.plot(range(2, 11), silhouette_scores, marker='o') 75 | ax.set_title('Silhouette Score') 76 | ax.set_xlabel('Number of Clusters') 77 | ax.set_ylabel('Silhouette Score') 78 | st.pyplot(fig) 79 | 80 | # Apply KMeans clustering based on user-selected number of clusters 81 | num_clusters = st.slider("Select the number of clusters (2-10):", 2, 10, 3) 82 | kmeans = KMeans(n_clusters=num_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0) 83 | cluster_labels = kmeans.fit_predict(selected_data) 84 | 85 | # Create a new DataFrame with the cluster labels 86 | clustered_df = pd.DataFrame(cluster_labels, columns=['cluster'], index=selected_data.index) 87 | 88 | # Concatenate the clustered_df with the original DataFrame 89 | df = pd.concat([df, clustered_df], axis=1) 90 | st.subheader("Clustered Dataset") 91 | st.dataframe(df) 92 | 93 | # Visualize clustering results in 3D plot 94 | fig = plt.figure(figsize=(10, 12)) 95 | ax = fig.add_subplot(111, projection='3d') 96 | scatter = ax.scatter(selected_data[clustering_columns[0]], 97 | selected_data[clustering_columns[1]], 98 | selected_data[clustering_columns[2]], 99 | c=cluster_labels, cmap='viridis', s=50) 100 | 101 | ax.set_xlabel(clustering_columns[0]) 102 | ax.set_ylabel(clustering_columns[1]) 103 | ax.set_zlabel(clustering_columns[2]) 104 | ax.set_title(f'3D Clustering (Cluster Amount = {num_clusters})') 105 | 106 | # Add a legend 107 | legend = ax.legend(*scatter.legend_elements(), title="Clusters") 108 | ax.add_artist(legend) 109 | 110 | # Show the 3D plot 111 | st.pyplot(fig) 112 | fig.savefig("plot8.png") 113 | 114 | # Visualize clustering 115 | visualize_clustering(df, selected_data) 116 | 117 | 118 | def to_markdown(text): 119 | text = text.replace('•', ' *') 120 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 121 | 122 | 
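The clustering section above plots WCSS and silhouette scores and then asks the user to pick the cluster count with a slider. If a default suggestion is wanted, the k with the highest silhouette score can be proposed; a hedged sketch follows, with an illustrative function name, reusing the selected_data frame prepared above. Note also that selected_data is a slice of df, so selected_data.dropna(inplace=True) can trigger a SettingWithCopyWarning; reassigning with selected_data = df[clustering_columns].dropna() avoids it.

# Sketch: suggest the cluster count with the best silhouette score over k = 2..10.
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

def suggest_n_clusters(data, k_range=range(2, 11), random_state=0):
    scores = {}
    for k in k_range:
        labels = KMeans(n_clusters=k, init="k-means++", n_init=10,
                        random_state=random_state).fit_predict(data)
        scores[k] = silhouette_score(data, labels)
    return max(scores, key=scores.get)

# Example: st.slider("Select the number of clusters (2-10):", 2, 10, suggest_n_clusters(selected_data))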
genai.configure(api_key="AIzaSyCY-mXpPt-J0oGRaSiPaeAyAVollbMxCF8") 123 | 124 | import PIL.Image 125 | 126 | img = PIL.Image.open("plot8.png") 127 | model = genai.GenerativeModel('gemini-pro-vision') 128 | response = model.generate_content(img) 129 | 130 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the each cluster colour. write the conclusion in English", img], stream=True) 131 | response.resolve() 132 | st.subheader("**Google Gemini Response About Data**") 133 | st.write(response.text) 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/fraud_analysis_llm.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import streamlit as st 6 | sns.set_theme(color_codes=True) 7 | import textwrap 8 | import google.generativeai as genai 9 | from IPython.display import display 10 | from IPython.display import Markdown 11 | 12 | st.title("Fraud Analysis and Detection with Google Gen AI") 13 | 14 | # Upload the CSV file 15 | uploaded_file = st.file_uploader("Upload CSV file:") 16 | 17 | # Check if the file is uploaded 18 | if uploaded_file is not None: 19 | # Read the CSV file into a Pandas DataFrame 20 | df = pd.read_csv(uploaded_file) 21 | 22 | # Show the original DataFrame 23 | st.write("Original DataFrame:") 24 | st.dataframe(df) 25 | 26 | # Data Cleansing 27 | for col in df.columns: 28 | if 'value' in col or 'price' in col or 'cost' in col or 'amount' in col or 'Value' in col or 'Price' in col or 'Cost' in col or 'Amount' in col: 29 | df[col] = df[col].str.replace('$', '') 30 | df[col] = df[col].str.replace('£', '') 31 | df[col] = df[col].str.replace('€', '') 32 | # Remove non-numeric characters 33 | df[col] = df[col].replace('[^\d.-]', '', regex=True).astype(float) 34 | 35 | # Drop columns with null values more than 25% 36 | null_percentage = df.isnull().sum() / len(df) 37 | columns_to_drop = null_percentage[null_percentage > 0.25].index 38 | df.drop(columns=columns_to_drop, inplace=True) 39 | 40 | # Fill missing values below 25% with median 41 | for col in df.columns: 42 | if df[col].isnull().sum() > 0: # Check if there are missing values 43 | if null_percentage[col] <= 0.25: 44 | if df[col].dtype in ['float64', 'int64']: # Check if missing values are below 25% 45 | median_value = df[col].median() # Calculate median for the column 46 | df[col].fillna(median_value, inplace=True) 47 | 48 | # Convert object datatype columns to lowercase 49 | for col in df.columns: 50 | if df[col].dtype == 'object': # Check if datatype is object 51 | df[col] = df[col].str.lower() # Convert values to lowercase 52 | 53 | st.write("Cleaned Dataset") 54 | st.dataframe(df) 55 | 56 | 57 | 58 | st.write("**Countplot Barchart**") 59 | 60 | # Get the names of all columns with data type 'object' (categorical columns) excluding 'Country' 61 | cat_vars = [col for col in df.select_dtypes(include='object').columns if df[col].nunique() > 1 and df[col].nunique() <= 10] 62 | 63 | # Create a figure with subplots 64 | num_cols = len(cat_vars) 65 | num_rows = (num_cols + 2) // 3 66 | fig, axs = plt.subplots(nrows=num_rows, ncols=3, figsize=(15, 5*num_rows)) 67 | axs = axs.flatten() 68 | 69 | # Create a countplot for the top 10 values of each categorical variable using Seaborn 70 | for i, var in enumerate(cat_vars): 71 | 
top_values = df[var].value_counts().head(10).index 72 | filtered_df = df.copy() 73 | filtered_df[var] = df[var].apply(lambda x: x if x in top_values else 'Other') 74 | sns.countplot(x=var, data=filtered_df, ax=axs[i]) 75 | axs[i].set_title(var) 76 | axs[i].tick_params(axis='x', rotation=90) 77 | 78 | # Remove any extra empty subplots if needed 79 | if num_cols < len(axs): 80 | for i in range(num_cols, len(axs)): 81 | fig.delaxes(axs[i]) 82 | 83 | # Adjust spacing between subplots 84 | fig.tight_layout() 85 | 86 | # Show plots using Streamlit 87 | st.pyplot(fig) 88 | fig.savefig("plot4.png") 89 | 90 | def to_markdown(text): 91 | text = text.replace('•', ' *') 92 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 93 | 94 | genai.configure(api_key="AIzaSyDU0F3ZmGWBrrFpmUv21ZHuJBoTbtm4mL8") 95 | 96 | import PIL.Image 97 | 98 | img = PIL.Image.open("plot4.png") 99 | model = genai.GenerativeModel('gemini-pro-vision') 100 | response = model.generate_content(img) 101 | 102 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image", img], stream=True) 103 | response.resolve() 104 | st.write("**Google Gemini Response About Data**") 105 | st.write(response.text) 106 | 107 | 108 | 109 | # Get the names of all columns with data type 'int' or 'float' 110 | num_vars = [col for col in df.select_dtypes(include=['int', 'float']).columns] 111 | 112 | # Create a figure with subplots 113 | num_cols = len(num_vars) 114 | num_rows = (num_cols + 2) // 3 115 | fig, axs = plt.subplots(nrows=num_rows, ncols=3, figsize=(15, 5*num_rows)) 116 | axs = axs.flatten() 117 | 118 | # Create a histplot for each numeric variable using Seaborn 119 | for i, var in enumerate(num_vars): 120 | sns.histplot(df[var], ax=axs[i], kde=True) 121 | axs[i].set_title(var) 122 | axs[i].set_xlabel('') 123 | 124 | # Remove any extra empty subplots if needed 125 | if num_cols < len(axs): 126 | for i in range(num_cols, len(axs)): 127 | fig.delaxes(axs[i]) 128 | 129 | # Adjust spacing between subplots 130 | fig.tight_layout() 131 | 132 | # Show plots using Streamlit 133 | st.pyplot(fig) 134 | fig.savefig("plot5.png") 135 | 136 | def to_markdown(text): 137 | text = text.replace('•', ' *') 138 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 139 | 140 | genai.configure(api_key="AIzaSyDU0F3ZmGWBrrFpmUv21ZHuJBoTbtm4mL8") 141 | 142 | img = PIL.Image.open("plot5.png") 143 | model = genai.GenerativeModel('gemini-pro-vision') 144 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image", img], stream=True) 145 | response.resolve() 146 | st.write("**Google Gemini Response About Data**") 147 | st.write(response.text) 148 | 149 | 150 | # Select target variable 151 | target_variable = st.selectbox("Select target variable:", df.columns) 152 | 153 | # Select columns for analysis 154 | columns_for_analysis = st.multiselect("Select columns for analysis:", [col for col in df.columns if col != target_variable]) 155 | 156 | # Process button 157 | if st.button("Process"): 158 | # Select the target variable and columns for analysis from the original DataFrame 159 | target_variable_data = df[target_variable] 160 | columns_for_analysis_data = df[columns_for_analysis] 161 | 162 | # Display target variable in a dataframe 163 | target_variable_df = df[[target_variable]] 164 | st.write("Target Variable DataFrame:") 165 | 
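The data-cleansing loop near the top of fraud_analysis_llm.py above calls .str.replace on every column whose name mentions value, price, cost or amount; if such a column is already numeric the .str accessor raises an AttributeError. A hedged sketch that only touches string columns and strips all non-numeric characters in one pass; the helper name is illustrative.

# Sketch: strip currency symbols and separators from string columns only, in a single pass.
import pandas as pd

def clean_currency_columns(df: pd.DataFrame,
                           keywords=("value", "price", "cost", "amount")) -> pd.DataFrame:
    for col in df.columns:
        if any(k in col.lower() for k in keywords) and df[col].dtype == "object":
            df[col] = (df[col]
                       .replace(r"[^\d.\-]", "", regex=True)  # drops $, £, €, commas, spaces
                       .astype(float))
    return df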
st.dataframe(target_variable_df) 166 | 167 | # Display columns for analysis in a dataframe 168 | columns_for_analysis_df = df[columns_for_analysis] 169 | st.write("Columns for Analysis DataFrame:") 170 | st.dataframe(columns_for_analysis_df) 171 | 172 | # Concatenate target variable and columns for analysis into a single DataFrame 173 | df = pd.concat([target_variable_data, columns_for_analysis_data], axis=1) 174 | 175 | # Drop columns with null values more than 25% 176 | null_percentage = df.isnull().sum() / len(df) 177 | columns_to_drop = null_percentage[null_percentage > 0.25].index 178 | df.drop(columns=columns_to_drop, inplace=True) 179 | 180 | # Fill missing values below 25% with median 181 | for col in df.columns: 182 | if df[col].isnull().sum() > 0: # Check if there are missing values 183 | if null_percentage[col] <= 0.25: 184 | if df[col].dtype in ['float64', 'int64']: # Check if missing values are below 25% 185 | median_value = df[col].median() # Calculate median for the column 186 | df[col].fillna(median_value, inplace=True) 187 | 188 | # Convert object datatype columns to lowercase 189 | for col in df.columns: 190 | if df[col].dtype == 'object': # Check if datatype is object 191 | df[col] = df[col].str.lower() # Convert values to lowercase 192 | 193 | st.write("Cleaned Dataset") 194 | st.dataframe(df) 195 | 196 | st.write("**Multiclass Barplot**") 197 | # Get the names of all columns with data type 'object' (categorical columns) 198 | cat_cols = df.columns.tolist() 199 | 200 | # Get the names of all columns with data type 'object' (categorical variables) 201 | cat_vars = df.select_dtypes(include=['object']).columns.tolist() 202 | 203 | # Exclude 'Country' from the list if it exists in cat_vars 204 | if target_variable in cat_vars: 205 | cat_vars.remove(target_variable) 206 | 207 | # Create a figure with subplots, but only include the required number of subplots 208 | num_cols = len(cat_vars) 209 | num_rows = (num_cols + 2) // 3 # To make sure there are enough rows for the subplots 210 | fig, axs = plt.subplots(nrows=num_rows, ncols=3, figsize=(15, 5*num_rows)) 211 | axs = axs.flatten() 212 | 213 | # Create a count plot for each categorical variable 214 | for i, var in enumerate(cat_vars): 215 | top_categories = df[var].value_counts().nlargest(10).index 216 | filtered_df = df[df[var].notnull() & df[var].isin(top_categories)] # Exclude rows with NaN values in the variable 217 | sns.countplot(x=var, hue=target_variable, data=filtered_df, ax=axs[i]) 218 | axs[i].set_xticklabels(axs[i].get_xticklabels(), rotation=90) 219 | 220 | # Remove any remaining blank subplots 221 | for i in range(num_cols, len(axs)): 222 | fig.delaxes(axs[i]) 223 | 224 | # Adjust spacing between subplots 225 | fig.tight_layout() 226 | 227 | # Show plot 228 | st.pyplot(fig) 229 | fig.savefig("plot6.png") 230 | 231 | def to_markdown(text): 232 | text = text.replace('•', ' *') 233 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 234 | 235 | genai.configure(api_key="AIzaSyDU0F3ZmGWBrrFpmUv21ZHuJBoTbtm4mL8") 236 | 237 | import PIL.Image 238 | 239 | img = PIL.Image.open("plot6.png") 240 | model = genai.GenerativeModel('gemini-pro-vision') 241 | response = model.generate_content(img) 242 | 243 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image", img], stream=True) 244 | response.resolve() 245 | st.write("**Google Gemini Response About Data**") 246 | st.write(response.text) 247 | 248 | 249 | 
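Throughout these apps the vision model is called twice per plot: once with the bare image (model.generate_content(img)), whose result is immediately overwritten, and once with the prompt. One call is enough, and without stream=True the response is already resolved; a hedged sketch using the plot6.png example above, with the same prompt and model name as the original code.

# Sketch: a single non-streaming Gemini call per plot; the bare generate_content(img) call is redundant.
import PIL.Image
import google.generativeai as genai
import streamlit as st

img = PIL.Image.open("plot6.png")
model = genai.GenerativeModel("gemini-pro-vision")
response = model.generate_content(
    ["You are a professional Data Analyst, write the complete conclusion and "
     "actionable insight based on the image", img]
)
st.write("**Google Gemini Response About Data**")
st.write(response.text)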
st.write("**Multiclass Histplot**") 250 | # Get the names of all columns with data type 'object' (categorical columns) 251 | cat_cols = df.columns.tolist() 252 | 253 | # Get the names of all columns with data type 'int' 254 | int_vars = df.select_dtypes(include=['int', 'float']).columns.tolist() 255 | int_vars = [col for col in int_vars if col != target_variable] 256 | 257 | # Create a figure with subplots 258 | num_cols = len(int_vars) 259 | num_rows = (num_cols + 2) // 3 # To make sure there are enough rows for the subplots 260 | fig, axs = plt.subplots(nrows=num_rows, ncols=3, figsize=(15, 5*num_rows)) 261 | axs = axs.flatten() 262 | 263 | # Create a histogram for each integer variable with hue='Attrition' 264 | for i, var in enumerate(int_vars): 265 | top_categories = df[var].value_counts().nlargest(10).index 266 | filtered_df = df[df[var].notnull() & df[var].isin(top_categories)] 267 | sns.histplot(data=df, x=var, hue=target_variable, kde=True, ax=axs[i]) 268 | axs[i].set_title(var) 269 | 270 | # Remove any extra empty subplots if needed 271 | if num_cols < len(axs): 272 | for i in range(num_cols, len(axs)): 273 | fig.delaxes(axs[i]) 274 | 275 | # Adjust spacing between subplots 276 | fig.tight_layout() 277 | 278 | # Show plot 279 | st.pyplot(fig) 280 | fig.savefig("plot7.png") 281 | 282 | def to_markdown(text): 283 | text = text.replace('•', ' *') 284 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 285 | 286 | genai.configure(api_key="AIzaSyDU0F3ZmGWBrrFpmUv21ZHuJBoTbtm4mL8") 287 | 288 | import PIL.Image 289 | 290 | img = PIL.Image.open("plot7.png") 291 | model = genai.GenerativeModel('gemini-pro-vision') 292 | response = model.generate_content(img) 293 | 294 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image", img], stream=True) 295 | response.resolve() 296 | st.write("**Google Gemini Response About Data**") 297 | st.write(response.text) 298 | 299 | 300 | 301 | 302 | 303 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/indonesia-bert-sentiment-classification/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "indobenchmark/indobert-base-p1", 3 | "_num_labels": 5, 4 | "architectures": [ 5 | "BertForSequenceClassification" 6 | ], 7 | "attention_probs_dropout_prob": 0.1, 8 | "classifier_dropout": null, 9 | "directionality": "bidi", 10 | "gradient_checkpointing": false, 11 | "hidden_act": "gelu", 12 | "hidden_dropout_prob": 0.1, 13 | "hidden_size": 768, 14 | "id2label": { 15 | "0": "LABEL_0", 16 | "1": "LABEL_1", 17 | "2": "LABEL_2" 18 | }, 19 | "initializer_range": 0.02, 20 | "intermediate_size": 3072, 21 | "label2id": { 22 | "LABEL_0": 0, 23 | "LABEL_1": 1, 24 | "LABEL_2": 2 25 | }, 26 | "layer_norm_eps": 1e-12, 27 | "max_position_embeddings": 512, 28 | "model_type": "bert", 29 | "num_attention_heads": 12, 30 | "num_hidden_layers": 12, 31 | "output_past": true, 32 | "pad_token_id": 0, 33 | "pooler_fc_size": 768, 34 | "pooler_num_attention_heads": 12, 35 | "pooler_num_fc_layers": 3, 36 | "pooler_size_per_head": 128, 37 | "pooler_type": "first_token_transform", 38 | "position_embedding_type": "absolute", 39 | "problem_type": "single_label_classification", 40 | "torch_dtype": "float32", 41 | "transformers_version": "4.10.2", 42 | "type_vocab_size": 2, 43 | "use_cache": true, 44 | "vocab_size": 50000 45 | } 46 | 
-------------------------------------------------------------------------------- /Streamlit-Web-Application-main/llmpandas.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import streamlit as st 3 | from langchain_groq.chat_models import ChatGroq 4 | from pandasai import SmartDataframe 5 | import os 6 | from PIL import Image 7 | import textwrap 8 | import google.generativeai as genai 9 | from IPython.display import display 10 | from IPython.display import Markdown 11 | import io 12 | import matplotlib.pyplot as plt 13 | 14 | # Load language model 15 | llm = ChatGroq( 16 | model_name="mixtral-8x7b-32768", 17 | api_key="YOUR_GROQ_API") 18 | 19 | def main(): 20 | st.title("Ask your CSV") 21 | 22 | # Allow user to upload CSV file 23 | uploaded_file = st.file_uploader("Upload CSV file", type=["csv"]) 24 | 25 | if uploaded_file is not None: 26 | # Read uploaded CSV file into pandas DataFrame 27 | data = pd.read_csv(uploaded_file) 28 | st.dataframe(data) 29 | 30 | # Convert DataFrame into SmartDataFrame 31 | df = SmartDataframe(data, config={"llm": llm}) 32 | 33 | # Add text box for user input 34 | question = st.text_input("Ask a question about the data:") 35 | 36 | if st.button("Ask"): 37 | if question: 38 | # Answer the user's question using the language model 39 | answer = df.chat(question) 40 | 41 | # Display the answer 42 | st.write("Answer:", answer) 43 | 44 | # Check if the answer is a visualization 45 | if isinstance(answer, str) and os.path.exists(answer): 46 | # Open the image file 47 | image = Image.open(answer) 48 | # Display the image 49 | st.image(image, caption="Visualization") 50 | 51 | # Save the figure as result.png 52 | plt.savefig("result.png") 53 | 54 | # Generate content using Google Gemini 55 | def to_markdown(text): 56 | text = text.replace('•', ' *') 57 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 58 | 59 | genai.configure(api_key="YOUR_GOOGLE_GEMINI_API") 60 | model = genai.GenerativeModel('gemini-pro-vision') 61 | 62 | img1 = Image.open("result.png") 63 | response = model.generate_content(["You are a Professional Data Analyst, give a conclusion and actionable insight based on the visualization", img1], stream=True) 64 | response.resolve() 65 | 66 | st.write("**Google Gemini Response About Data**") 67 | st.write(response.text) 68 | else: 69 | st.warning("No visualization found.") 70 | else: 71 | st.warning("Please ask a question.") 72 | 73 | if __name__ == "__main__": 74 | main() 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/pdf_comparer.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import seaborn as sns 3 | import matplotlib.pyplot as plt 4 | import PyPDF2 5 | sns.set_theme(color_codes=True) 6 | import pandas as pd 7 | from io import StringIO 8 | import re 9 | import os 10 | import pathlib 11 | import textwrap 12 | import google.generativeai as genai 13 | from IPython.display import display 14 | from IPython.display import Markdown 15 | # import StemmerFactory class 16 | from Sastrawi.Stemmer.StemmerFactory import StemmerFactory 17 | # create stemmer 18 | factory = StemmerFactory() 19 | stemmer = factory.create_stemmer() 20 | 21 | st.title("PDF Document Comparison") 22 | 23 | additional_stopwords = st.text_input("Enter additional stopwords (comma-separated)", value="") 24 | additional_stopwords = 
additional_stopwords.split(",") 25 | 26 | from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory 27 | stop_factory = StopWordRemoverFactory() 28 | more_stopword = ['dengan', 'ia','bahwa','oleh','rp','undang','pasal','ayat','bab'] 29 | data = stop_factory.get_stop_words()+more_stopword + additional_stopwords 30 | stopword = stop_factory.create_stop_word_remover() 31 | 32 | # Function to read PDF and return string 33 | def read_pdf(file): 34 | # Create a PyPDF2 reader object 35 | pdf_reader = PyPDF2.PdfFileReader(file) 36 | 37 | # Extract text from all pages of PDF 38 | text = "" 39 | for page in range(pdf_reader.getNumPages()): 40 | text += pdf_reader.getPage(page).extractText() 41 | 42 | # Return the text as a string 43 | return text 44 | 45 | # Upload PDF file 46 | file = st.file_uploader("Upload a PDF file", type="pdf", key='text1') 47 | 48 | # If file is uploaded 49 | if file is not None: 50 | # Call read_pdf function to convert PDF to string 51 | text1 = read_pdf(file) 52 | 53 | 54 | # Function to read PDF and return string 55 | def read_pdf(file): 56 | # Create a PyPDF2 reader object 57 | pdf_reader = PyPDF2.PdfFileReader(file) 58 | 59 | # Extract text from all pages of PDF 60 | text = "" 61 | for page in range(pdf_reader.getNumPages()): 62 | text += pdf_reader.getPage(page).extractText() 63 | 64 | # Return the text as a string 65 | return text 66 | 67 | # Upload PDF file 68 | file = st.file_uploader("Upload a PDF file", type="pdf", key='text2') 69 | 70 | # If file is uploaded 71 | if file is not None: 72 | # Call read_pdf function to convert PDF to string 73 | text2 = read_pdf(file) 74 | 75 | 76 | if st.button("Process"): 77 | 78 | sentence1 = text1 79 | output1 = stemmer.stem(sentence1) 80 | 81 | hasil1 = re.sub(r"\d+", "", output1) 82 | hasil1 = re.sub(r'[^a-zA-Z\s]','',output1) 83 | 84 | pattern = re.compile(r'\b(' + r'|'.join(data) + r')\b\s*') 85 | hasil1 = pattern.sub('', hasil1) 86 | 87 | 88 | sentence2 = text2 89 | output2 = stemmer.stem(sentence2) 90 | 91 | hasil2 = re.sub(r"\d+", "", output2) 92 | hasil2 = re.sub(r'[^a-zA-Z\s]','',output2) 93 | 94 | pattern = re.compile(r'\b(' + r'|'.join(data) + r')\b\s*') 95 | hasil2 = pattern.sub('', hasil2) 96 | 97 | documents = [hasil1, hasil2] 98 | from sklearn.feature_extraction.text import CountVectorizer 99 | import pandas as pd 100 | 101 | # Create the Document Term Matrix 102 | count_vectorizer = CountVectorizer(stop_words='english') 103 | count_vectorizer = CountVectorizer() 104 | sparse_matrix = count_vectorizer.fit_transform(documents) 105 | from sklearn.metrics.pairwise import cosine_similarity 106 | cosine_sim = cosine_similarity(sparse_matrix, sparse_matrix) 107 | 108 | 109 | plt.rcParams.update({'font.size': 26}) 110 | 111 | heatmap = plt.figure(figsize =(5, 5)) 112 | sns.heatmap(cosine_sim, fmt='.2g', annot=True) 113 | 114 | 115 | import matplotlib.pyplot as plt 116 | from wordcloud import WordCloud 117 | 118 | # Create a WordCloud object 119 | wordcloud = WordCloud(min_font_size=3,max_words=200,width=1600,height=720, 120 | colormap = 'Set2', background_color='white').generate(hasil1) 121 | 122 | # Display the WordCloud using Matplotlib and Streamlit 123 | fig, ax = plt.subplots() 124 | ax.imshow(wordcloud, interpolation='bilinear') 125 | ax.axis('off') 126 | 127 | 128 | # Create a WordCloud object 129 | wordcloud = WordCloud(min_font_size=3,max_words=200,width=1600,height=720, 130 | colormap = 'Set2', background_color='white').generate(hasil2) 131 | 132 | # Display the WordCloud using 
Matplotlib and Streamlit 133 | fig2, ax = plt.subplots() 134 | ax.imshow(wordcloud, interpolation='bilinear') 135 | ax.axis('off') 136 | 137 | 138 | str=hasil1+hasil2 139 | # Create a WordCloud object 140 | wordcloud = WordCloud(min_font_size=3,max_words=200,width=1600,height=720, 141 | colormap = 'Set2', background_color='white').generate(str) 142 | 143 | # Display the WordCloud using Matplotlib and Streamlit 144 | fig3, ax = plt.subplots() 145 | ax.imshow(wordcloud, interpolation='bilinear') 146 | ax.axis('off') 147 | 148 | 149 | 150 | #bigram visualization 151 | import collections 152 | # Get bigrams 153 | words1 = hasil1.split() 154 | bigrams = list(zip(words1, words1[1:])) 155 | 156 | # Count bigrams 157 | bigram_counts = collections.Counter(bigrams) 158 | 159 | # Get top 10 bigram counts 160 | top_bigrams = dict(bigram_counts.most_common(10)) 161 | 162 | # Create bar chart 163 | plt.rcParams.update({'font.size': 12}) 164 | fig4, ax = plt.subplots() 165 | ax.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 166 | ax.set_xticks(range(len(top_bigrams))) 167 | ax.set_xticklabels(list(top_bigrams.keys())) 168 | ax.set_xlabel('Bigram Words') 169 | ax.set_ylabel('Count') 170 | ax.set_title('Top 10 Bigram Word Counts') 171 | plt.xticks(rotation=90) 172 | plt.figure(figsize =(15, 15)) 173 | 174 | 175 | 176 | 177 | #bigram visualization 178 | import collections 179 | # Get bigrams 180 | words2 = hasil2.split() 181 | bigrams = list(zip(words2, words2[1:])) 182 | 183 | # Count bigrams 184 | bigram_counts = collections.Counter(bigrams) 185 | 186 | # Get top 10 bigram counts 187 | top_bigrams = dict(bigram_counts.most_common(10)) 188 | 189 | # Create bar chart 190 | plt.rcParams.update({'font.size': 12}) 191 | fig5, ax = plt.subplots() 192 | ax.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 193 | ax.set_xticks(range(len(top_bigrams))) 194 | ax.set_xticklabels(list(top_bigrams.keys())) 195 | ax.set_xlabel('Bigram Words') 196 | ax.set_ylabel('Count') 197 | ax.set_title('Top 10 Bigram Word Counts') 198 | plt.xticks(rotation=90) 199 | plt.figure(figsize =(15, 15)) 200 | 201 | st.write("**Accuracy**") 202 | st.write(heatmap) 203 | 204 | st.write("**WordCloud Document 1**") 205 | st.pyplot(fig) 206 | 207 | st.write("**WordCloud Document 2**") 208 | st.pyplot(fig2) 209 | 210 | st.write("**WordCloud From Both Documents**") 211 | st.pyplot(fig3) 212 | 213 | st.write("**Bi-Gram for Document 1**") 214 | st.pyplot(fig4) 215 | 216 | st.write("**Bi-Gram for Document 2**") 217 | st.pyplot(fig5) 218 | 219 | 220 | def to_markdown(text): 221 | text = text.replace('•', ' *') 222 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 223 | 224 | # Configure genai with API key 225 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") 226 | 227 | # Instantiate the model 228 | model = genai.GenerativeModel('gemini-1.0-pro-latest') 229 | 230 | # Generate content 231 | response = model.generate_content(["Compare the simmilarities and give some conclusion between these 2 PDF Document : ", hasil1, "and", hasil2], stream=True) 232 | response.resolve() 233 | st.write("**Google Gemini Response About Data**") 234 | st.write(response.text) -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/pdf_document_analysis.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import pandas as pd 3 | from Sastrawi.Stemmer.StemmerFactory 
import StemmerFactory 4 | from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory 5 | from wordcloud import WordCloud 6 | import PyPDF2 7 | import re 8 | from io import StringIO 9 | import plotly.express as px 10 | import pandas as pd 11 | import collections 12 | import seaborn as sns 13 | sns.set_theme(color_codes=True) 14 | import os 15 | import pathlib 16 | import textwrap 17 | import google.generativeai as genai 18 | from IPython.display import display 19 | from IPython.display import Markdown 20 | import PIL.Image 21 | import matplotlib.pyplot as plt 22 | 23 | st.title("NLP : PDF Document Analysis") 24 | st.set_option('deprecation.showPyplotGlobalUse', False) 25 | 26 | # Function to convert text to Markdown format 27 | def to_markdown(text): 28 | text = text.replace('•', ' *') 29 | return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True)) 30 | 31 | # Create stemmer 32 | factory = StemmerFactory() 33 | stemmer = factory.create_stemmer() 34 | 35 | # Create stopword remover 36 | stop_factory = StopWordRemoverFactory() 37 | more_stopword = ['dengan', 'ia', 'bahwa', 'oleh', 'rp', 'undang', 'pasal', 'ayat', 'bab'] 38 | data = stop_factory.get_stop_words() + more_stopword 39 | 40 | # User input for custom stopwords 41 | custom_stopwords = st.text_input("Enter custom stopwords (comma-separated):") 42 | if custom_stopwords: 43 | custom_stopword_list = [word.strip() for word in custom_stopwords.split(",")] 44 | data.extend(custom_stopword_list) 45 | 46 | # Function to read PDF and return string 47 | def read_pdf(file): 48 | pdf_reader = PyPDF2.PdfFileReader(file) 49 | text = "" 50 | for page in range(pdf_reader.getNumPages()): 51 | text += pdf_reader.getPage(page).extractText() 52 | return text 53 | 54 | # Upload PDF file 55 | file = st.file_uploader("Upload a PDF file", type="pdf", key='text1') 56 | 57 | # If file is uploaded 58 | if file is not None: 59 | # Call read_pdf function to convert PDF to string 60 | text1 = read_pdf(file) 61 | 62 | # Stem and preprocess the text 63 | sentence1 = text1 64 | output1 = stemmer.stem(sentence1) 65 | hasil1 = re.sub(r"\d+", "", output1) 66 | hasil1 = re.sub(r'[^a-zA-Z\s]', '', hasil1) 67 | pattern = re.compile(r'\b(' + r'|'.join(data) + r')\b\s*') 68 | hasil1 = pattern.sub('', hasil1) 69 | 70 | # Create WordCloud 71 | wordcloud = WordCloud( 72 | min_font_size=3, max_words=200, width=800, height=400, 73 | colormap='Set2', background_color='white' 74 | ).generate(hasil1) 75 | 76 | # Save the WordCloud image 77 | wordcloud_file = "wordcloud.png" 78 | wordcloud.to_file(wordcloud_file) 79 | 80 | # Display the WordCloud using Streamlit 81 | st.subheader(f"Wordcloud Visualization") 82 | st.image(wordcloud_file) 83 | 84 | # Use Google Gemini API to generate content based on the uploaded image 85 | st.subheader("Google Gemini Response") 86 | 87 | # Load the image 88 | img = PIL.Image.open(wordcloud_file) 89 | 90 | # Configure and use the GenerativeAI model 91 | genai.configure(api_key="AIzaSyDU0F3ZmGWBrrFpmUv21ZHuJBoTbtm4mL8") 92 | model = genai.GenerativeModel('gemini-pro-vision') 93 | response = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image", img], stream=True) 94 | response.resolve() 95 | 96 | # Display Gemini API response in Markdown format 97 | st.write(response.text) 98 | 99 | # Use Google Gemini API to generate content based on the WordCloud image 100 | genai.configure(api_key="AIzaSyDU0F3ZmGWBrrFpmUv21ZHuJBoTbtm4mL8") 101 | model 
= genai.GenerativeModel('gemini-pro-vision') 102 | response_gemini = model.generate_content(["You are a professional Data Analyst, write the complete conclusion and actionable insight based on the image", img], stream=True) 103 | response_gemini.resolve() 104 | 105 | # Bigram visualization 106 | # Get bigrams 107 | words1 = hasil1.split() 108 | # Get bigrams 109 | bigrams = list(zip(words1, words1[1:])) 110 | 111 | # Count bigrams 112 | bigram_counts = collections.Counter(bigrams) 113 | 114 | # Get top 10 bigram counts 115 | top_bigrams = dict(bigram_counts.most_common(10)) 116 | 117 | # Create bar chart 118 | plt.figure(figsize=(10, 7)) 119 | plt.bar(range(len(top_bigrams)), list(top_bigrams.values()), align='center') 120 | plt.xticks(range(len(top_bigrams)), list(top_bigrams.keys()), rotation=90) 121 | plt.xlabel('Bigram Words') 122 | plt.ylabel('Count') 123 | plt.title(f"Top 10 Bigram from PDF Document") 124 | 125 | # Add Gemini response text to the plot 126 | gemini_response_text = response_gemini.text 127 | 128 | # Save the entire plot as a PNG 129 | plt.tight_layout() 130 | plt.savefig("bigram_with_gemini_response.png") 131 | 132 | # Display the plot and Gemini response in Streamlit 133 | st.subheader("Bigram for PDF Document") 134 | st.image("bigram_with_gemini_response.png") 135 | st.subheader("Google Gemini Response") 136 | st.write(gemini_response_text) 137 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/table_scraper_analysis.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import requests 3 | from bs4 import BeautifulSoup 4 | import pandas as pd 5 | import pathlib 6 | import textwrap 7 | import google.generativeai as genai 8 | from IPython.display import display 9 | from IPython.display import Markdown 10 | 11 | def scrape_tables(url): 12 | """ 13 | Scrapes all tables from a given URL and returns them as a list of DataFrames. 14 | 15 | Args: 16 | url: The URL of the webpage to scrape. 17 | 18 | Returns: 19 | A list of pandas DataFrames, each representing a scraped table. 20 | """ 21 | # Fetch the HTML content 22 | response = requests.get(url) 23 | response.raise_for_status() # Raise an error if the request fails 24 | 25 | # Parse the HTML content 26 | soup = BeautifulSoup(response.content, "html.parser") 27 | 28 | # Find all tables 29 | tables = soup.find_all("table") 30 | 31 | # Extract data and convert to DataFrames 32 | all_dataframes = [] 33 | for table in tables: 34 | # Extract rows from the table 35 | rows = table.find_all("tr") 36 | table_data = [] 37 | for row in rows: 38 | # Extract cells from each row 39 | cells = row.find_all(["th", "td"]) # Consider both headers and data cells 40 | row_data = [cell.text.strip() for cell in cells] # Extract text and strip whitespace 41 | table_data.append(row_data) 42 | 43 | # Check if there's data before creating a DataFrame 44 | if table_data: 45 | df = pd.DataFrame(table_data) 46 | all_dataframes.append(df) 47 | 48 | return all_dataframes 49 | 50 | def display_and_modify_tables(dataframes): 51 | """ 52 | Displays scraped DataFrames in Streamlit and allows user interaction for modifications. 53 | 54 | Args: 55 | dataframes: A list of pandas DataFrames containing scraped data. 
56 | """ 57 | # Display all scraped tables (head) 58 | if dataframes: 59 | st.subheader("Scraped Tables:") 60 | for i, df in enumerate(dataframes): 61 | st.write(f"Table {i+1}") 62 | st.dataframe(df.head()) # Show only the head (first few rows) 63 | 64 | # Table selection for modification 65 | selected_table_index = st.selectbox("Select a Table to Modify", range(len(dataframes))) 66 | selected_df = dataframes[selected_table_index] 67 | 68 | # Display the full selected table 69 | st.subheader(f"Selected Table {selected_table_index+1}") 70 | st.dataframe(selected_df) 71 | 72 | # Row selection for removal with multi-select 73 | rows_to_remove = st.multiselect("Select rows to remove (0-based):", selected_df.index.tolist(), key="rows_to_remove") 74 | 75 | # Combined button for row removal with confirmation 76 | if st.button("Remove Selected Rows"): 77 | if rows_to_remove: # Check if any rows were selected 78 | try: 79 | selected_df.drop(rows_to_remove, axis=0, inplace=True) # Remove rows 80 | st.success(f"Selected rows removed successfully!") 81 | # Display the modified DataFrame 82 | st.subheader(f"Modified Table {selected_table_index+1}") 83 | st.dataframe(selected_df) 84 | except Exception as e: 85 | st.error(f"Error removing rows: {e}") 86 | 87 | # --- Google Gemini Integration --- 88 | # Convert the DataFrame to a string variable 89 | df_string = selected_df.to_string() 90 | 91 | # Configure genai with API key (replace with your actual key) 92 | genai.configure(api_key="AIzaSyB2sQh_oHbFULJ7x2vixJWAboPpPvrCKoA") # Replace with your Google GenerativeAI API key 93 | 94 | model = genai.GenerativeModel('gemini-1.0-pro-latest') 95 | 96 | try: 97 | # Generate content with Gemini 98 | response = model.generate_content(["You are a Professional Data Analyst, Make a Summary and actionable insight based on the csv dataset here :", df_string], stream=True) 99 | response.resolve() 100 | st.write("**Google Gemini Response About Data**") 101 | st.write(response.text) 102 | except Exception as e: 103 | st.error(f"Error generating content with Google Gemini: {e}") 104 | 105 | 106 | # Streamlit app 107 | st.title("Table Scraper and Modifier App") 108 | url = st.text_input("Enter the URL to scrape:") 109 | if url: 110 | try: 111 | scraped_dataframes = scrape_tables(url) 112 | display_and_modify_tables(scraped_dataframes) 113 | except requests.exceptions.RequestException as e: 114 | st.error(f"An error occurred scraping the URL: {e}") 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /Streamlit-Web-Application-main/web_scrape.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import requests 3 | from bs4 import BeautifulSoup 4 | import spacy 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | import seaborn as sns 8 | sns.set_theme(color_codes=True) 9 | 10 | st.title("Web Article Summarizer") 11 | 12 | target_url = st.text_input("Enter the target URL:") 13 | process_button = st.button("Scrape Text") # Button text adjusted 14 | 15 | def scrape_text(url): 16 | """Scrapes text from a website and returns the extracted text. 17 | 18 | Args: 19 | url: The URL of the website to scrape. 20 | 21 | Returns: 22 | The scraped text content as a string, or None if there's an error. 
23 | """ 24 | 25 | if not url: # Check if URL is empty 26 | return None 27 | 28 | try: 29 | # Send HTTP request and parse HTML content 30 | response = requests.get(url) 31 | soup = BeautifulSoup(response.content, "html.parser") 32 | 33 | # Extract text based on your desired method (modify as needed) 34 | # Here, we're extracting text from all paragraphs 35 | paragraphs = soup.find_all("p") 36 | paragraph_text = [] 37 | for paragraph in paragraphs[:2]: # Limit to first 2 paragraphs 38 | paragraph_text.append(paragraph.text.strip()) 39 | 40 | # Combine text from all paragraphs (limited to first 2) 41 | all_paragraph_text = "\n".join(paragraph_text) 42 | 43 | return all_paragraph_text 44 | except Exception as e: 45 | st.error(f"Error scraping text: {e}") 46 | return None 47 | 48 | if process_button: # Only execute if button is clicked 49 | scraped_text = scrape_text(target_url) 50 | 51 | if scraped_text: 52 | st.success("Text scraped successfully!") 53 | st.subheader("Showing First Paragraphs of Article:") 54 | st.write(scraped_text) # Show only the first 2 paragraphs 55 | 56 | # Load English tokenizer, tagger, parser and NER 57 | nlp = spacy.load("en_core_web_sm") 58 | 59 | # Process the scraped text 60 | doc = nlp(scraped_text) 61 | 62 | # Analyze syntax - Extract Noun Phrases 63 | noun_phrases = [chunk.text for chunk in doc.noun_chunks] 64 | 65 | # Create DataFrame using Pandas (alternative to columns argument) 66 | noun_phrases_df = pd.DataFrame(noun_phrases, columns=["Noun Phrase"]) # Create DataFrame with Pandas 67 | 68 | # Display Noun Phrases in Streamlit table 69 | st.subheader("Noun Phrases:") 70 | st.dataframe(noun_phrases_df) 71 | 72 | # Analyze syntax - Extract Verbs 73 | verbs = [token.lemma_ for token in doc if token.pos_ == "VERB"] 74 | 75 | # Create DataFrame for Verbs 76 | verbs_df = pd.DataFrame(verbs, columns=["Verb"]) 77 | 78 | # Display Verbs in Streamlit table 79 | st.subheader("Verbs:") 80 | st.dataframe(verbs_df) 81 | 82 | 83 | # Analyze Part-of-Speech Distribution 84 | pos_counts = {token.pos_: 0 for token in doc} 85 | for token in doc: 86 | pos_counts[token.pos_] += 1 87 | 88 | # Create Part-of-Speech Distribution Plot (using matplotlib) 89 | plt.figure(figsize=(8, 6)) 90 | plt.bar(pos_counts.keys(), pos_counts.values()) 91 | plt.xlabel("Part of Speech") 92 | plt.ylabel("Count") 93 | plt.xticks(rotation=45) 94 | plt.tight_layout() 95 | 96 | # Display Part-of-Speech Distribution Plot in Streamlit 97 | st.subheader("Part-of-Speech Distribution :") 98 | st.pyplot(plt) 99 | 100 | else: 101 | st.warning("No text found on the provided URL or an error occurred.") 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /Tableau/Dashboard 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MagicDash91/All-of-Data-Science-Project/734e54ff951d39fb8d7ba007dcc9c82859ac7ec6/Tableau/Dashboard 1.png --------------------------------------------------------------------------------