├── AI_Technical_Analysis.py
├── AutoScraper Tutorial.ipynb
├── LLM_timeseries_crypto.ipynb
├── Pandas_Top_5_Tricks.ipynb
├── README.md
├── Technical_Indicators_For_Machine_Learning.ipynb
├── Yelp API Notebook.ipynb
├── ai_coding_agent_tutorial.ipynb
├── ai_image_generator.py
├── ai_sentiment_analysis_gemini.ipynb
├── ai_stocks_prediction.py
├── app_streamlit_app_builder_ai.py
├── fast.py
├── flight_delay_ML_project.ipynb
├── scikit-ollama-tutorial.ipynb
├── stock_sentiment_agents.ipynb
├── stocks_dashboard.py
└── structured_outputs.ipynb
/AI_Technical_Analysis.py:
--------------------------------------------------------------------------------
1 | ## Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)
2 |
3 | #### NOTE: Pin yfinance to this version for the chart code to work as written: "pip install yfinance==0.2.40"
4 |
5 | import streamlit as st
6 | import yfinance as yf
7 | import pandas as pd
8 | import plotly.graph_objects as go
9 | import ollama
10 | import tempfile
11 | import base64
12 | import os
13 |
14 | # Set up Streamlit app
15 | st.set_page_config(layout="wide")
16 | st.title("AI-Powered Technical Stock Analysis Dashboard")
17 | st.sidebar.header("Configuration")
18 |
19 | # Input for stock ticker and date range
20 | ticker = st.sidebar.text_input("Enter Stock Ticker (e.g., AAPL):", "AAPL")
21 | start_date = st.sidebar.date_input("Start Date", value=pd.to_datetime("2023-01-01"))
22 | end_date = st.sidebar.date_input("End Date", value=pd.to_datetime("2024-12-14"))
23 |
24 | # Fetch stock data
25 | if st.sidebar.button("Fetch Data"):
26 | st.session_state["stock_data"] = yf.download(ticker, start=start_date, end=end_date)
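  | # Hedged sketch, not part of the original app: yfinance releases newer than the
  | # pinned 0.2.40 can return MultiIndex columns (e.g. ('Close', 'AAPL')) even for a
  | # single ticker, which breaks data['Open'] below; flattening keeps the chart working.
  | if isinstance(st.session_state["stock_data"].columns, pd.MultiIndex):
  |     st.session_state["stock_data"].columns = st.session_state["stock_data"].columns.get_level_values(0)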
27 | st.success("Stock data loaded successfully!")
28 |
29 | # Check if data is available
30 | if "stock_data" in st.session_state:
31 | data = st.session_state["stock_data"]
32 |
33 | # Plot candlestick chart
34 | fig = go.Figure(data=[
35 | go.Candlestick(
36 | x=data.index,
37 | open=data['Open'],
38 | high=data['High'],
39 | low=data['Low'],
40 | close=data['Close'],
41 | name="Candlestick" # label the trace (the default legend name would be "trace 0")
42 | )
43 | ])
44 |
45 | # Sidebar: Select technical indicators
46 | st.sidebar.subheader("Technical Indicators")
47 | indicators = st.sidebar.multiselect(
48 | "Select Indicators:",
49 | ["20-Day SMA", "20-Day EMA", "20-Day Bollinger Bands", "VWAP"],
50 | default=["20-Day SMA"]
51 | )
52 |
53 | # Helper function to add indicators to the chart
54 | def add_indicator(indicator):
55 | if indicator == "20-Day SMA":
56 | sma = data['Close'].rolling(window=20).mean()
57 | fig.add_trace(go.Scatter(x=data.index, y=sma, mode='lines', name='SMA (20)'))
58 | elif indicator == "20-Day EMA":
59 | ema = data['Close'].ewm(span=20).mean()
60 | fig.add_trace(go.Scatter(x=data.index, y=ema, mode='lines', name='EMA (20)'))
61 | elif indicator == "20-Day Bollinger Bands":
62 | sma = data['Close'].rolling(window=20).mean()
63 | std = data['Close'].rolling(window=20).std()
64 | bb_upper = sma + 2 * std
65 | bb_lower = sma - 2 * std
66 | fig.add_trace(go.Scatter(x=data.index, y=bb_upper, mode='lines', name='BB Upper'))
67 | fig.add_trace(go.Scatter(x=data.index, y=bb_lower, mode='lines', name='BB Lower'))
68 | elif indicator == "VWAP":
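  | # Note: this is a cumulative VWAP over the whole selected date range; classic
  | # VWAP resets each trading session, so treat this as an approximation.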
69 | data['VWAP'] = (data['Close'] * data['Volume']).cumsum() / data['Volume'].cumsum()
70 | fig.add_trace(go.Scatter(x=data.index, y=data['VWAP'], mode='lines', name='VWAP'))
71 |
72 | # Add selected indicators to the chart
73 | for indicator in indicators:
74 | add_indicator(indicator)
75 |
76 | fig.update_layout(xaxis_rangeslider_visible=False)
77 | st.plotly_chart(fig)
78 |
79 | # Analyze chart with LLaMA 3.2 Vision
80 | st.subheader("AI-Powered Analysis")
81 | if st.button("Run AI Analysis"):
82 | with st.spinner("Analyzing the chart, please wait..."):
83 | # Save chart as a temporary image
84 | with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
85 | fig.write_image(tmpfile.name)
86 | tmpfile_path = tmpfile.name
87 |
88 | # Read image and encode to Base64
89 | with open(tmpfile_path, "rb") as image_file:
90 | image_data = base64.b64encode(image_file.read()).decode('utf-8')
91 |
92 | # Prepare AI analysis request
93 | messages = [{
94 | 'role': 'user',
95 | 'content': """You are a Stock Trader specializing in Technical Analysis at a top financial institution.
96 | Analyze the stock chart's technical indicators and provide a buy/hold/sell recommendation.
97 | Base your recommendation only on the candlestick chart and the displayed technical indicators.
98 | First, provide the recommendation; then provide your detailed reasoning.
99 | """,
100 | 'images': [image_data]
101 | }]
102 | response = ollama.chat(model='llama3.2-vision', messages=messages)
103 |
104 | # Display AI analysis result
105 | st.write("**AI Analysis Results:**")
106 | st.write(response["message"]["content"])
107 |
108 | # Clean up temporary file
109 | os.remove(tmpfile_path)
110 |
111 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/AutoScraper Tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# AutoScraper Tutorial"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "### Example 1: Pulling Text"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "from autoscraper import AutoScraper\n",
31 | "\n",
32 | "# Web Page to Scrape from\n",
33 | "url = 'https://www.noaa.gov/media-releases'\n",
34 | "\n",
35 | "# Example Text to Pull\n",
36 | "# Note: Change below to most recent news release headline on 'https://www.noaa.gov/media-releases'\n",
37 | "news_list = [\"Applications now open nationwide for community-led heat-monitoring campaigns\"]\n",
38 | "\n",
39 | "# Initialize AutoScraper\n",
40 | "scraper = AutoScraper()\n",
41 | "\n",
42 | "# Build Model\n",
43 | "news_result = scraper.build(url, news_list)\n",
44 | "\n",
45 | "# Review Results\n",
46 | "news_result"
47 | ]
48 | },
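  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "Once built, the learned scraping rules can be saved and reloaded later instead of rebuilding from an example list each time. A minimal sketch ('noaa_scraper' is just an example file name):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Save the learned rules to disk\n",
  | "scraper.save('noaa_scraper')\n",
  | "\n",
  | "# Later: reload the rules and reapply them to the same or a similar page\n",
  | "reloaded = AutoScraper()\n",
  | "reloaded.load('noaa_scraper')\n",
  | "reloaded.get_result_similar(url)"
  | ]
  | },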
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "### Example 2: Pulling Tabular Data"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "from autoscraper import AutoScraper\n",
63 | "\n",
64 | "# Web Page to Scrape from\n",
65 | "url = 'https://en.wikipedia.org/wiki/List_of_counties_in_California'\n",
66 | "\n",
67 | "# Example Text to Pull\n",
68 | "county_list = [\"Alameda County\",\"Yuba County\"]\n",
69 | "est_list = [\"1,622,188\",\"85,722\"]\n",
70 | "\n",
71 | "\n",
72 | "# Initialize AutoScraper\n",
73 | "scraper = AutoScraper()\n",
74 | "\n",
75 | "# Build Model\n",
76 | "county_result = scraper.build(url, county_list)\n",
77 | "est_result = scraper.build(url, est_list)\n",
78 | "\n",
79 | "# Review Results\n",
80 | "print(county_result)\n",
81 | "print(est_result)\n"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "import pandas as pd\n",
91 | "\n",
92 | "# Convert Lists to Dictionary\n",
93 | "data = {'County': county_result, 'Estimated Population': est_result}\n",
94 | "\n",
95 | "# Convert Dictionary to Dataframe\n",
96 | "df = pd.DataFrame(data)\n",
97 | "\n",
98 | "df"
99 | ]
100 | }
101 | ],
102 | "metadata": {
103 | "kernelspec": {
104 | "display_name": "general_env",
105 | "language": "python",
106 | "name": "python3"
107 | },
108 | "language_info": {
109 | "codemirror_mode": {
110 | "name": "ipython",
111 | "version": 3
112 | },
113 | "file_extension": ".py",
114 | "mimetype": "text/x-python",
115 | "name": "python",
116 | "nbconvert_exporter": "python",
117 | "pygments_lexer": "ipython3",
118 | "version": "3.12.3"
119 | }
120 | },
121 | "nbformat": 4,
122 | "nbformat_minor": 2
123 | }
124 |
--------------------------------------------------------------------------------
/LLM_timeseries_crypto.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Predicting Crypto with LLMs"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Libraries"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 10,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import pandas as pd\n",
24 | "import numpy as np\n",
25 | "from datetime import datetime, timedelta\n",
26 | "import yfinance as yf\n",
27 | "from statsmodels.tsa.arima.model import ARIMA\n",
28 | "from langchain_community.llms import Ollama"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "## Pull Crypto data"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 11,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "# Pull stock data from yfinance for the past month\n",
45 | "def pull_stocks(ticker):\n",
46 | " end_date = datetime.today()\n",
47 | " start_date = end_date - timedelta(days=30)\n",
48 | " stock_data = yf.Ticker(ticker)\n",
49 | " stock_df = stock_data.history(start=start_date, end=end_date)\n",
50 | " stock_df.index = stock_df.index.tz_localize(None) # Ensure stock data is timezone-naive\n",
51 | " stock_df = stock_df.reset_index()\n",
52 | " stock_df['Date'] = stock_df['Date'].dt.strftime('%Y-%m-%d')\n",
53 | "\n",
54 | " stock_df['pct_change'] = stock_df['Close'].pct_change()\n",
55 | "\n",
56 | " stock_df = stock_df[stock_df['pct_change'].notna()==True]\n",
57 | "\n",
58 | " stock_df = stock_df[['Date','pct_change']]\n",
59 | "\n",
60 | " actual_final = stock_df.tail(1)\n",
61 | "\n",
62 | " # stock_df = stock_df.iloc[:-1]\n",
63 | "\n",
64 | " return stock_df, actual_final\n",
65 | "\n",
66 | "btc, btc_final = pull_stocks('BTC-USD')\n",
67 | "eth, eth_final = pull_stocks('ETH-USD')\n",
68 | "xrp, xrp_final = pull_stocks('XRP-USD')\n",
69 | "\n"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "## Run ARIMA"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 12,
82 | "metadata": {},
83 | "outputs": [
84 | {
85 | "name": "stderr",
86 | "output_type": "stream",
87 | "text": [
88 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
89 | " self._init_dates(dates, freq)\n",
90 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
91 | " self._init_dates(dates, freq)\n",
92 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
93 | " self._init_dates(dates, freq)\n",
94 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
95 | " self._init_dates(dates, freq)\n",
96 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
97 | " self._init_dates(dates, freq)\n",
98 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
99 | " self._init_dates(dates, freq)\n",
100 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n",
101 | " warn('Non-invertible starting MA parameters found.'\n",
102 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
103 | " self._init_dates(dates, freq)\n",
104 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
105 | " self._init_dates(dates, freq)\n",
106 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
107 | " self._init_dates(dates, freq)\n"
108 | ]
109 | },
110 | {
111 | "name": "stdout",
112 | "output_type": "stream",
113 | "text": [
114 | "Predicted percentage change for next day: -0.007978\n",
115 | "Predicted percentage change for next day: -0.005279\n",
116 | "Predicted percentage change for next day: -0.006455\n"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "def arima(timeseries_df):\n",
122 | " # Ensure 'Date' is the index and in datetime format\n",
123 | " timeseries_df.set_index('Date', inplace=True)\n",
124 | " timeseries_df.index = pd.to_datetime(timeseries_df.index)\n",
125 | "\n",
126 | " # Remove the last row (assumed to be NaN)\n",
127 | " timeseries_df = timeseries_df[:-1]\n",
128 | "\n",
129 | " # Convert percentage strings to float if necessary\n",
130 | " if timeseries_df['pct_change'].dtype == 'object':\n",
131 | " timeseries_df['pct_change'] = timeseries_df['pct_change'].str.rstrip('%').astype('float') / 100.0\n",
132 | "\n",
133 | " # Fit ARIMA model\n",
134 | " model = ARIMA(timeseries_df['pct_change'].dropna(), order=(1, 1, 1))\n",
135 | " results = model.fit()\n",
136 | "\n",
137 | " # Predict the next day's percentage change\n",
138 | " forecast = results.forecast(steps=1)\n",
139 | " predicted_pct_change = forecast.values[0]\n",
140 | "\n",
141 | " print(f\"Predicted percentage change for next day: {predicted_pct_change:.6f}\")\n",
142 | "\n",
143 | "arima(btc)\n",
144 | "arima(eth)\n",
145 | "arima(xrp)"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "## Prepare data for LLM"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 13,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "from io import StringIO\n",
162 | "\n",
163 | "def convert_to_csv_string(timeseries):\n",
164 | "\n",
165 | " timeseries = timeseries.reset_index()\n",
166 | "\n",
167 | " timeseries['pct_change'] = np.round(timeseries['pct_change'], 6)\n",
168 | "\n",
169 | " # Remove final row\n",
170 | " timeseries = timeseries.iloc[:-1]\n",
171 | "\n",
172 | " # Convert DataFrame to CSV string\n",
173 | " csv_buffer = StringIO()\n",
174 | " timeseries.to_csv(csv_buffer, index=False)\n",
175 | " csv_string = csv_buffer.getvalue()\n",
176 | "\n",
177 | " return csv_string\n",
178 | "\n",
179 | "\n",
180 | "btc_for_llm = convert_to_csv_string(btc)\n",
181 | "eth_for_llm = convert_to_csv_string(eth)\n",
182 | "xrp_for_llm = convert_to_csv_string(xrp)\n"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 14,
188 | "metadata": {},
189 | "outputs": [
190 | {
191 | "name": "stdout",
192 | "output_type": "stream",
193 | "text": [
194 | "Date,pct_change\n",
195 | "2024-05-31,-0.012778\n",
196 | "2024-06-01,0.003193\n",
197 | "2024-06-02,0.00066\n",
198 | "2024-06-03,0.015545\n",
199 | "2024-06-04,0.025623\n",
200 | "2024-06-05,0.007299\n",
201 | "2024-06-06,-0.004581\n",
202 | "2024-06-07,-0.019992\n",
203 | "2024-06-08,-0.000531\n",
204 | "2024-06-09,0.004938\n",
205 | "2024-06-10,-0.001949\n",
206 | "2024-06-11,-0.031365\n",
207 | "2024-06-12,0.013503\n",
208 | "2024-06-13,-0.021758\n",
209 | "2024-06-14,-0.011165\n",
210 | "2024-06-15,0.002725\n",
211 | "2024-06-16,0.006769\n",
212 | "2024-06-17,-0.002232\n",
213 | "2024-06-18,-0.020297\n",
214 | "2024-06-19,-0.00277\n",
215 | "2024-06-20,-0.002026\n",
216 | "2024-06-21,-0.011298\n",
217 | "2024-06-22,0.00244\n",
218 | "2024-06-23,-0.016681\n",
219 | "2024-06-24,-0.045954\n",
220 | "2024-06-25,0.025337\n",
221 | "2024-06-26,-0.016073\n",
222 | "2024-06-27,0.013049\n",
223 | "\n"
224 | ]
225 | }
226 | ],
227 | "source": [
228 | "print(btc_for_llm)"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "## Run LLM"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "Note: This requires having an active local Ollama server connection running and installing the llama3, mistral, and gemma3 models"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 15,
248 | "metadata": {},
249 | "outputs": [],
250 | "source": [
251 | "## Note: Change the date of the prediction/forecast for your own use (here, 2024-06-28)\n",
252 | "\n",
253 | "def predict_timeseries(timeseries):\n",
254 | " output = llm.invoke(f\"\"\"\n",
255 | " You are a large language model with time series forecasting capabilities.\n",
256 | " Predict the percent change for the day immediately after the end of the provided time series (2024-06-28).\n",
257 | " Use only your model capabilities, not any other method.\n",
258 | " The data is in the format of a csv file.\n",
259 | " The dataset includes:\n",
260 | " - Date\n",
261 | " - Percent change in the cryptocurrency from the previous day\n",
262 | " Provide only the forecasted percent change for 2024-06-28 as a point estimate. \n",
263 | " Do not include any other text or context, just the one value:\n",
264 | " {timeseries}\n",
265 | " \"\"\")\n",
266 | " return output.strip()\n"
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": 16,
272 | "metadata": {},
273 | "outputs": [
274 | {
275 | "name": "stdout",
276 | "output_type": "stream",
277 | "text": [
278 | "0.008937 (rounded to four decimal places)\n",
279 | "0.0198 (rounded to two decimal places)\n",
280 | "0.00895 (rounded to four decimal places)\n"
281 | ]
282 | }
283 | ],
284 | "source": [
285 | "llm = Ollama(model=\"mistral\", temperature=0)\n",
286 | "\n",
287 | "print(predict_timeseries(btc_for_llm))\n",
288 | "print(predict_timeseries(eth_for_llm))\n",
289 | "print(predict_timeseries(xrp_for_llm))"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 17,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "name": "stdout",
299 | "output_type": "stream",
300 | "text": [
301 | "0.011345\n",
302 | "0.011345\n",
303 | "0.005211\n"
304 | ]
305 | }
306 | ],
307 | "source": [
308 | "llm = Ollama(model=\"llama3\", temperature=0)\n",
309 | "\n",
310 | "print(predict_timeseries(btc_for_llm))\n",
311 | "print(predict_timeseries(eth_for_llm))\n",
312 | "print(predict_timeseries(xrp_for_llm))\n"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 18,
318 | "metadata": {},
319 | "outputs": [
320 | {
321 | "name": "stdout",
322 | "output_type": "stream",
323 | "text": [
324 | "0.012778\n",
325 | "0.012345\n",
326 | "0.005678\n"
327 | ]
328 | }
329 | ],
330 | "source": [
331 | "llm = Ollama(model=\"gemma2\", temperature=0)\n",
332 | "\n",
333 | "print(predict_timeseries(btc_for_llm))\n",
334 | "print(predict_timeseries(eth_for_llm))\n",
335 | "print(predict_timeseries(xrp_for_llm))\n"
336 | ]
337 | },
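  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "One way to judge the LLMs against the ARIMA baseline is the absolute error of each point forecast versus the held-out day (the actuals are shown in the next section). A minimal sketch using the BTC forecasts printed above:"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Scorecard: absolute error of each BTC point forecast vs. the held-out actual\n",
  | "# (forecast values copied from the model runs above)\n",
  | "actual_btc = btc_final['pct_change'].iloc[0]\n",
  | "btc_forecasts = {'ARIMA': -0.007978, 'mistral': 0.008937, 'llama3': 0.011345, 'gemma2': 0.012778}\n",
  | "for name, pred in btc_forecasts.items():\n",
  | "    print(f\"{name}: absolute error = {abs(pred - actual_btc):.6f}\")"
  | ]
  | },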
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 | "## Actual values for predicted day"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 19,
348 | "metadata": {},
349 | "outputs": [
350 | {
351 | "data": {
352 | "text/html": [
353 | "
\n",
354 | "\n",
367 | "
\n",
368 | " \n",
369 | " \n",
370 | " | \n",
371 | " Date | \n",
372 | " pct_change | \n",
373 | "
\n",
374 | " \n",
375 | " \n",
376 | " \n",
377 | " 29 | \n",
378 | " 2024-06-28 | \n",
379 | " -0.020853 | \n",
380 | "
\n",
381 | " \n",
382 | "
\n",
383 | "
"
384 | ],
385 | "text/plain": [
386 | " Date pct_change\n",
387 | "29 2024-06-28 -0.020853"
388 | ]
389 | },
390 | "execution_count": 19,
391 | "metadata": {},
392 | "output_type": "execute_result"
393 | }
394 | ],
395 | "source": [
396 | "btc_final"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": 20,
402 | "metadata": {},
403 | "outputs": [
404 | {
405 | "data": {
406 | "text/html": [
407 | "\n",
408 | "\n",
421 | "
\n",
422 | " \n",
423 | " \n",
424 | " | \n",
425 | " Date | \n",
426 | " pct_change | \n",
427 | "
\n",
428 | " \n",
429 | " \n",
430 | " \n",
431 | " 29 | \n",
432 | " 2024-06-28 | \n",
433 | " -0.020659 | \n",
434 | "
\n",
435 | " \n",
436 | "
\n",
437 | "
"
438 | ],
439 | "text/plain": [
440 | " Date pct_change\n",
441 | "29 2024-06-28 -0.020659"
442 | ]
443 | },
444 | "execution_count": 20,
445 | "metadata": {},
446 | "output_type": "execute_result"
447 | }
448 | ],
449 | "source": [
450 | "eth_final"
451 | ]
452 | },
453 | {
454 | "cell_type": "code",
455 | "execution_count": 21,
456 | "metadata": {},
457 | "outputs": [
458 | {
459 | "data": {
460 | "text/html": [
461 | "\n",
462 | "\n",
475 | "
\n",
476 | " \n",
477 | " \n",
478 | " | \n",
479 | " Date | \n",
480 | " pct_change | \n",
481 | "
\n",
482 | " \n",
483 | " \n",
484 | " \n",
485 | " 29 | \n",
486 | " 2024-06-28 | \n",
487 | " -0.007943 | \n",
488 | "
\n",
489 | " \n",
490 | "
\n",
491 | "
"
492 | ],
493 | "text/plain": [
494 | " Date pct_change\n",
495 | "29 2024-06-28 -0.007943"
496 | ]
497 | },
498 | "execution_count": 21,
499 | "metadata": {},
500 | "output_type": "execute_result"
501 | }
502 | ],
503 | "source": [
504 | "xrp_final"
505 | ]
506 | }
507 | ],
508 | "metadata": {
509 | "kernelspec": {
510 | "display_name": "general_env",
511 | "language": "python",
512 | "name": "python3"
513 | },
514 | "language_info": {
515 | "codemirror_mode": {
516 | "name": "ipython",
517 | "version": 3
518 | },
519 | "file_extension": ".py",
520 | "mimetype": "text/x-python",
521 | "name": "python",
522 | "nbconvert_exporter": "python",
523 | "pygments_lexer": "ipython3",
524 | "version": "3.1.-1"
525 | }
526 | },
527 | "nbformat": 4,
528 | "nbformat_minor": 2
529 | }
530 |
--------------------------------------------------------------------------------
/Pandas_Top_5_Tricks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Source: @DeepCharts Youtube Channel"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Top 5 Pandas Tips and Tricks"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import pandas as pd"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "### 1. Merging with the Indicator Argument"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 3,
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " key | \n",
60 | " value1 | \n",
61 | " value2 | \n",
62 | " _merge | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " 0 | \n",
68 | " A | \n",
69 | " 1.0 | \n",
70 | " NaN | \n",
71 | " left_only | \n",
72 | "
\n",
73 | " \n",
74 | " 1 | \n",
75 | " B | \n",
76 | " 2.0 | \n",
77 | " 4.0 | \n",
78 | " both | \n",
79 | "
\n",
80 | " \n",
81 | " 2 | \n",
82 | " C | \n",
83 | " 3.0 | \n",
84 | " 5.0 | \n",
85 | " both | \n",
86 | "
\n",
87 | " \n",
88 | " 3 | \n",
89 | " D | \n",
90 | " NaN | \n",
91 | " 6.0 | \n",
92 | " right_only | \n",
93 | "
\n",
94 | " \n",
95 | "
\n",
96 | "
"
97 | ],
98 | "text/plain": [
99 | " key value1 value2 _merge\n",
100 | "0 A 1.0 NaN left_only\n",
101 | "1 B 2.0 4.0 both\n",
102 | "2 C 3.0 5.0 both\n",
103 | "3 D NaN 6.0 right_only"
104 | ]
105 | },
106 | "execution_count": 3,
107 | "metadata": {},
108 | "output_type": "execute_result"
109 | }
110 | ],
111 | "source": [
112 | "df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})\n",
113 | "df2 = pd.DataFrame({'key': ['B', 'C', 'D'], 'value2': [4, 5, 6]})\n",
114 | "\n",
115 | "merged = pd.merge(df1, df2, on='key', how='outer', indicator=True)\n",
116 | "merged"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "### 2. Custom Chaining with pipe"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 5,
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/html": [
134 | "\n",
135 | "\n",
148 | "
\n",
149 | " \n",
150 | " \n",
151 | " | \n",
152 | " Quantity | \n",
153 | " Price | \n",
154 | " Total | \n",
155 | "
\n",
156 | " \n",
157 | " \n",
158 | " \n",
159 | " 3 | \n",
160 | " 20 | \n",
161 | " 250 | \n",
162 | " 5000 | \n",
163 | "
\n",
164 | " \n",
165 | " 1 | \n",
166 | " 15 | \n",
167 | " 150 | \n",
168 | " 2250 | \n",
169 | "
\n",
170 | " \n",
171 | " 2 | \n",
172 | " 10 | \n",
173 | " 200 | \n",
174 | " 2000 | \n",
175 | "
\n",
176 | " \n",
177 | "
\n",
178 | "
"
179 | ],
180 | "text/plain": [
181 | " Quantity Price Total\n",
182 | "3 20 250 5000\n",
183 | "1 15 150 2250\n",
184 | "2 10 200 2000"
185 | ]
186 | },
187 | "execution_count": 5,
188 | "metadata": {},
189 | "output_type": "execute_result"
190 | }
191 | ],
192 | "source": [
193 | "df = pd.DataFrame({\n",
194 | " 'Quantity': [10, 15, 10, 20],\n",
195 | " 'Price': [100, 150, 200, 250]\n",
196 | "})\n",
197 | "\n",
198 | "\n",
199 | "# Custom function to calculate Total\n",
200 | "def add_total(df):\n",
201 | " df['Total'] = df['Quantity'] * df['Price']\n",
202 | " return df\n",
203 | "\n",
204 | "# Method chaining with pipe\n",
205 | "result = (\n",
206 | " df\n",
207 | " .pipe(add_total)\n",
208 | " .query('Total > 1000')\n",
209 | " .sort_values('Total', ascending=False)\n",
210 | ")\n",
211 | "\n",
212 | "result"
213 | ]
214 | },
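  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "`pipe` also forwards extra arguments to the function, which keeps parameterized steps chainable. A small sketch (the `add_discounted_total` helper and the 10% rate are purely illustrative):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# pipe passes extra positional/keyword arguments through to the function\n",
  | "def add_discounted_total(df, rate):\n",
  | "    df['Discounted'] = df['Total'] * (1 - rate)\n",
  | "    return df\n",
  | "\n",
  | "result.pipe(add_discounted_total, rate=0.1)"
  | ]
  | },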
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "### 3. Window Functions (Moving Average and Cumulative Sum)"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 7,
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "data": {
229 | "text/html": [
230 | "\n",
231 | "\n",
244 | "
\n",
245 | " \n",
246 | " \n",
247 | " | \n",
248 | " Date | \n",
249 | " Sales | \n",
250 | " 3-Day MA | \n",
251 | " Cumulative Sales | \n",
252 | "
\n",
253 | " \n",
254 | " \n",
255 | " \n",
256 | " 0 | \n",
257 | " 2023-01-01 | \n",
258 | " 100 | \n",
259 | " NaN | \n",
260 | " 100.0 | \n",
261 | "
\n",
262 | " \n",
263 | " 1 | \n",
264 | " 2023-01-02 | \n",
265 | " 150 | \n",
266 | " NaN | \n",
267 | " 250.0 | \n",
268 | "
\n",
269 | " \n",
270 | " 2 | \n",
271 | " 2023-01-03 | \n",
272 | " 200 | \n",
273 | " 150.0 | \n",
274 | " 450.0 | \n",
275 | "
\n",
276 | " \n",
277 | " 3 | \n",
278 | " 2023-01-04 | \n",
279 | " 250 | \n",
280 | " 200.0 | \n",
281 | " 700.0 | \n",
282 | "
\n",
283 | " \n",
284 | " 4 | \n",
285 | " 2023-01-05 | \n",
286 | " 300 | \n",
287 | " 250.0 | \n",
288 | " 1000.0 | \n",
289 | "
\n",
290 | " \n",
291 | " 5 | \n",
292 | " 2023-01-06 | \n",
293 | " 350 | \n",
294 | " 300.0 | \n",
295 | " 1350.0 | \n",
296 | "
\n",
297 | " \n",
298 | " 6 | \n",
299 | " 2023-01-07 | \n",
300 | " 400 | \n",
301 | " 350.0 | \n",
302 | " 1750.0 | \n",
303 | "
\n",
304 | " \n",
305 | "
\n",
306 | "
"
307 | ],
308 | "text/plain": [
309 | " Date Sales 3-Day MA Cumulative Sales\n",
310 | "0 2023-01-01 100 NaN 100.0\n",
311 | "1 2023-01-02 150 NaN 250.0\n",
312 | "2 2023-01-03 200 150.0 450.0\n",
313 | "3 2023-01-04 250 200.0 700.0\n",
314 | "4 2023-01-05 300 250.0 1000.0\n",
315 | "5 2023-01-06 350 300.0 1350.0\n",
316 | "6 2023-01-07 400 350.0 1750.0"
317 | ]
318 | },
319 | "execution_count": 7,
320 | "metadata": {},
321 | "output_type": "execute_result"
322 | }
323 | ],
324 | "source": [
325 | "# Sample DataFrame\n",
326 | "df = pd.DataFrame({\n",
327 | " 'Date': pd.date_range('2023-01-01', periods=7),\n",
328 | " 'Sales': [100, 150, 200, 250, 300, 350, 400]\n",
329 | "})\n",
330 | "\n",
331 | "# Rolling average\n",
332 | "df['3-Day MA'] = df['Sales'].rolling(window=3).mean()\n",
333 | "\n",
334 | "# Cumulative sum\n",
335 | "df['Cumulative Sales'] = df['Sales'].expanding().sum()\n",
336 | "\n",
337 | "df"
338 | ]
339 | },
340 | {
341 | "cell_type": "markdown",
342 | "metadata": {},
343 | "source": [
344 | "### 4. Identify Duplicates and Drop Duplicates"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 9,
350 | "metadata": {},
351 | "outputs": [
352 | {
353 | "data": {
354 | "text/html": [
355 | "\n",
356 | "\n",
369 | "
\n",
370 | " \n",
371 | " \n",
372 | " | \n",
373 | " ID | \n",
374 | " Name | \n",
375 | "
\n",
376 | " \n",
377 | " \n",
378 | " \n",
379 | " 0 | \n",
380 | " 1 | \n",
381 | " Alice | \n",
382 | "
\n",
383 | " \n",
384 | " 1 | \n",
385 | " 2 | \n",
386 | " Bob | \n",
387 | "
\n",
388 | " \n",
389 | " 2 | \n",
390 | " 2 | \n",
391 | " Bob | \n",
392 | "
\n",
393 | " \n",
394 | " 3 | \n",
395 | " 3 | \n",
396 | " Charlie | \n",
397 | "
\n",
398 | " \n",
399 | " 4 | \n",
400 | " 4 | \n",
401 | " David | \n",
402 | "
\n",
403 | " \n",
404 | " 5 | \n",
405 | " 4 | \n",
406 | " David | \n",
407 | "
\n",
408 | " \n",
409 | "
\n",
410 | "
"
411 | ],
412 | "text/plain": [
413 | " ID Name\n",
414 | "0 1 Alice\n",
415 | "1 2 Bob\n",
416 | "2 2 Bob\n",
417 | "3 3 Charlie\n",
418 | "4 4 David\n",
419 | "5 4 David"
420 | ]
421 | },
422 | "execution_count": 9,
423 | "metadata": {},
424 | "output_type": "execute_result"
425 | }
426 | ],
427 | "source": [
428 | "# Sample DataFrame with duplicates\n",
429 | "df = pd.DataFrame({\n",
430 | " 'ID': [1, 2, 2, 3, 4, 4],\n",
431 | " 'Name': ['Alice', 'Bob', 'Bob', 'Charlie', 'David', 'David']\n",
432 | "})\n",
433 | "df"
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": 11,
439 | "metadata": {},
440 | "outputs": [
441 | {
442 | "data": {
443 | "text/html": [
444 | "\n",
445 | "\n",
458 | "
\n",
459 | " \n",
460 | " \n",
461 | " | \n",
462 | " ID | \n",
463 | " Name | \n",
464 | "
\n",
465 | " \n",
466 | " \n",
467 | " \n",
468 | " 1 | \n",
469 | " 2 | \n",
470 | " Bob | \n",
471 | "
\n",
472 | " \n",
473 | " 2 | \n",
474 | " 2 | \n",
475 | " Bob | \n",
476 | "
\n",
477 | " \n",
478 | " 4 | \n",
479 | " 4 | \n",
480 | " David | \n",
481 | "
\n",
482 | " \n",
483 | " 5 | \n",
484 | " 4 | \n",
485 | " David | \n",
486 | "
\n",
487 | " \n",
488 | "
\n",
489 | "
"
490 | ],
491 | "text/plain": [
492 | " ID Name\n",
493 | "1 2 Bob\n",
494 | "2 2 Bob\n",
495 | "4 4 David\n",
496 | "5 4 David"
497 | ]
498 | },
499 | "execution_count": 11,
500 | "metadata": {},
501 | "output_type": "execute_result"
502 | }
503 | ],
504 | "source": [
505 | "# Identify duplicates\n",
506 | "duplicates = df[df.duplicated(subset='ID', keep=False)]\n",
507 | "duplicates\n"
508 | ]
509 | },
510 | {
511 | "cell_type": "code",
512 | "execution_count": 13,
513 | "metadata": {},
514 | "outputs": [
515 | {
516 | "data": {
517 | "text/html": [
518 | "\n",
519 | "\n",
532 | "
\n",
533 | " \n",
534 | " \n",
535 | " | \n",
536 | " ID | \n",
537 | " Name | \n",
538 | "
\n",
539 | " \n",
540 | " \n",
541 | " \n",
542 | " 0 | \n",
543 | " 1 | \n",
544 | " Alice | \n",
545 | "
\n",
546 | " \n",
547 | " 1 | \n",
548 | " 2 | \n",
549 | " Bob | \n",
550 | "
\n",
551 | " \n",
552 | " 3 | \n",
553 | " 3 | \n",
554 | " Charlie | \n",
555 | "
\n",
556 | " \n",
557 | " 4 | \n",
558 | " 4 | \n",
559 | " David | \n",
560 | "
\n",
561 | " \n",
562 | "
\n",
563 | "
"
564 | ],
565 | "text/plain": [
566 | " ID Name\n",
567 | "0 1 Alice\n",
568 | "1 2 Bob\n",
569 | "3 3 Charlie\n",
570 | "4 4 David"
571 | ]
572 | },
573 | "execution_count": 13,
574 | "metadata": {},
575 | "output_type": "execute_result"
576 | }
577 | ],
578 | "source": [
579 | "# Remove duplicates, keep first\n",
580 | "df_cleaned = df.drop_duplicates(subset='ID')\n",
581 | "df_cleaned"
582 | ]
583 | },
584 | {
585 | "cell_type": "markdown",
586 | "metadata": {},
587 | "source": [
588 | "### 5. Binning Data with cut and qcut"
589 | ]
590 | },
591 | {
592 | "cell_type": "code",
593 | "execution_count": 15,
594 | "metadata": {},
595 | "outputs": [
596 | {
597 | "data": {
598 | "text/html": [
599 | "\n",
600 | "\n",
613 | "
\n",
614 | " \n",
615 | " \n",
616 | " | \n",
617 | " Age | \n",
618 | " Income | \n",
619 | "
\n",
620 | " \n",
621 | " \n",
622 | " \n",
623 | " 0 | \n",
624 | " 22 | \n",
625 | " 25000 | \n",
626 | "
\n",
627 | " \n",
628 | " 1 | \n",
629 | " 25 | \n",
630 | " 27000 | \n",
631 | "
\n",
632 | " \n",
633 | " 2 | \n",
634 | " 29 | \n",
635 | " 30000 | \n",
636 | "
\n",
637 | " \n",
638 | " 3 | \n",
639 | " 34 | \n",
640 | " 32000 | \n",
641 | "
\n",
642 | " \n",
643 | " 4 | \n",
644 | " 45 | \n",
645 | " 40000 | \n",
646 | "
\n",
647 | " \n",
648 | " 5 | \n",
649 | " 52 | \n",
650 | " 50000 | \n",
651 | "
\n",
652 | " \n",
653 | " 6 | \n",
654 | " 61 | \n",
655 | " 60000 | \n",
656 | "
\n",
657 | " \n",
658 | " 7 | \n",
659 | " 70 | \n",
660 | " 70000 | \n",
661 | "
\n",
662 | " \n",
663 | " 8 | \n",
664 | " 80 | \n",
665 | " 80000 | \n",
666 | "
\n",
667 | " \n",
668 | " 9 | \n",
669 | " 90 | \n",
670 | " 90000 | \n",
671 | "
\n",
672 | " \n",
673 | "
\n",
674 | "
"
675 | ],
676 | "text/plain": [
677 | " Age Income\n",
678 | "0 22 25000\n",
679 | "1 25 27000\n",
680 | "2 29 30000\n",
681 | "3 34 32000\n",
682 | "4 45 40000\n",
683 | "5 52 50000\n",
684 | "6 61 60000\n",
685 | "7 70 70000\n",
686 | "8 80 80000\n",
687 | "9 90 90000"
688 | ]
689 | },
690 | "execution_count": 15,
691 | "metadata": {},
692 | "output_type": "execute_result"
693 | }
694 | ],
695 | "source": [
696 | "# Sample data\n",
697 | "data = {\n",
698 | " 'Age': [22, 25, 29, 34, 45, 52, 61, 70, 80, 90],\n",
699 | " 'Income': [25000, 27000, 30000, 32000, 40000, 50000, 60000, 70000, 80000, 90000]\n",
700 | "}\n",
701 | "\n",
702 | "df = pd.DataFrame(data)\n",
703 | "df"
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": 17,
709 | "metadata": {},
710 | "outputs": [
711 | {
712 | "data": {
713 | "text/html": [
714 | "\n",
715 | "\n",
728 | "
\n",
729 | " \n",
730 | " \n",
731 | " | \n",
732 | " Age | \n",
733 | " Income | \n",
734 | " Age Group | \n",
735 | "
\n",
736 | " \n",
737 | " \n",
738 | " \n",
739 | " 0 | \n",
740 | " 22 | \n",
741 | " 25000 | \n",
742 | " Young Adult | \n",
743 | "
\n",
744 | " \n",
745 | " 1 | \n",
746 | " 25 | \n",
747 | " 27000 | \n",
748 | " Young Adult | \n",
749 | "
\n",
750 | " \n",
751 | " 2 | \n",
752 | " 29 | \n",
753 | " 30000 | \n",
754 | " Young Adult | \n",
755 | "
\n",
756 | " \n",
757 | " 3 | \n",
758 | " 34 | \n",
759 | " 32000 | \n",
760 | " Young Adult | \n",
761 | "
\n",
762 | " \n",
763 | " 4 | \n",
764 | " 45 | \n",
765 | " 40000 | \n",
766 | " Adult | \n",
767 | "
\n",
768 | " \n",
769 | " 5 | \n",
770 | " 52 | \n",
771 | " 50000 | \n",
772 | " Adult | \n",
773 | "
\n",
774 | " \n",
775 | " 6 | \n",
776 | " 61 | \n",
777 | " 60000 | \n",
778 | " Senior | \n",
779 | "
\n",
780 | " \n",
781 | " 7 | \n",
782 | " 70 | \n",
783 | " 70000 | \n",
784 | " Senior | \n",
785 | "
\n",
786 | " \n",
787 | " 8 | \n",
788 | " 80 | \n",
789 | " 80000 | \n",
790 | " Senior | \n",
791 | "
\n",
792 | " \n",
793 | " 9 | \n",
794 | " 90 | \n",
795 | " 90000 | \n",
796 | " Senior | \n",
797 | "
\n",
798 | " \n",
799 | "
\n",
800 | "
"
801 | ],
802 | "text/plain": [
803 | " Age Income Age Group\n",
804 | "0 22 25000 Young Adult\n",
805 | "1 25 27000 Young Adult\n",
806 | "2 29 30000 Young Adult\n",
807 | "3 34 32000 Young Adult\n",
808 | "4 45 40000 Adult\n",
809 | "5 52 50000 Adult\n",
810 | "6 61 60000 Senior\n",
811 | "7 70 70000 Senior\n",
812 | "8 80 80000 Senior\n",
813 | "9 90 90000 Senior"
814 | ]
815 | },
816 | "execution_count": 17,
817 | "metadata": {},
818 | "output_type": "execute_result"
819 | }
820 | ],
821 | "source": [
822 | "# Equal-width binning for Age\n",
823 | "age_bins = [0, 18, 35, 60, 100]\n",
824 | "age_labels = ['Child', 'Young Adult', 'Adult', 'Senior']\n",
825 | "df['Age Group'] = pd.cut(df['Age'], bins=age_bins, labels=age_labels)\n",
826 | "df"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 19,
832 | "metadata": {},
833 | "outputs": [
834 | {
835 | "data": {
836 | "text/html": [
837 | "\n",
838 | "\n",
851 | "
\n",
852 | " \n",
853 | " \n",
854 | " | \n",
855 | " Age | \n",
856 | " Income | \n",
857 | " Age Group | \n",
858 | " Income Quartile | \n",
859 | "
\n",
860 | " \n",
861 | " \n",
862 | " \n",
863 | " 0 | \n",
864 | " 22 | \n",
865 | " 25000 | \n",
866 | " Young Adult | \n",
867 | " Q1 | \n",
868 | "
\n",
869 | " \n",
870 | " 1 | \n",
871 | " 25 | \n",
872 | " 27000 | \n",
873 | " Young Adult | \n",
874 | " Q1 | \n",
875 | "
\n",
876 | " \n",
877 | " 2 | \n",
878 | " 29 | \n",
879 | " 30000 | \n",
880 | " Young Adult | \n",
881 | " Q1 | \n",
882 | "
\n",
883 | " \n",
884 | " 3 | \n",
885 | " 34 | \n",
886 | " 32000 | \n",
887 | " Young Adult | \n",
888 | " Q2 | \n",
889 | "
\n",
890 | " \n",
891 | " 4 | \n",
892 | " 45 | \n",
893 | " 40000 | \n",
894 | " Adult | \n",
895 | " Q2 | \n",
896 | "
\n",
897 | " \n",
898 | " 5 | \n",
899 | " 52 | \n",
900 | " 50000 | \n",
901 | " Adult | \n",
902 | " Q3 | \n",
903 | "
\n",
904 | " \n",
905 | " 6 | \n",
906 | " 61 | \n",
907 | " 60000 | \n",
908 | " Senior | \n",
909 | " Q3 | \n",
910 | "
\n",
911 | " \n",
912 | " 7 | \n",
913 | " 70 | \n",
914 | " 70000 | \n",
915 | " Senior | \n",
916 | " Q4 | \n",
917 | "
\n",
918 | " \n",
919 | " 8 | \n",
920 | " 80 | \n",
921 | " 80000 | \n",
922 | " Senior | \n",
923 | " Q4 | \n",
924 | "
\n",
925 | " \n",
926 | " 9 | \n",
927 | " 90 | \n",
928 | " 90000 | \n",
929 | " Senior | \n",
930 | " Q4 | \n",
931 | "
\n",
932 | " \n",
933 | "
\n",
934 | "
"
935 | ],
936 | "text/plain": [
937 | " Age Income Age Group Income Quartile\n",
938 | "0 22 25000 Young Adult Q1\n",
939 | "1 25 27000 Young Adult Q1\n",
940 | "2 29 30000 Young Adult Q1\n",
941 | "3 34 32000 Young Adult Q2\n",
942 | "4 45 40000 Adult Q2\n",
943 | "5 52 50000 Adult Q3\n",
944 | "6 61 60000 Senior Q3\n",
945 | "7 70 70000 Senior Q4\n",
946 | "8 80 80000 Senior Q4\n",
947 | "9 90 90000 Senior Q4"
948 | ]
949 | },
950 | "execution_count": 19,
951 | "metadata": {},
952 | "output_type": "execute_result"
953 | }
954 | ],
955 | "source": [
956 | "# Quantile-based binning for Income\n",
957 | "df['Income Quartile'] = pd.qcut(df['Income'], 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])\n",
958 | "df"
959 | ]
960 | },
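  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "The difference between the two: `cut` bins by value edges (fixed or explicit), while `qcut` bins by sample quantiles so each bin holds roughly the same number of rows. Inspecting the computed interval edges makes this concrete (a quick sketch):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Compare bin edges: cut uses equal-width value ranges, qcut uses quantiles\n",
  | "print(pd.cut(df['Income'], 4).cat.categories)\n",
  | "print(pd.qcut(df['Income'], 4).cat.categories)"
  | ]
  | },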
961 | {
962 | "cell_type": "markdown",
963 | "metadata": {},
964 | "source": [
965 | "### BONUS. Interpolating Data"
966 | ]
967 | },
968 | {
969 | "cell_type": "code",
970 | "execution_count": 21,
971 | "metadata": {},
972 | "outputs": [
973 | {
974 | "data": {
975 | "text/html": [
976 | "\n",
977 | "\n",
990 | "
\n",
991 | " \n",
992 | " \n",
993 | " | \n",
994 | " Time | \n",
995 | " Value | \n",
996 | "
\n",
997 | " \n",
998 | " \n",
999 | " \n",
1000 | " 0 | \n",
1001 | " 2020-01-01 | \n",
1002 | " 1.0 | \n",
1003 | "
\n",
1004 | " \n",
1005 | " 1 | \n",
1006 | " 2020-01-02 | \n",
1007 | " NaN | \n",
1008 | "
\n",
1009 | " \n",
1010 | " 2 | \n",
1011 | " 2020-01-03 | \n",
1012 | " NaN | \n",
1013 | "
\n",
1014 | " \n",
1015 | " 3 | \n",
1016 | " 2020-01-04 | \n",
1017 | " 4.0 | \n",
1018 | "
\n",
1019 | " \n",
1020 | " 4 | \n",
1021 | " 2020-01-05 | \n",
1022 | " 5.0 | \n",
1023 | "
\n",
1024 | " \n",
1025 | "
\n",
1026 | "
"
1027 | ],
1028 | "text/plain": [
1029 | " Time Value\n",
1030 | "0 2020-01-01 1.0\n",
1031 | "1 2020-01-02 NaN\n",
1032 | "2 2020-01-03 NaN\n",
1033 | "3 2020-01-04 4.0\n",
1034 | "4 2020-01-05 5.0"
1035 | ]
1036 | },
1037 | "execution_count": 21,
1038 | "metadata": {},
1039 | "output_type": "execute_result"
1040 | }
1041 | ],
1042 | "source": [
1043 | "import numpy as np\n",
1044 | "\n",
1045 | "df = pd.DataFrame({'Time': pd.date_range(start='1/1/2020', periods=5, freq='D'),\n",
1046 | " 'Value': [1, np.nan, np.nan, 4, 5]})\n",
1047 | "df"
1048 | ]
1049 | },
1050 | {
1051 | "cell_type": "code",
1052 | "execution_count": 23,
1053 | "metadata": {},
1054 | "outputs": [
1055 | {
1056 | "data": {
1057 | "text/html": [
1058 | "\n",
1059 | "\n",
1072 | "
\n",
1073 | " \n",
1074 | " \n",
1075 | " | \n",
1076 | " Time | \n",
1077 | " Value | \n",
1078 | " Interpolated | \n",
1079 | "
\n",
1080 | " \n",
1081 | " \n",
1082 | " \n",
1083 | " 0 | \n",
1084 | " 2020-01-01 | \n",
1085 | " 1.0 | \n",
1086 | " 1.0 | \n",
1087 | "
\n",
1088 | " \n",
1089 | " 1 | \n",
1090 | " 2020-01-02 | \n",
1091 | " NaN | \n",
1092 | " 2.0 | \n",
1093 | "
\n",
1094 | " \n",
1095 | " 2 | \n",
1096 | " 2020-01-03 | \n",
1097 | " NaN | \n",
1098 | " 3.0 | \n",
1099 | "
\n",
1100 | " \n",
1101 | " 3 | \n",
1102 | " 2020-01-04 | \n",
1103 | " 4.0 | \n",
1104 | " 4.0 | \n",
1105 | "
\n",
1106 | " \n",
1107 | " 4 | \n",
1108 | " 2020-01-05 | \n",
1109 | " 5.0 | \n",
1110 | " 5.0 | \n",
1111 | "
\n",
1112 | " \n",
1113 | "
\n",
1114 | "
"
1115 | ],
1116 | "text/plain": [
1117 | " Time Value Interpolated\n",
1118 | "0 2020-01-01 1.0 1.0\n",
1119 | "1 2020-01-02 NaN 2.0\n",
1120 | "2 2020-01-03 NaN 3.0\n",
1121 | "3 2020-01-04 4.0 4.0\n",
1122 | "4 2020-01-05 5.0 5.0"
1123 | ]
1124 | },
1125 | "execution_count": 23,
1126 | "metadata": {},
1127 | "output_type": "execute_result"
1128 | }
1129 | ],
1130 | "source": [
1131 | "df['Interpolated'] = df['Value'].interpolate(method='linear')\n",
1132 | "df"
1133 | ]
1134 | }
1135 | ],
1136 | "metadata": {
1137 | "kernelspec": {
1138 | "display_name": "general_env",
1139 | "language": "python",
1140 | "name": "python3"
1141 | },
1142 | "language_info": {
1143 | "codemirror_mode": {
1144 | "name": "ipython",
1145 | "version": 3
1146 | },
1147 | "file_extension": ".py",
1148 | "mimetype": "text/x-python",
1149 | "name": "python",
1150 | "nbconvert_exporter": "python",
1151 | "pygments_lexer": "ipython3",
1152 | "version": "3.12.3"
1153 | }
1154 | },
1155 | "nbformat": 4,
1156 | "nbformat_minor": 2
1157 | }
1158 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # projects
2 |
3 | Code and notebooks from the @DeepCharts YouTube channel (https://www.youtube.com/@DeepCharts).
--------------------------------------------------------------------------------
/ai_coding_agent_tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Coding Agents with smolagents and Gemini Flash"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Setup"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "### Libraries"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 3,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel\n",
31 | "import os"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "### Gemini API Key"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 1,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "os.environ[\"GEMINI_API_KEY\"] = \"API Key Goes Here\""
48 | ]
49 | },
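  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "Hardcoding keys in notebooks makes them easy to leak. A safer sketch reads the key from the environment instead (this assumes you exported GEMINI_API_KEY before launching Jupyter):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Safer alternative: rely on an already-exported environment variable\n",
  | "assert os.getenv(\"GEMINI_API_KEY\"), \"export GEMINI_API_KEY before running this notebook\""
  | ]
  | },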
50 | {
51 | "cell_type": "code",
52 | "execution_count": 4,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "# Gemini\n",
57 | "model = LiteLLMModel(model_id=\"gemini/gemini-1.5-flash\")"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 14,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "# Ollama (Llama3.2)\n",
67 | "# model = LiteLLMModel(model_id=\"ollama/llama3.2\")\n"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "## Agent Creation"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 5,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "# Define the Feature Selection Agent\n",
84 | "feature_selection_agent = CodeAgent(\n",
85 | " tools=[DuckDuckGoSearchTool], # search internet if necessary\n",
86 | " additional_authorized_imports=['pandas','statsmodels','sklearn','numpy','json'], # packages for code interpreter\n",
87 | " model=model # model set above\n",
88 | ")\n"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "### Set Task Prompt"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "# Task for the agent\n",
105 | "task = \"\"\"\n",
106 | "1. Load the Diabetes dataset from the 'sklearn' library using the following code:\n",
107 | " from sklearn.datasets import load_diabetes\n",
108 | " import pandas as pd\n",
109 | "\n",
110 | " # Load the dataset\n",
111 | " data, target = load_diabetes(return_X_y=True, as_frame=False)\n",
112 | "\n",
113 | " # Create a DataFrame\n",
114 | " df = pd.DataFrame(data, columns=load_diabetes().feature_names)\n",
115 | " df['target'] = target\n",
116 | "2. Split data with a train/test split of 75%/25%\n",
117 | "3. Create a linear regression model on the training data predicting the target variable using the \"sklearn\" or \"statsmodels\" library.\n",
118 | "4. Execute on a strategy of combination of up to 3 predictors that attains the lowest root mean square error (RMSE) on the testing data. \n",
119 | " (You can't use the target variable).\n",
120 | "5. Use feature engineering as needed to improve model performance.\n",
121 | "6. Based on the lowest RMSE of each model for the testing data, provide a final list of predictors for the top 5 models\n",
122 | "7. Output as a table\n",
123 | "\"\"\""
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "metadata": {},
129 | "source": [
130 | "## Execute the agent and task"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "result = feature_selection_agent.run(task)"
140 | ]
141 | }
142 | ],
143 | "metadata": {
144 | "kernelspec": {
145 | "display_name": "smol_env",
146 | "language": "python",
147 | "name": "python3"
148 | },
149 | "language_info": {
150 | "codemirror_mode": {
151 | "name": "ipython",
152 | "version": 3
153 | },
154 | "file_extension": ".py",
155 | "mimetype": "text/x-python",
156 | "name": "python",
157 | "nbconvert_exporter": "python",
158 | "pygments_lexer": "ipython3",
159 | "version": "3.12.8"
160 | }
161 | },
162 | "nbformat": 4,
163 | "nbformat_minor": 2
164 | }
165 |
--------------------------------------------------------------------------------
/ai_image_generator.py:
--------------------------------------------------------------------------------
1 | # Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)
2 |
3 | # PART 1: LIBRARY IMPORTS
4 |
5 | import streamlit as st
6 | import replicate
7 | import os
8 | import requests
9 | from PIL import Image
10 | from io import BytesIO
11 |
12 |
13 | # PART 2: SETUP REPLICATE CREDENTIALS AND AUTHENTICATION
14 |
15 | # Set up your Replicate API key (optionally from environment variable)
16 | REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN") # You can store your API key in an environment variable
17 |
18 | if REPLICATE_API_TOKEN is None:
19 | st.error("Replicate API token not found. Please set it in your environment.")
20 | st.stop()
21 |
22 | # No explicit client object is needed: replicate.run() below uses the default
23 | # client, which reads REPLICATE_API_TOKEN from the environment
24 |
25 |
26 | # PART 3: STREAMLIT WEBAPP
27 |
28 | # Initialize session state for storing the generated image URL
29 | if 'image_url' not in st.session_state:
30 | st.session_state['image_url'] = None
31 |
32 |
33 | # PART 3A: SIDEBAR OPTIONS
34 |
35 | # Sidebar inputs
36 | with st.sidebar:
37 |
38 | # Title of the app
39 | st.title('AI Image Generation: Flux Schnell')
40 |
41 | st.header("Prompt and Options")
42 |
43 | # Input box for the user to type the prompt (using text_area for multiline input)
44 | prompt = st.text_area('Enter a prompt to generate an image', height=50)
45 |
46 | # Checkbox to enable or disable random seed
47 | use_random_seed = st.checkbox('Use Random Seed', value=True)
48 |
49 | # Slider for random seed (only if the checkbox is checked)
50 | if use_random_seed:
51 | random_seed = st.slider('Random Seed', 0, 1000, 435)
52 | else:
53 | random_seed = None
54 |
55 | # Slider for output quality
56 | output_quality = st.slider('Output Quality', 50, 100, 80)
57 |
58 | # Create two columns for Generate and Download buttons
59 | col1, col2 = st.columns([1, 1])
60 |
61 | # Button to submit the prompt and generate image
62 | generate_button = col1.button('Generate Image')
63 |
64 |
65 | # PART 4A: MAIN CONTENT AREA (IMAGE GENERATION AND ACCESS)
66 |
67 | # Check if the button was pressed and if there is a prompt
68 | if generate_button and prompt:
69 | with st.spinner('Generating image...'):
70 | try:
71 | # Call the Flux Schnell model on Replicate
72 | input_data = {
73 | "prompt": prompt,
74 | "aspect_ratio": '3:2', # Set the aspect ratio
75 | "quality": output_quality # Set the output quality
76 | }
77 |
78 | # Add random seed only if it's enabled
79 | if random_seed is not None:
80 | input_data["seed"] = random_seed
81 |
82 | # Use replicate.run to invoke the model
83 | output = replicate.run(
84 | "black-forest-labs/flux-schnell", # Model name
85 | input=input_data # Input to the model
86 | )
87 |
88 | # Store the generated image URL in session state
89 | st.session_state['image_url'] = str(output[0]) # first element of output; str() also covers newer replicate versions that return FileOutput objects
90 |
91 | except Exception as e:
92 | st.error(f"An error occurred: {e}")
93 |
94 | # If an image URL is present in session state, display the image and download button
95 | if st.session_state['image_url']:
96 | # Display the image
97 | st.image(st.session_state['image_url'], caption='Generated Image')
98 |
99 | # Download the image from the URL
100 | response = requests.get(st.session_state['image_url'])
101 | image = Image.open(BytesIO(response.content)).convert("RGB") # JPEG can't store alpha, so normalize to RGB
102 |
103 | # Convert the image to a binary stream and save it as .jpg
104 | img_buffer = BytesIO()
105 | image.save(img_buffer, format="JPEG")
106 | img_buffer.seek(0)
107 |
108 | # Display the download button in the second column
109 | with col2:
110 | st.download_button(
111 | label="Download Image",
112 | data=img_buffer,
113 | file_name="generated_image.jpg",
114 | mime="image/jpeg"
115 | )
--------------------------------------------------------------------------------
/ai_sentiment_analysis_gemini.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# AI Pipeline: Blue Sky Scraper + Gemini Flash Sentiment Analysis"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "### Libraries"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import requests\n",
24 | "import pandas as pd\n",
25 | "import google.generativeai as genai\n",
26 | "import enum\n",
27 | "from typing_extensions import TypedDict\n",
28 | "import json\n",
29 | "import plotly.express as px"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "## 1. Configuration"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "### Authentication and API Keys"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 2,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "# Replace with your Bluesky handle and password\n",
53 | "BLUESKY_HANDLE = 'handle goes here'\n",
54 | "BLUESKY_PASSWORD = 'password goes here'\n",
55 | "\n",
56 | "# Replace with your Google AI Studio API key\n",
57 | "genai.configure(api_key='api key goes here')"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "### Gemini Model"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 3,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "model = genai.GenerativeModel(\"gemini-1.5-flash\") # gemini-2.0-flash-exp"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "### Stock (or keyword to analyze)"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 4,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "search_term = 'ADBE'"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "### Number of posts to return"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 5,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "n = 100 # Number of latest posts to retrieve"
106 | ]
107 | },
108 | {
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 | "## 2. Blue Sky Web Scraper"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 6,
118 | "metadata": {},
119 | "outputs": [],
120 | "source": [
121 | "# Authenticate and obtain access token\n",
122 | "auth_response = requests.post(\n",
123 | " 'https://bsky.social/xrpc/com.atproto.server.createSession',\n",
124 | " json={'identifier': BLUESKY_HANDLE, 'password': BLUESKY_PASSWORD}\n",
125 | ")\n",
126 | "auth_response.raise_for_status()\n",
127 | "access_token = auth_response.json().get('accessJwt')\n"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 7,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "# Set up the request headers with the access token\n",
137 | "headers = {'Authorization': f'Bearer {access_token}'}\n",
138 | "\n",
139 | "# Define the search parameters\n",
140 | "params = {\n",
141 | " 'q': search_term,\n",
142 | " 'sort': 'latest',\n",
143 | " 'limit': n\n",
144 | "}\n",
145 | "\n",
146 | "# Perform the search request\n",
147 | "search_response = requests.get(\n",
148 | " 'https://bsky.social/xrpc/app.bsky.feed.searchPosts',\n",
149 | " headers=headers,\n",
150 | " params=params\n",
151 | ")\n",
152 | "search_response.raise_for_status()\n",
153 | "posts = search_response.json().get('posts', [])"
154 | ]
155 | },
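  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "searchPosts caps `limit` at 100 per request; for more posts, the response includes a `cursor` that can be passed back to page through results. A minimal sketch (the 200-post target is arbitrary):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Pagination sketch: follow the cursor to fetch more than one page of results\n",
  | "all_posts, cursor = [], None\n",
  | "while len(all_posts) < 200:\n",
  | "    page_params = dict(params)\n",
  | "    if cursor:\n",
  | "        page_params['cursor'] = cursor\n",
  | "    resp = requests.get('https://bsky.social/xrpc/app.bsky.feed.searchPosts',\n",
  | "                        headers=headers, params=page_params)\n",
  | "    resp.raise_for_status()\n",
  | "    body = resp.json()\n",
  | "    all_posts.extend(body.get('posts', []))\n",
  | "    cursor = body.get('cursor')\n",
  | "    if not cursor:\n",
  | "        break"
  | ]
  | },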
156 | {
157 | "cell_type": "code",
158 | "execution_count": 8,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "# Extract data and create a list of dictionaries\n",
163 | "data = []\n",
164 | "for post in posts:\n",
165 | " author = post.get('author', {}).get('handle', 'Unknown')\n",
166 | " content = post.get('record', {}).get('text', 'No content')\n",
167 | " created_at = post.get('record', {}).get('createdAt', 'Unknown date')\n",
168 | " data.append({'Date': created_at, 'Content': content, 'Author': author})\n",
169 | "\n",
170 | "# Convert list of dictionaries to DataFrame\n",
171 | "df = pd.DataFrame(data)\n",
172 | "\n",
173 | "# Convert 'Date' column to datetime format for better handling\n",
174 | "df['Date'] = pd.to_datetime(df['Date'], errors='coerce')"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 9,
180 | "metadata": {},
181 | "outputs": [
182 | {
183 | "data": {
268 | "text/plain": [
269 | " Date \\\n",
270 | "0 2024-12-31 19:10:07.207000+00:00 \n",
271 | "1 2024-12-31 17:39:40.015000+00:00 \n",
272 | "2 2024-12-31 17:39:40.014000+00:00 \n",
273 | "3 2024-12-31 16:03:55.965884+00:00 \n",
274 | "4 2024-12-30 16:01:16.570000+00:00 \n",
275 | ".. ... \n",
276 | "93 2024-12-12 17:19:22.999000+00:00 \n",
277 | "94 2024-12-12 16:53:12.278000+00:00 \n",
278 | "95 2024-12-12 16:20:44.597227+00:00 \n",
279 | "96 2024-12-12 15:49:51.063000+00:00 \n",
280 | "97 2024-12-12 15:38:39.435000+00:00 \n",
281 | "\n",
282 | " Content \n",
283 | "0 \\n#MarjorieTaylorGreene Went Christmas Shoppin... \n",
284 | "1 Over the past year #AJB and #ADBE swapped from... \n",
285 | "2 The major changes to the port. over the year w... \n",
286 | "3 📊 ADBE Market Analysis - Dec 31, 2024\\n\\nCurre... \n",
287 | "4 Adobe knows that #DEI is good for people, good... \n",
288 | ".. ... \n",
289 | "93 Hello, Investors! 👋\\nStocks were down modestly... \n",
290 | "94 Adobe posts record-breaking revenue 📈 but inve... \n",
291 | "95 $ADBE Technical Analysis | Dec 12\\nPrice: $549... \n",
292 | "96 $ADBE: Adobe shares dropped 13% as its 2025 ou... \n",
293 | "97 $ADBE FY-2024: Strong Q3 Perf: Rev Surpasses E... \n",
294 | "\n",
295 | "[98 rows x 2 columns]"
296 | ]
297 | },
298 | "execution_count": 9,
299 | "metadata": {},
300 | "output_type": "execute_result"
301 | }
302 | ],
303 | "source": [
304 | "# Display the DataFrame\n",
305 | "df[['Date','Content']]"
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {},
311 | "source": [
312 | "## 3. Google Gemini Sentiment Analysis"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 10,
318 | "metadata": {},
319 | "outputs": [],
320 | "source": [
321 | "class Sentiment(enum.Enum):\n",
322 | " POSITIVE = \"positive\"\n",
323 | " NEGATIVE = \"negative\"\n",
324 | " NEUTRAL = \"neutral\"\n",
325 | "\n",
326 | "class AnalysisResult(TypedDict):\n",
327 | " is_stock_related: bool\n",
328 | " sentiment: Sentiment\n"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 11,
334 | "metadata": {},
335 | "outputs": [],
336 | "source": [
337 |     "# Returns (is_stock_related, sentiment), or (None, None) on failure\n",
338 |     "def analyze_post(content: str) -> tuple:\n",
339 | " prompt = f\"\"\"\n",
340 | " Analyze the following post and determine:\n",
341 | " 1. Whether it is related to the company, {search_term}, and relates to or discusses \n",
342 | " past, current, or future stock performance of {search_term} explicitly.\n",
343 | " 2. If related, classify the sentiment as positive, negative, or neutral.\n",
344 | "\n",
345 | " Post: \"{content}\"\n",
346 | " \"\"\"\n",
347 | " response = model.generate_content(\n",
348 | " prompt,\n",
349 | " generation_config=genai.GenerationConfig(\n",
350 | " response_mime_type=\"application/json\",\n",
351 | " response_schema=AnalysisResult\n",
352 | " )\n",
353 | " )\n",
354 | " if response.candidates:\n",
355 | " candidate_content = response.candidates[0].content\n",
356 | " result_text = ''.join(part.text for part in candidate_content.parts)\n",
357 | " try:\n",
358 | " result = json.loads(result_text)\n",
359 | " is_stock_related = result.get('is_stock_related')\n",
360 | " sentiment = result.get('sentiment')\n",
361 | " if is_stock_related is not None and sentiment is not None:\n",
362 | " return is_stock_related, sentiment\n",
363 | " else:\n",
364 | " print(\"Missing expected keys in the response\")\n",
365 | " return None, None\n",
366 | " except json.JSONDecodeError:\n",
367 | " print(\"Failed to decode JSON response\")\n",
368 | " return None, None\n",
369 | " else:\n",
370 | " print(\"No candidates returned\")\n",
371 | " return None, None\n"
372 | ]
373 | },
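374 |   {
375 |    "cell_type": "markdown",
376 |    "metadata": {},
377 |    "source": [
378 |     "*Editor's note: one API call per post can hit free-tier rate limits. A minimal retry wrapper with exponential backoff (the `max_retries` value is an arbitrary choice):*"
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "code",
383 |    "execution_count": null,
384 |    "metadata": {},
385 |    "outputs": [],
386 |    "source": [
387 |     "import time\n",
388 |     "\n",
389 |     "# Hedged sketch: retry analyze_post with exponential backoff on transient errors\n",
390 |     "def analyze_post_with_retry(content: str, max_retries: int = 3) -> tuple:\n",
391 |     "    for attempt in range(max_retries):\n",
392 |     "        try:\n",
393 |     "            return analyze_post(content)\n",
394 |     "        except Exception as e:  # e.g. quota or transient network errors\n",
395 |     "            wait = 2 ** attempt\n",
396 |     "            print(f'Attempt {attempt + 1} failed ({e}); retrying in {wait}s')\n",
397 |     "            time.sleep(wait)\n",
398 |     "    return None, None"
399 |    ]
400 |   },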
374 | {
375 | "cell_type": "code",
376 | "execution_count": 12,
377 | "metadata": {},
378 | "outputs": [
379 | {
380 | "name": "stdout",
381 | "output_type": "stream",
382 | "text": [
383 | "Missing expected keys in the response\n",
384 | "Missing expected keys in the response\n"
385 | ]
386 | }
387 | ],
388 | "source": [
389 | "# Apply the analysis to each post\n",
390 | "df[['is_stock_related', 'sentiment']] = df['Content'].apply(\n",
391 | " lambda x: pd.Series(analyze_post(x))\n",
392 | ")"
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "execution_count": 13,
398 | "metadata": {},
399 | "outputs": [
400 | {
401 | "data": {
510 | "text/plain": [
511 | " Date \\\n",
512 | "0 2024-12-31 19:10:07.207000+00:00 \n",
513 | "1 2024-12-31 17:39:40.015000+00:00 \n",
514 | "2 2024-12-31 17:39:40.014000+00:00 \n",
515 | "3 2024-12-31 16:03:55.965884+00:00 \n",
516 | "4 2024-12-30 16:01:16.570000+00:00 \n",
517 | ".. ... \n",
518 | "93 2024-12-12 17:19:22.999000+00:00 \n",
519 | "94 2024-12-12 16:53:12.278000+00:00 \n",
520 | "95 2024-12-12 16:20:44.597227+00:00 \n",
521 | "96 2024-12-12 15:49:51.063000+00:00 \n",
522 | "97 2024-12-12 15:38:39.435000+00:00 \n",
523 | "\n",
524 | " Content is_stock_related \\\n",
525 | "0 \\n#MarjorieTaylorGreene Went Christmas Shoppin... True \n",
526 | "1 Over the past year #AJB and #ADBE swapped from... True \n",
527 | "2 The major changes to the port. over the year w... True \n",
528 | "3 📊 ADBE Market Analysis - Dec 31, 2024\\n\\nCurre... True \n",
529 | "4 Adobe knows that #DEI is good for people, good... True \n",
530 | ".. ... ... \n",
531 | "93 Hello, Investors! 👋\\nStocks were down modestly... True \n",
532 | "94 Adobe posts record-breaking revenue 📈 but inve... True \n",
533 | "95 $ADBE Technical Analysis | Dec 12\\nPrice: $549... True \n",
534 | "96 $ADBE: Adobe shares dropped 13% as its 2025 ou... True \n",
535 | "97 $ADBE FY-2024: Strong Q3 Perf: Rev Surpasses E... True \n",
536 | "\n",
537 | " sentiment \n",
538 | "0 neutral \n",
539 | "1 neutral \n",
540 | "2 negative \n",
541 | "3 negative \n",
542 | "4 positive \n",
543 | ".. ... \n",
544 | "93 negative \n",
545 | "94 negative \n",
546 | "95 positive \n",
547 | "96 negative \n",
548 | "97 positive \n",
549 | "\n",
550 | "[98 rows x 4 columns]"
551 | ]
552 | },
553 | "execution_count": 13,
554 | "metadata": {},
555 | "output_type": "execute_result"
556 | }
557 | ],
558 | "source": [
559 |     "df.drop(columns='Author', inplace=True)\n",
560 | "df"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": 15,
566 | "metadata": {},
567 | "outputs": [
583 | "data": {
584 | "application/vnd.plotly.v1+json": {
585 | "config": {
586 | "plotlyServerURL": "https://plot.ly"
587 | },
588 | "data": [
589 | {
590 |           "hovertemplate": "Date=%{x}<br>Positive Sentiment Score=%{y}<extra></extra>",
591 | "legendgroup": "",
592 | "line": {
593 | "color": "#636efa",
594 | "dash": "solid"
595 | },
596 | "marker": {
597 | "symbol": "circle"
598 | },
599 | "mode": "lines+markers",
600 | "name": "",
601 | "orientation": "v",
602 | "showlegend": false,
603 | "type": "scatter",
604 | "x": [
605 | "2024-12-12",
606 | "2024-12-13",
607 | "2024-12-14",
608 | "2024-12-15",
609 | "2024-12-16",
610 | "2024-12-17",
611 | "2024-12-18",
612 | "2024-12-19",
613 | "2024-12-20",
614 | "2024-12-22",
615 | "2024-12-23",
616 | "2024-12-24",
617 | "2024-12-26",
618 | "2024-12-27",
619 | "2024-12-30",
620 | "2024-12-31"
621 | ],
622 | "xaxis": "x",
623 | "y": [
624 | 0.18181818181818182,
625 | 0.18181818181818182,
626 | 0,
627 | 0.5,
628 | 0.25,
629 | 0,
630 | 0,
631 | 0.3333333333333333,
632 | 0.75,
633 | 0.6666666666666666,
634 | 1,
635 | 0.5,
636 | 0.5,
637 | 1,
638 | 1,
639 | 0
640 | ],
641 | "yaxis": "y"
642 | }
643 | ],
644 | "layout": {
645 | "legend": {
646 | "tracegroupgap": 0
647 | },
1464 | "title": {
1465 | "text": "Daily Positive Sentiment Score"
1466 | },
1467 | "xaxis": {
1468 | "anchor": "y",
1469 | "domain": [
1470 | 0,
1471 | 1
1472 | ],
1473 | "dtick": "D",
1474 | "tickformat": "%Y-%m-%d",
1475 | "title": {
1476 | "text": "Date"
1477 | }
1478 | },
1479 | "yaxis": {
1480 | "anchor": "x",
1481 | "domain": [
1482 | 0,
1483 | 1
1484 | ],
1485 | "title": {
1486 | "text": "Positive Sentiment Score"
1487 | }
1488 | }
1489 | }
1490 | }
1491 | },
1492 | "metadata": {},
1493 | "output_type": "display_data"
1494 | }
1495 | ],
1496 | "source": [
1497 | "# Filter out neutral sentiment\n",
1498 |     "filtered_df = df[df['sentiment'] != 'neutral'].copy()  # .copy() avoids SettingWithCopyWarning\n",
1499 | "\n",
1500 | "# Extract the date (day only) and calculate daily positive sentiment score\n",
1501 | "filtered_df['Day'] = filtered_df['Date'].dt.date\n",
1502 | "daily_sentiment = (\n",
1503 | " filtered_df.groupby('Day')['sentiment']\n",
1504 | " .apply(lambda x: (x == 'positive').sum() / len(x))\n",
1505 | " .reset_index(name='positive_sentiment_score')\n",
1506 | ")\n",
1507 | "\n",
1508 | "# Plot the daily sentiment score\n",
1509 | "fig = px.line(\n",
1510 | " daily_sentiment,\n",
1511 | " x='Day',\n",
1512 | " y='positive_sentiment_score',\n",
1513 | " title='Daily Positive Sentiment Score',\n",
1514 | " labels={'positive_sentiment_score': 'Positive Sentiment Score', 'Day': 'Date'},\n",
1515 | " markers=True,\n",
1516 | ")\n",
1517 | "\n",
1518 | "fig.update_xaxes(dtick=\"D\", tickformat=\"%Y-%m-%d\")\n",
1519 | "\n",
1520 | "\n",
1521 | "fig"
1522 | ]
1523 | }
1524 | ],
1525 | "metadata": {
1526 | "kernelspec": {
1527 | "display_name": "general_env",
1528 | "language": "python",
1529 | "name": "python3"
1530 | },
1531 | "language_info": {
1532 | "codemirror_mode": {
1533 | "name": "ipython",
1534 | "version": 3
1535 | },
1536 | "file_extension": ".py",
1537 | "mimetype": "text/x-python",
1538 | "name": "python",
1539 | "nbconvert_exporter": "python",
1540 | "pygments_lexer": "ipython3",
1541 | "version": "3.12.3"
1542 | }
1543 | },
1544 | "nbformat": 4,
1545 | "nbformat_minor": 2
1546 | }
1547 |
--------------------------------------------------------------------------------
/ai_stocks_prediction.py:
--------------------------------------------------------------------------------
1 | ##############
2 | #### Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts
3 | #### Subscribe for more AI/Machine Learning/Quant Finance Tutorials
4 | ##############
5 |
6 |
7 | ##############
8 | ### PART 1 ###
9 | # LIBRARIES ##
10 | # & OLLAMA ##
11 | ##############
12 |
13 | # Data Importing Libraries
14 | import yfinance as yf
15 | from finvizfinance.quote import finvizfinance
16 |
17 | # Data Modeling Library
18 | from statsmodels.tsa.statespace.sarimax import SARIMAX
19 |
20 | # Charts
21 | import plotly.graph_objects as go
22 |
23 | # Data Manipulation
24 | import pandas as pd
25 | import numpy as np
26 |
27 | # Avoid Forecasting on Holidays
28 | import holidays
29 |
30 | # Create Local LLM Server Connection
31 | from langchain_community.llms import Ollama
32 |
33 | # Interactive Web App UI
34 | import streamlit as st
35 |
36 |
37 | # Connect to local Ollama server
38 | llm = Ollama(model='llama3')
39 |
40 |
41 | ##############
42 | ### PART 2 ###
43 | # FUNCTIONS ##
44 | ##############
45 |
46 | # Function to classify sentiment
47 | def classify_sentiment(title):
48 | output = llm.invoke(f"Classify the sentiment as 'POSITIVE' or 'NEGATIVE' or 'NEUTRAL' with just that one word only, no additional words or reasoning: {title}")
49 | return output.strip() # Ensure the response is clean and without extra spaces
50 |
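51 | # Editor's sketch (not in the original): LLMs occasionally answer off-script
52 | # ("Sentiment: POSITIVE."), so normalizing the reply to one of the three known
53 | # labels before filtering is safer than trusting the raw string.
54 | def normalize_sentiment(raw: str) -> str:
55 |     label = raw.strip().upper()
56 |     for known in ('POSITIVE', 'NEGATIVE', 'NEUTRAL'):
57 |         if known in label:
58 |             return known
59 |     return 'NEUTRAL'  # Unparseable replies fall back to neutral and get filtered out
60 | 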
51 | # Function to get and process news data
52 | def get_news_data(ticker):
53 |
54 | # Data Pull
55 | stock = finvizfinance(ticker)
56 | news_df = stock.ticker_news()
57 |
58 | # Preprocess before putting into LLM
59 | news_df['Title'] = news_df['Title'].str.lower()
60 |
61 | # Classify Sentiment function applied to each row of news_df
62 | news_df['sentiment'] = news_df['Title'].apply(classify_sentiment)
63 |
64 | # Postprocess after putting into LLM
65 | news_df['sentiment'] = news_df['sentiment'].str.upper()
66 | news_df = news_df[news_df['sentiment'] != 'NEUTRAL']
67 | news_df['Date'] = pd.to_datetime(news_df['Date'])
68 | news_df['DateOnly'] = news_df['Date'].dt.date
69 |
70 | return news_df
71 |
72 | # Function to group and process sentiment data
73 | def process_sentiment_data(news_df):
74 |
75 | # Reshape data to have df with columns: Date, # of positive Articles, # of negative Articles
76 | grouped = news_df.groupby(['DateOnly', 'sentiment']).size().unstack(fill_value=0)
77 | grouped = grouped.reindex(columns=['POSITIVE', 'NEGATIVE'], fill_value=0)
78 |
79 |     # Create rolling sums that count the number of positive and negative sentiment articles within the past 7 days
80 |     grouped['7day_sum_positive'] = grouped['POSITIVE'].rolling(window=7, min_periods=1).sum()
81 |     grouped['7day_sum_negative'] = grouped['NEGATIVE'].rolling(window=7, min_periods=1).sum()
82 | 
83 |     # "Percent Positive": share of positive articles within the rolling 7-day window
84 |     grouped['7day_pct_positive'] = grouped['7day_sum_positive'] / (grouped['7day_sum_positive'] + grouped['7day_sum_negative'])
85 | result_df = grouped.reset_index()
86 |
87 | return result_df
88 |
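89 | # Editor's note (sketch, not in the original): rolling(window=7) spans the last
90 | # 7 *rows*, which equals 7 calendar days only if every day has articles. To make
91 | # the window calendar-based, reindex to a continuous daily range first, e.g.:
92 | #     full_days = pd.date_range(grouped.index.min(), grouped.index.max()).date
93 | #     grouped = grouped.reindex(full_days, fill_value=0)
94 | 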
89 | # Function to fetch and process stock data
90 | def get_stock_data(ticker, start_date, end_date):
91 | stock_data = yf.download(ticker, start=start_date, end=end_date) # Pull ticker data
92 | stock_data['Pct_Change'] = stock_data['Close'].pct_change() * 100 # Transform closing value to percent change in closing value since previous day
93 | return stock_data
94 |
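95 | # Editor's note (version-dependent sketch): recent yfinance releases return
96 | # MultiIndex columns even for a single ticker; if that happens, flatten inside
97 | # get_stock_data before computing Pct_Change, e.g.:
98 | #     if isinstance(stock_data.columns, pd.MultiIndex):
99 | #         stock_data.columns = stock_data.columns.get_level_values(0)
100 | 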
95 | # Function to combine sentiment and stock data
96 | def combine_data(result_df, stock_data):
97 | combined_df = result_df.set_index('DateOnly').join(stock_data[['Pct_Change']], how='inner')
98 | combined_df['lagged_7day_pct_positive'] = combined_df['7day_pct_positive'].shift(1) # Lag sentiment feature by 1 day for temporal alignment
99 | return combined_df
100 |
101 | # Function to calculate Pearson correlation
102 | def calculate_correlation(combined_df):
103 | correlation_pct_change = combined_df[['lagged_7day_pct_positive', 'Pct_Change']].corr().iloc[0, 1]
104 | return correlation_pct_change
105 |
106 | # Function to get future dates excluding weekends and holidays
107 | def get_future_dates(start_date, num_days):
108 | us_holidays = holidays.US()
109 | future_dates = []
110 |     current_date = start_date + pd.Timedelta(days=1)  # Start the day after the last observed date
111 | while len(future_dates) < num_days:
112 | if current_date.weekday() < 5 and current_date not in us_holidays:
113 | future_dates.append(current_date)
114 | current_date += pd.Timedelta(days=1)
115 | return future_dates
116 |
117 | # Function to fit ARIMAX model and forecast
118 | def fit_and_forecast(combined_df, forecast_steps=3):
119 |     endog = combined_df['Pct_Change'].dropna() # Dependent variable
120 |     exog = combined_df['lagged_7day_pct_positive'].dropna() # Predictor variable
121 |     endog, exog = endog.align(exog, join='inner') # Align on shared dates (a one-sided .loc can raise KeyError)
122 | model = SARIMAX(endog, exog=exog, order=(1, 1, 1)) # ARIMAX model
123 | fit = model.fit(disp=False) # Fit model
124 |
125 | future_dates = get_future_dates(combined_df.index[-1], forecast_steps) # Future dates
126 |     future_exog = combined_df['lagged_7day_pct_positive'][-forecast_steps:].values.reshape(-1, 1) # Carry the last observed sentiment values forward as a naive stand-in for future exog
127 |
128 | forecast = fit.get_forecast(steps=forecast_steps, exog=future_exog) # Get forecast
129 | forecast_mean = forecast.predicted_mean # Predicted mean
130 | forecast_ci = forecast.conf_int() # Confidence intervals
131 |
132 | return forecast_mean, forecast_ci, future_dates # Return results
133 |
134 |
135 | # Function to create and display plot
136 | def create_plot(combined_df, forecast_mean, forecast_ci, forecast_index):
137 | # Standardize the sentiment proportion
138 | sentiment_std = (combined_df['7day_pct_positive'] - combined_df['7day_pct_positive'].mean()) / combined_df['7day_pct_positive'].std()
139 |
140 | fig = go.Figure()
141 |
142 | # Add standardized sentiment proportion
143 | fig.add_trace(go.Scatter(
144 | x=combined_df.index,
145 | y=sentiment_std,
146 | name='Standardized Sentiment Proportion',
147 | line=dict(color='blue'),
148 | mode='lines'
149 | ))
150 |
151 | # Add stock percentage change
152 | fig.add_trace(go.Scatter(
153 | x=combined_df.index,
154 | y=combined_df['Pct_Change'],
155 | name='Stock Pct Change',
156 | line=dict(color='green'),
157 | yaxis='y2',
158 | mode='lines'
159 | ))
160 |
161 | # Add forecasted stock percentage change
162 | fig.add_trace(go.Scatter(
163 | x=forecast_index,
164 | y=forecast_mean,
165 | name='Forecasted Pct Change',
166 | line=dict(color='red'),
167 | mode='lines'
168 | ))
169 |
170 | # Add confidence intervals for the forecast
171 | fig.add_trace(go.Scatter(
172 | x=np.concatenate([forecast_index, forecast_index[::-1]]),
173 | y=np.concatenate([forecast_ci.iloc[:, 0], forecast_ci.iloc[:, 1][::-1]]),
174 | fill='toself',
175 | fillcolor='rgba(255,0,0,0.2)',
176 | line=dict(color='rgba(255,255,255,0)'),
177 | hoverinfo="skip",
178 | showlegend=False
179 | ))
180 |
181 | # Update layout with appropriate y-axis ranges
182 | fig.update_layout(
183 | title='Sentiment Proportion and Stock Percentage Change with Forecast',
184 | xaxis_title='Date',
185 |         yaxis=dict(
186 |             # 'titlefont' was removed in newer Plotly releases;
187 |             # the font now nests under 'title' instead
188 |             title=dict(text='Standardized Sentiment Proportion', font=dict(color='blue'))
189 |         ),
190 |         yaxis2=dict(
191 |             title=dict(text='Stock Pct Change', font=dict(color='green')),
192 |             overlaying='y',
193 |             side='right'
194 |         ),
195 | template='plotly_dark'
196 | )
197 | st.plotly_chart(fig)
198 |
199 |
200 | ##############
201 | ### PART 3 ###
202 | # STREAMLIT ##
203 | ##############
204 |
205 | # Streamlit app
206 | st.sidebar.title("Predicting Stock Prices by News Sentiment")
207 | ticker = st.sidebar.text_input("Enter stock ticker (e.g., SBUX):", value='SBUX')
208 | run_button = st.sidebar.button("Run Analysis")
209 |
210 | if run_button:
211 | news_df = get_news_data(ticker)
212 | result_df = process_sentiment_data(news_df)
213 | start_date = result_df['DateOnly'].min().strftime('%Y-%m-%d')
214 | end_date = result_df['DateOnly'].max().strftime('%Y-%m-%d')
215 | stock_data = get_stock_data(ticker, start_date, end_date)
216 | combined_df = combine_data(result_df, stock_data)
217 | correlation_pct_change = calculate_correlation(combined_df)
218 | st.write(f'Pearson correlation between lagged sentiment score and stock percentage change: {correlation_pct_change}')
219 | forecast_mean, forecast_ci, forecast_index = fit_and_forecast(combined_df)
220 | create_plot(combined_df, forecast_mean, forecast_ci, forecast_index)
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
--------------------------------------------------------------------------------
/app_streamlit_app_builder_ai.py:
--------------------------------------------------------------------------------
1 | # Source: Deep Charts YouTube Channel (https://www.youtube.com/@DeepCharts)
2 |
3 | # Build Streamlit Apps Within a Streamlit App
4 | # Vibe coded with Gemini 2.5 Pro Experimental
5 |
6 | # Project Setup:
7 |
8 | # Create a project folder: e.g., streamlit_ide_prototype
9 | # Create a Python file: app.py inside the folder.
10 | # Create a sub-folder: workspace inside the project folder. This is where the AI will create/edit files.
11 | # Install libraries:
12 | # pip install streamlit google-generativeai python-dotenv streamlit-option-menu streamlit-ace streamlit-antd-components
13 | # API Key:
14 | # Get your Gemini API key (from Google AI Studio).
15 | # Create a file named .env in your project folder.
16 | # Add your API key to the .env file:
17 | # GOOGLE_API_KEY="YOUR_API_KEY_HERE"
18 | # Alternatively, for deployment, use Streamlit Secrets Management. For local testing, .env is often easier.
19 |
20 | ######
21 |
22 | import streamlit as st
23 | import google.generativeai as genai
24 | import os
25 | from pathlib import Path
26 | import json
27 | import time
28 | from dotenv import load_dotenv
29 | import subprocess # Needed to run other Streamlit apps (the preview)
30 | import socket # Needed to find an open network port for the preview
31 | import sys # Needed to get the path to the current Python executable
32 |
33 | # --- UI Components ---
34 | # These libraries provide pre-built UI elements like menus and the code editor.
35 | from streamlit_option_menu import option_menu
36 | from streamlit_ace import st_ace
37 | import streamlit_antd_components as sac # Using for specific buttons (Save/Delete group)
38 |
39 | # --- Configuration ---
40 | st.set_page_config(
41 | layout="wide",
42 | page_title="AI App Gen" # Shorter title
43 | )
44 | load_dotenv() # Load API keys from a file named .env in the same directory
45 |
46 | # --- Constants ---
47 | # Where generated Python app files will be saved
48 | WORKSPACE_DIR = Path("workspace_st_apps")
49 | WORKSPACE_DIR.mkdir(exist_ok=True) # Create the directory if it doesn't exist
50 |
51 | # Code editor appearance settings
52 | ACE_DEFAULT_THEME = "monokai"
53 | ACE_DEFAULT_KEYBINDING = "vscode"
54 |
55 | # Which Google AI model to use for generating code
56 | GEMINI_MODEL_NAME = "gemini-2.5-pro-exp-03-25"
57 |
58 | # Instructions for the Google AI model
59 | # This tells the AI how to format its responses (as JSON commands)
60 | GEMINI_SYSTEM_PROMPT = f"""
61 | You are an AI assistant helping create Streamlit applications.
62 | Your goal is to manage Python files in a workspace based on user requests.
63 | Respond *only* with a valid JSON array containing commands. Do not add any explanations before or after the JSON array.
64 |
65 | Available commands:
66 | 1. `{{"action": "create_update", "filename": "app_name.py", "content": "FULL_PYTHON_CODE_HERE"}}`
67 | - Use this to create a new Python file or completely overwrite an existing one.
68 | - Provide the *entire* file content. Escape backslashes (`\\\\`) and double quotes (`\\"`). Ensure newlines are `\\n`.
69 | - Do *not* include ```python markdown blocks or shebangs (`#!/usr/bin/env python`) in the "content".
70 | 2. `{{"action": "delete", "filename": "old_app.py"}}`
71 | - Use this to delete a Python file from the workspace.
72 | 3. `{{"action": "chat", "content": "Your message here."}}`
73 | - Use this *only* if you need to ask for clarification, report an issue you can't fix with file actions, or confirm understanding.
74 |
75 | Current Python files in workspace: [FILE_LIST]
76 |
77 | Example Interaction:
78 | User: Create a simple hello world app called hello.py
79 | AI: `[{{"action": "create_update", "filename": "hello.py", "content": "import streamlit as st\\n\\nst.title('Hello World!')\\nst.write('This is a simple app.')"}}]`
80 |
81 | Ensure your entire response is *only* the JSON array `[...]`.
82 | """
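83 | # Editor's note (sketch): prompt instructions alone don't guarantee JSON. Gemini's
84 | # JSON mode can enforce the format at the API level by passing
85 | # generation_config=genai.GenerationConfig(response_mime_type="application/json")
86 | # to model.generate_content(...), as done in ai_sentiment_analysis_gemini.ipynb.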
83 |
84 | # --- API Client Setup ---
85 | try:
86 | google_api_key = os.getenv("GOOGLE_API_KEY")
87 | if not google_api_key:
88 | # Stop the app if the API key is missing
89 | st.error("🔴 Google API Key not found. Please set `GOOGLE_API_KEY` in a `.env` file.")
90 | st.stop() # Halt execution
91 | # Configure the Gemini library with the key
92 | genai.configure(api_key=google_api_key)
93 | # Create the AI model object
94 | model = genai.GenerativeModel(GEMINI_MODEL_NAME)
95 | except Exception as e:
96 | st.error(f"🔴 Failed to set up Google AI: {e}")
97 | st.stop()
98 |
99 | # --- Session State ---
100 | # Streamlit reruns the script on interaction. Session state stores data
101 | # between reruns, like chat history or which file is selected.
102 | def initialize_session_state():
103 | """Sets up default values in Streamlit's session state dictionary."""
104 | state_defaults = {
105 | "messages": [], # List to store chat messages (user and AI)
106 | "selected_file": None, # Name of the file currently shown in the editor
107 | "file_content_on_load": "", # Content of the selected file when loaded (read-only)
108 | "preview_process": None, # Stores the running preview process object
109 | "preview_port": None, # Port number used by the preview
110 | "preview_url": None, # URL to access the preview
111 | "preview_file": None, # Name of the file being previewed
112 | "editor_unsaved_content": "", # Current text typed into the editor
113 | "last_saved_content": "", # Content that was last successfully saved to disk
114 | }
115 | for key, default_value in state_defaults.items():
116 | if key not in st.session_state:
117 | st.session_state[key] = default_value
118 |
119 | initialize_session_state() # Run the initialization
120 |
121 | # --- File System Functions ---
122 | def get_workspace_python_files():
123 | """Gets a list of all '.py' filenames in the workspace directory."""
124 | if not WORKSPACE_DIR.is_dir():
125 | return [] # Return empty list if directory doesn't exist
126 | try:
127 | # List files, filter for .py, sort alphabetically
128 | python_files = sorted([
129 | f.name for f in WORKSPACE_DIR.iterdir() if f.is_file() and f.suffix == '.py'
130 | ])
131 | return python_files
132 | except Exception as e:
133 | st.error(f"Error reading workspace directory: {e}")
134 | return []
135 |
136 | def read_file(filename):
137 | """Reads the text content of a file from the workspace."""
138 | if not filename: # Check if filename is provided
139 | return None
140 | # Prevent accessing files outside the workspace (basic security)
141 | if ".." in filename or filename.startswith(("/", "\\")):
142 | st.error(f"Invalid file path: {filename}")
143 | return None
144 |
145 | filepath = WORKSPACE_DIR / filename # Combine directory and filename
146 | try:
147 | with open(filepath, "r", encoding="utf-8") as f:
148 | return f.read() # Return the file's text content
149 | except FileNotFoundError:
150 | st.warning(f"File not found: {filename}")
151 | return None # Indicate file doesn't exist
152 | except Exception as e:
153 | st.error(f"Error reading file '{filename}': {e}")
154 | return None
155 |
156 | def save_file(filename, content):
157 | """Writes text content to a file in the workspace."""
158 | if not filename:
159 | return False # Cannot save without a filename
160 | if ".." in filename or filename.startswith(("/", "\\")):
161 | st.error(f"Invalid file path: {filename}")
162 | return False
163 |
164 | filepath = WORKSPACE_DIR / filename
165 | try:
166 | # Write the content to the file (overwrites if it exists)
167 | with open(filepath, "w", encoding="utf-8") as f:
168 | f.write(content)
169 | return True # Indicate success
170 | except Exception as e:
171 | st.error(f"Error saving file '{filename}': {e}")
172 | return False # Indicate failure
173 |
174 | def delete_file(filename):
175 | """Deletes a file from the workspace and updates app state."""
176 | if not filename:
177 | return False
178 | if ".." in filename or filename.startswith(("/", "\\")):
179 | st.error(f"Invalid file path: {filename}")
180 | return False
181 |
182 | filepath = WORKSPACE_DIR / filename
183 | try:
184 | if filepath.is_file():
185 | os.remove(filepath) # Delete the actual file
186 | st.toast(f"Deleted: {filename}", icon="🗑️")
187 |
188 | # If the deleted file was being previewed, stop the preview
189 | if st.session_state.preview_file == filename:
190 | stop_preview() # Call the function to stop the process
191 |
192 | # If the deleted file was selected in the editor, clear the selection
193 | if st.session_state.selected_file == filename:
194 | st.session_state.selected_file = None
195 | st.session_state.file_content_on_load = ""
196 | st.session_state.editor_unsaved_content = ""
197 | st.session_state.last_saved_content = ""
198 | return True # Indicate success
199 | else:
200 | st.warning(f"Could not delete: File '{filename}' not found.")
201 | return False
202 | except Exception as e:
203 | st.error(f"Error deleting file '{filename}': {e}")
204 | return False
205 |
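206 | # Editor's sketch (not wired into the original flow): the substring checks above
207 | # miss cases such as absolute Windows drive paths. Resolving the candidate path
208 | # against the workspace is a stricter guard:
209 | def _resolve_in_workspace(filename: str):
210 |     """Return the resolved path if it stays inside WORKSPACE_DIR, else None."""
211 |     candidate = (WORKSPACE_DIR / filename).resolve()
212 |     try:
213 |         candidate.relative_to(WORKSPACE_DIR.resolve())
214 |         return candidate
215 |     except ValueError:  # Path escapes the workspace
216 |         return None
217 | 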
206 | # --- AI Interaction Functions ---
207 |
208 | def _clean_ai_response_text(ai_response_text):
209 | """Removes potential code fences (```json ... ```) from AI response."""
210 | text = ai_response_text.strip()
211 |     if text.startswith("```json"):
212 |         text = text.removeprefix("```json").removesuffix("```").strip()  # Safe even if the closing fence is missing
213 |     elif text.startswith("```"):
214 |         text = text.removeprefix("```").removesuffix("```").strip()
215 | return text
216 |
217 | def parse_and_execute_ai_commands(ai_response_text):
218 | """
219 | Parses the AI's JSON response and performs the requested file actions.
220 | Returns the list of commands (for chat history display).
221 | """
222 | cleaned_text = _clean_ai_response_text(ai_response_text)
223 | executed_commands_list = [] # To store commands for chat display
224 |
225 | try:
226 | # Attempt to convert the cleaned text into a Python list of dictionaries
227 | commands = json.loads(cleaned_text)
228 |
229 | # Check if the result is actually a list
230 | if not isinstance(commands, list):
231 | st.error("AI response was valid JSON, but not a list of commands.")
232 | # Return a chat message indicating the error for display
233 | return [{"action": "chat", "content": f"AI Error: Response was not a list. Response: {cleaned_text}"}]
234 |
235 | # Process each command dictionary in the list
236 | for command_data in commands:
237 | # Ensure the command is a dictionary before processing
238 | if not isinstance(command_data, dict):
239 | st.warning(f"AI sent an invalid command format (not a dict): {command_data}")
240 | executed_commands_list.append({"action": "chat", "content": f"AI Error: Invalid command format: {command_data}"})
241 | continue # Skip to the next command
242 |
243 | # Add the command to the list we return (used for displaying AI actions)
244 | executed_commands_list.append(command_data)
245 |
246 | # Get action details from the dictionary
247 | action = command_data.get("action")
248 | filename = command_data.get("filename")
249 | content = command_data.get("content")
250 |
251 | # --- Execute the action ---
252 | if action == "create_update":
253 | if filename and content is not None:
254 | success = save_file(filename, content)
255 | if success:
256 | st.toast(f"AI saved: {filename}", icon="💾")
257 | # If this file is currently open in the editor, update the editor's content
258 | if st.session_state.selected_file == filename:
259 | st.session_state.file_content_on_load = content
260 | st.session_state.last_saved_content = content
261 | st.session_state.editor_unsaved_content = content
262 | else:
263 | st.error(f"AI command failed: Could not save '{filename}'.")
264 | # Add error details to chat display list
265 | executed_commands_list.append({"action": "chat", "content": f"Error: Failed saving {filename}"})
266 | else:
267 | st.warning("AI 'create_update' command missing filename or content.")
268 | executed_commands_list.append({"action": "chat", "content": "AI Warning: Invalid create_update"})
269 |
270 | elif action == "delete":
271 | if filename:
272 | success = delete_file(filename)
273 | if not success:
274 | st.error(f"AI command failed: Could not delete '{filename}'.")
275 | executed_commands_list.append({"action": "chat", "content": f"Error: Failed deleting {filename}"})
276 | else:
277 | st.warning("AI 'delete' command missing filename.")
278 | executed_commands_list.append({"action": "chat", "content": "AI Warning: Invalid delete"})
279 |
280 | elif action == "chat":
281 | # No action needed here, the chat message is already in executed_commands_list
282 | # and will be displayed in the chat history.
283 | pass
284 |
285 | else:
286 | # Handle unrecognized actions from the AI
287 | st.warning(f"AI sent unknown action: '{action}'.")
288 | executed_commands_list.append({"action": "chat", "content": f"AI Warning: Unknown action '{action}'"})
289 |
290 | return executed_commands_list # Return the list for chat display
291 |
292 | except json.JSONDecodeError:
293 | st.error(f"AI response was not valid JSON.\nRaw response:\n```\n{cleaned_text}\n```")
294 | # Return a chat message indicating the JSON error for display
295 | return [{"action": "chat", "content": f"AI Error: Invalid JSON received. Response: {ai_response_text}"}]
296 | except Exception as e:
297 | st.error(f"Error processing AI commands: {e}")
298 | return [{"action": "chat", "content": f"Error processing commands: {e}"}]
299 |
300 | def _prepare_gemini_history(chat_history, system_prompt):
301 | """Formats chat history for the Gemini API call."""
302 | gemini_history = []
303 | # Start with the system prompt (instructions for the AI)
304 | gemini_history.append({"role": "user", "parts": [{"text": system_prompt}]})
305 | # Gemini requires a model response to start the turn properly after a system prompt
306 | gemini_history.append({"role": "model", "parts": [{"text": json.dumps([{"action": "chat", "content": "Understood. I will respond only with JSON commands."}])}]})
307 |
308 | # Add the actual user/assistant messages from session state
309 | for msg in chat_history:
310 | role = msg["role"] # "user" or "assistant"
311 | content = msg["content"]
312 | api_role = "model" if role == "assistant" else "user" # Map to API roles
313 |
314 | # Convert assistant messages (which are lists of commands) back to JSON strings
315 | if role == "assistant" and isinstance(content, list):
316 | try:
317 | content_str = json.dumps(content)
318 | except Exception:
319 | content_str = str(content) # Fallback if conversion fails
320 | else:
321 | content_str = str(content) # User messages are already strings
322 |
323 | if content_str: # Avoid sending empty messages
324 | gemini_history.append({"role": api_role, "parts": [{"text": content_str}]})
325 |
326 | return gemini_history
327 |
328 | def ask_gemini_ai(chat_history):
329 | """Sends the conversation history to the Gemini AI and returns its response."""
330 |
331 |     # Get current list of files to include in the prompt context
332 |     current_files = get_workspace_python_files()
333 |     file_list_info = ', '.join(current_files) if current_files else 'None'
334 |     # Substitute the [FILE_LIST] placeholder (the old replace target never matched)
335 |     updated_system_prompt = GEMINI_SYSTEM_PROMPT.replace(
336 |         "[FILE_LIST]",  # Placeholder defined in GEMINI_SYSTEM_PROMPT
337 |         file_list_info
338 |     )
339 |
340 | # Prepare the history in the format the API expects
341 | gemini_api_history = _prepare_gemini_history(chat_history, updated_system_prompt)
342 |
343 |     response = None  # Defined up front so the error handler below can inspect it safely
344 |     try:
344 | # Make the API call to Google
345 | # print(f"DEBUG: Sending history:\n{json.dumps(gemini_api_history, indent=2)}") # Uncomment for debugging API calls
346 | response = model.generate_content(gemini_api_history)
347 | # print(f"DEBUG: Received response:\n{response.text}") # Uncomment for debugging API calls
348 | return response.text # Return the AI's raw text response
349 |
350 | except Exception as e:
351 | # Handle potential errors during the API call
352 | error_message = f"Gemini API call failed: {type(e).__name__}"
353 | st.error(f"🔴 {error_message}: {e}")
354 |
355 | # Try to give a more user-friendly error message for common issues
356 | error_content = f"AI Error: {str(e)[:150]}..." # Default message
357 | if "API key not valid" in str(e):
358 | error_content = "AI Error: Invalid Google API Key."
359 | elif "429" in str(e) or "quota" in str(e).lower() or "resource has been exhausted" in str(e).lower():
360 | error_content = "AI Error: API Quota or Rate Limit Exceeded."
361 | # Handle cases where the AI's response might be blocked for safety
362 | try:
363 | if response and response.prompt_feedback and response.prompt_feedback.block_reason:
364 | error_content = f"AI Error: Input blocked by safety filters ({response.prompt_feedback.block_reason})."
365 | elif response and response.candidates and response.candidates[0].finish_reason != 'STOP':
366 | error_content = f"AI Error: Response stopped ({response.candidates[0].finish_reason}). May be due to safety filters or length limits."
367 | except Exception:
368 | pass # Ignore errors during safety check parsing
369 |
370 | # Return the error as a JSON chat command so it appears in the chat history
371 | return json.dumps([{"action": "chat", "content": error_content}])
372 |
373 | # --- Live Preview Process Management ---
374 | def _find_available_port():
375 | """Finds an unused network port."""
376 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
377 | s.bind(('', 0)) # Bind to port 0 to let the OS choose a free port
378 | return s.getsockname()[1] # Return the chosen port number
379 |
380 | def stop_preview():
381 | """Stops the currently running Streamlit preview process."""
382 | process_to_stop = st.session_state.get("preview_process")
383 | pid = getattr(process_to_stop, 'pid', None) # Get process ID if available
384 |
385 | if process_to_stop and pid:
386 | st.info(f"Stopping preview process (PID: {pid})...")
387 | try:
388 | # Check if the process is still running
389 | if process_to_stop.poll() is None:
390 | # Ask the process to terminate gracefully
391 | process_to_stop.terminate()
392 | try:
393 | # Wait up to 3 seconds for it to close
394 | process_to_stop.wait(timeout=3)
395 | st.toast(f"Preview process {pid} stopped.", icon="⏹️")
396 | except subprocess.TimeoutExpired:
397 | # If it didn't stop, force kill it
398 | st.warning(f"Preview process {pid} did not stop gracefully, killing...")
399 | if process_to_stop.poll() is None: # Check again before kill
400 | process_to_stop.kill()
401 | process_to_stop.wait(timeout=1) # Brief wait for kill
402 | st.toast(f"Preview process {pid} killed.", icon="💀")
403 | else:
404 | # Process was already finished
405 | st.warning(f"Preview process {pid} had already stopped.")
406 | except ProcessLookupError:
407 | st.warning(f"Preview process {pid} not found (already gone?).")
408 | except Exception as e:
409 | st.error(f"Error trying to stop preview process {pid}: {e}")
410 |
411 | # Always clear the preview state variables after attempting to stop
412 | st.session_state.preview_process = None
413 | st.session_state.preview_port = None
414 | st.session_state.preview_url = None
415 | st.session_state.preview_file = None
416 | st.rerun() # Update the UI immediately
417 |
418 | def start_preview(python_filename):
419 | """Starts a Streamlit app preview in a separate process."""
420 | filepath = WORKSPACE_DIR / python_filename
421 | # Basic check: ensure the file exists and is a Python file
422 | if not filepath.is_file() or filepath.suffix != '.py':
423 | st.error(f"Cannot preview: '{python_filename}' is not a valid Python file.")
424 | return False
425 |
426 | # Stop any currently running preview first
427 | if st.session_state.get("preview_process"):
428 | st.warning("Stopping existing preview first...")
429 | stop_preview() # This function will rerun, so we might need to adjust flow
430 | # Let's add a small delay here AFTER stop_preview (which reruns) handles its part.
431 | # This might mean the button needs to be clicked twice sometimes, but simplifies state.
432 | # A more complex approach would involve flags in session state.
433 | time.sleep(0.5) # Brief pause
434 |
435 | with st.spinner(f"Starting preview for `{python_filename}`..."):
436 | try:
437 | port = _find_available_port()
438 | # Command to run: python -m streamlit run --port [options]
439 | command = [
440 | sys.executable, # Use the same Python interpreter running this script
441 | "-m", "streamlit", "run",
442 | str(filepath.resolve()), # Use the full path to the file
443 | "--server.port", str(port),
444 | "--server.headless", "true", # Don't open a browser automatically
445 | "--server.runOnSave", "false", # Don't automatically rerun on save
446 | "--server.fileWatcherType", "none" # Don't watch for file changes
447 | ]
448 |
449 | # Start the command as a new process
450 | preview_proc = subprocess.Popen(
451 | command,
452 | stdout=subprocess.PIPE, # Capture output (optional)
453 | stderr=subprocess.PIPE, # Capture errors
454 | text=True, encoding='utf-8'
455 | )
456 |
457 | # Give Streamlit a moment to start up or fail
458 | time.sleep(2.5) # Wait a bit
459 |
460 | # Check if the process started successfully (is still running)
461 | if preview_proc.poll() is None:
462 | # Success! Store process info in session state
463 | st.session_state.preview_process = preview_proc
464 | st.session_state.preview_port = port
465 | st.session_state.preview_url = f"http://localhost:{port}"
466 | st.session_state.preview_file = python_filename
467 | st.success(f"Preview started: {st.session_state.preview_url}")
468 | st.toast(f"Preview running for {python_filename}", icon="🚀")
469 | return True
470 | else:
471 | # Failure: Process ended quickly, likely an error
472 | st.error(f"Preview failed to start for `{python_filename}`.")
473 | # Try to show error output from the failed process
474 | try:
475 | stderr_output = preview_proc.stderr.read()
476 | if stderr_output:
477 | st.error("Preview Error Output:")
478 | st.code(stderr_output, language=None)
479 | else: # If no stderr, maybe there was stdout?
480 | stdout_output = preview_proc.stdout.read()
481 | if stdout_output:
482 | st.error("Preview Output (may contain errors):")
483 | st.code(stdout_output, language=None)
484 | except Exception as read_e:
485 | st.error(f"Could not read output from failed preview process: {read_e}")
486 | # Clear any partial state
487 | st.session_state.preview_process = None
488 | return False
489 | except Exception as e:
490 | st.error(f"Error trying to start preview process: {e}")
491 | st.session_state.preview_process = None # Ensure clean state
492 | return False
493 |
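# Illustrative sketch (not called anywhere in the app): the launch-and-verify
# pattern used in start_preview(). poll() returns None while the child is still
# alive and its exit code once it has died, so a short grace period followed by
# poll() separates "started OK" from "crashed during startup".
def _demo_launch_and_verify(command):
    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, text=True, encoding='utf-8')
    time.sleep(2.5)                   # grace period for the server to come up
    if proc.poll() is None:
        return proc                   # still running: assume startup succeeded
    raise RuntimeError(f"Process exited early:\n{proc.stderr.read()}")
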
494 | # --- Streamlit App UI ---
495 |
496 | st.title("🤖 AI Streamlit App Generator")
497 |
498 | # --- Sidebar ---
499 | with st.sidebar:
500 | st.header("💬 Chat & Controls")
501 | st.divider()
502 |
503 | # --- Chat History Display ---
504 | chat_container = st.container(height=400)
505 | with chat_container:
506 | if not st.session_state.messages:
507 | st.info("Chat history is empty. Type your instructions below.")
508 | else:
509 | # Loop through messages stored in session state
510 | for message in st.session_state.messages:
511 | role = message["role"] # "user" or "assistant"
512 | content = message["content"]
513 |                 avatar = "🧑‍💻" if role == "user" else "🤖"
514 |
515 | # Display message using Streamlit's chat message element
516 | with st.chat_message(role, avatar=avatar):
517 | if role == "assistant" and isinstance(content, list):
518 | # Assistant message contains commands - format them nicely
519 | file_actions_summary = ""
520 | chat_responses = []
521 | code_snippets = []
522 |
523 | for command in content:
524 | if not isinstance(command, dict): continue # Skip malformed
525 |
526 | action = command.get("action")
527 | filename = command.get("filename")
528 | cmd_content = command.get("content")
529 |
530 | if action == "create_update":
531 | file_actions_summary += f"📝 **Saved:** `{filename}`\n"
532 | if cmd_content:
533 | code_snippets.append({"filename": filename, "content": cmd_content})
534 | elif action == "delete":
535 | file_actions_summary += f"🗑️ **Deleted:** `{filename}`\n"
536 | elif action == "chat":
537 | chat_responses.append(str(cmd_content or "..."))
538 | else:
539 | file_actions_summary += f"⚠️ **Unknown Action:** `{action}`\n"
540 |
541 | # Display the formatted summary and chat responses
542 | full_display_text = (file_actions_summary + "\n".join(chat_responses)).strip()
543 | if full_display_text:
544 | st.markdown(full_display_text)
545 | else: # Handle cases where AI might return empty actions
546 | st.markdown("(AI performed no displayable actions)")
547 |
548 | # Show code snippets in collapsible sections
549 | for snippet in code_snippets:
550 | with st.expander(f"View Code for `{snippet['filename']}`", expanded=False):
551 | st.code(snippet['content'], language="python")
552 |
553 | elif isinstance(content, str):
554 | # Simple text message (from user or AI chat action)
555 | st.write(content)
556 | else:
557 | # Fallback for unexpected content type
558 | st.write(f"Unexpected message format: {content}")
559 |
560 | # --- Chat Input Box ---
561 | user_prompt = st.chat_input("Tell the AI what to do (e.g., 'Create hello.py')")
562 | if user_prompt:
563 | # 1. Add user's message to the chat history (in session state)
564 | st.session_state.messages.append({"role": "user", "content": user_prompt})
565 |
566 | # 2. Show a spinner while waiting for the AI
567 | with st.spinner("🧠 AI Thinking..."):
568 | # 3. Send the *entire* chat history to the AI
569 | ai_response_text = ask_gemini_ai(st.session_state.messages)
570 | # 4. Parse the AI's response and execute file commands
571 | ai_commands_executed = parse_and_execute_ai_commands(ai_response_text)
572 |
573 | # 5. Add the AI's response (the list of executed commands) to chat history
574 | st.session_state.messages.append({"role": "assistant", "content": ai_commands_executed})
575 |
576 | # 6. Rerun the script immediately to show the new messages and update file list/editor
577 | st.rerun()
578 |
579 | st.divider()
580 |
581 | # --- Status Info ---
582 | st.subheader("Status & Info")
583 | st.success(f"Using AI model: {GEMINI_MODEL_NAME}", icon="✅")
584 | st.warning(
585 | "**Notes:** Review AI code before running previews. `create_update` overwrites files.",
586 | )
587 |
588 |
589 | # --- Main Area Tabs ---
590 | selected_tab = option_menu(
591 | menu_title=None,
592 | options=["Workspace", "Live Preview"],
593 | icons=["folder-fill", "play-btn-fill"],
594 | orientation="horizontal",
595 | key="main_tab_menu"
596 | # Removed custom styles for simplicity
597 | )
598 |
599 | # --- Workspace Tab ---
600 | if selected_tab == "Workspace":
601 | st.header("📂 Workspace & Editor")
602 | st.divider()
603 |
604 | # Create two columns: one for file list, one for editor
605 | file_list_col, editor_col = st.columns([0.3, 0.7]) # 30% width for files, 70% for editor
606 |
607 | with file_list_col:
608 | st.subheader("Files")
609 | python_files = get_workspace_python_files()
610 |
611 | # Prepare options for the dropdown menu
612 | select_options = ["--- Select a file ---"] + python_files
613 | current_selection_in_state = st.session_state.get("selected_file")
614 |
615 | # Find the index of the currently selected file to set the dropdown default
616 | try:
617 | current_index = select_options.index(current_selection_in_state) if current_selection_in_state else 0
618 | except ValueError:
619 | current_index = 0 # If file in state doesn't exist, default to "Select"
620 |
621 | # The dropdown widget
622 | selected_option = st.selectbox(
623 | "Edit file:",
624 | options=select_options,
625 | index=current_index,
626 | key="file_selector_dropdown",
627 | label_visibility="collapsed" # Hide the label "Edit file:"
628 | )
629 |
630 | # --- Handle File Selection Change ---
631 | # If the dropdown selection is different from what's stored in session state...
632 | newly_selected_filename = selected_option if selected_option != "--- Select a file ---" else None
633 | if newly_selected_filename != current_selection_in_state:
634 | st.session_state.selected_file = newly_selected_filename # Update state
635 | # Read the content of the newly selected file
636 | file_content = read_file(newly_selected_filename) if newly_selected_filename else ""
637 | # Handle case where file read failed (e.g., it was deleted)
638 | if file_content is None and newly_selected_filename:
639 | file_content = f"# ERROR: Could not read file '{newly_selected_filename}'"
640 |
641 | # Update session state with the file's content for the editor
642 | st.session_state.file_content_on_load = file_content
643 | st.session_state.editor_unsaved_content = file_content # Start editor with file content
644 | st.session_state.last_saved_content = file_content # Mark as saved initially
645 | st.rerun() # Rerun script to load the new file into the editor
646 |
647 | with editor_col:
648 | st.subheader("Code Editor")
649 | selected_filename = st.session_state.selected_file
650 |
651 | if selected_filename:
652 | st.caption(f"Editing: `{selected_filename}`")
653 |
654 | # Display the Ace code editor widget
655 | editor_current_text = st_ace(
656 | value=st.session_state.get('editor_unsaved_content', ''), # Show unsaved content
657 | language="python",
658 | theme=ACE_DEFAULT_THEME,
659 | keybinding=ACE_DEFAULT_KEYBINDING,
660 | font_size=14, tab_size=4, wrap=True,
661 | auto_update=False, # Don't trigger reruns on every keystroke
662 | key=f"ace_editor_{selected_filename}" # Unique key helps reset state on file change
663 | )
664 |
665 | # Check if the editor's current text is different from the last saved text
666 | has_unsaved_changes = (editor_current_text != st.session_state.last_saved_content)
667 |
668 | # If the text in the editor box changes, update our 'unsaved' state variable
669 | if editor_current_text != st.session_state.editor_unsaved_content:
670 | st.session_state.editor_unsaved_content = editor_current_text
671 | st.rerun() # Rerun to update the 'Save Changes' button state
672 |
673 | # --- Editor Action Buttons ---
674 | # Using sac.buttons here for the nice grouped layout with icons.
675 | editor_buttons = [
676 | sac.ButtonsItem(label="💾 Save Changes", icon="save", disabled=not has_unsaved_changes),
677 | sac.ButtonsItem(label="🗑️ Delete File", icon="trash", color="red"),
678 | ]
679 | clicked_editor_button = sac.buttons(
680 | items=editor_buttons, index=None, format_func='title',
681 | align='end', size='small', return_index=False,
682 | key="editor_action_buttons"
683 | )
684 |
685 | # --- Handle Button Clicks ---
686 | if clicked_editor_button == "💾 Save Changes":
687 | if save_file(selected_filename, editor_current_text):
688 | # Update state to reflect the save
689 | st.session_state.file_content_on_load = editor_current_text
690 | st.session_state.last_saved_content = editor_current_text
691 | st.toast(f"Saved: `{selected_filename}`", icon="💾")
692 | time.sleep(0.5) # Let toast message show
693 | st.rerun() # Rerun to disable the save button
694 | else:
695 | st.error(f"Error: Could not save '{selected_filename}'.")
696 |
697 | elif clicked_editor_button == "🗑️ Delete File":
698 | # Use sac.confirm_button for a confirmation pop-up
699 | needs_confirmation = True # Flag to show confirmation
700 | if needs_confirmation:
701 | confirmed = sac.confirm_button(
702 | f"Delete `{selected_filename}`?", # Confirmation message
703 | color="error", key="confirm_delete_button"
704 | )
705 | if confirmed:
706 | if delete_file(selected_filename):
707 | # Deletion successful, file list and editor will update on rerun
708 | st.rerun()
709 | # No 'else' needed, delete_file shows errors
710 |
711 | # Show a warning if there are unsaved changes
712 | if has_unsaved_changes:
713 | st.warning("You have unsaved changes.")
714 |
715 | else:
716 | # Show a placeholder message if no file is selected
717 | st.info("Select a Python file from the list on the left to view or edit.")
718 | st_ace(value="# Select a file...", language="python", readonly=True, key="ace_placeholder")
719 |
720 | # --- Live Preview Tab ---
721 | elif selected_tab == "Live Preview":
722 | st.header("▶️ Live Preview")
723 | st.divider()
724 | st.warning("⚠️ Running AI-generated code can have unintended consequences. Review code first!")
725 |
726 | # Get preview status from session state
727 | is_preview_running = st.session_state.get("preview_process") is not None
728 | file_being_previewed = st.session_state.get("preview_file")
729 | preview_url = st.session_state.get("preview_url")
730 | selected_file_for_preview = st.session_state.get("selected_file") # File selected in Workspace
731 |
732 | # --- Preview Controls ---
733 | st.subheader("Controls")
734 | if not selected_file_for_preview:
735 | st.info("Select a file in the 'Workspace' tab to enable preview controls.")
736 | # Allow stopping a preview even if no file is selected
737 | if is_preview_running:
738 | st.warning(f"Preview is running for: `{file_being_previewed}`")
739 | if st.button(f"⏹️ Stop Preview ({file_being_previewed})", key="stop_other_preview"):
740 | stop_preview() # Will stop and rerun
741 | else:
742 | # Controls for the file selected in the Workspace
743 | st.write(f"File selected for preview: `{selected_file_for_preview}`")
744 | is_python = selected_file_for_preview.endswith(".py")
745 |
746 | if not is_python:
747 | st.error("Cannot preview: Selected file is not a Python (.py) file.")
748 | else:
749 | # Layout Run and Stop buttons side-by-side
750 | run_col, stop_col = st.columns(2)
751 | with run_col:
752 | # Disable Run button if a preview is already running
753 | run_disabled = is_preview_running
754 | if st.button("🚀 Run Preview", disabled=run_disabled, type="primary", use_container_width=True):
755 | if start_preview(selected_file_for_preview):
756 | st.rerun() # Rerun to show the preview iframe
757 | with stop_col:
758 | # Disable Stop button if no preview is running OR if the running preview
759 | # is for a DIFFERENT file than the one currently selected in the workspace.
760 | stop_disabled = not is_preview_running or (file_being_previewed != selected_file_for_preview)
761 | if st.button("⏹️ Stop Preview", disabled=stop_disabled, use_container_width=True):
762 | stop_preview() # Will stop and rerun
763 |
764 | st.divider()
765 |
766 | # --- Preview Display ---
767 | st.subheader("Preview Window")
768 | if is_preview_running:
769 | # Check if the running preview matches the file selected in the workspace
770 | if file_being_previewed == selected_file_for_preview:
771 | st.info(f"Showing preview for `{file_being_previewed}`")
772 | st.caption(f"URL: {preview_url}")
773 | # Check if the process is still alive before showing iframe
774 | live_process = st.session_state.preview_process
775 | if live_process and live_process.poll() is None:
776 | # Display the running Streamlit app in an iframe
777 | st.components.v1.iframe(preview_url, height=600, scrolling=True)
778 | else:
779 | # The process died unexpectedly
780 | st.warning(f"Preview for `{file_being_previewed}` stopped unexpectedly.")
781 | # Attempt to show error output if available
782 | if live_process:
783 | try:
784 | stderr = live_process.stderr.read()
785 | if stderr:
786 | with st.expander("Show error output from stopped process"): st.code(stderr)
787 | except Exception: pass # Ignore errors reading output
788 | # Clear the dead process state (stop_preview handles this and reruns)
789 | if live_process: # Check again in case state changed
790 | stop_preview()
791 | else:
792 | # A preview is running, but not for the file selected in the workspace
793 | st.warning(f"Preview is running for `{file_being_previewed}`. Select that file in the Workspace to see it here, or stop it using the controls above.")
794 | else:
795 | # No preview is currently running
796 | st.info("Click 'Run Preview' on a selected Python file to see it here.")
--------------------------------------------------------------------------------
/fast.py:
--------------------------------------------------------------------------------
1 | ## Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts
2 | ## Subscribe for more AI/Machine Learning/Data Science Tutorials
3 |
4 | ##################################
5 | ## 1. Data Import
6 | ##################################
7 |
8 | import os
9 | import markdown
10 | import pandas as pd
11 | from fasthtml.common import *
12 | from fastcore.basics import NotStr
13 | import plotly.express as px
14 | import nfl_data_py as nfl
15 |
16 |
17 |
18 | ##################################
19 | ## 2. Initialize FastHTML app
20 | ##################################
21 |
22 | app, rt = fast_app()
23 |
24 |
25 |
26 | ##################################
27 | ## 3. Input and Process Markdown Blog Files
28 | ##################################
29 |
30 | # Directory containing Markdown files
31 | POSTS_DIR = 'posts'
32 |
33 | # Load and convert Markdown files to HTML
34 | def load_posts():
35 | posts = []
36 | # List all Markdown files with their full paths
37 | md_files = [os.path.join(POSTS_DIR, f) for f in os.listdir(POSTS_DIR) if f.endswith('.md')]
38 | # Sort files by last modified time in descending order
39 | md_files.sort(key=os.path.getmtime, reverse=True)
40 | for filepath in md_files:
41 | with open(filepath, 'r', encoding='utf-8') as file:
42 | html_content = markdown.markdown(file.read())
43 | title = os.path.basename(filepath).replace('_', ' ').replace('.md', '').title()
44 | posts.append({"title": title, "content": html_content})
45 | return posts
46 |
47 |
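# Tiny illustration (not used by the app) of the Markdown -> HTML step that
# load_posts() performs per file, on a literal string; expected output inline.
def _demo_markdown_to_html() -> str:
    return markdown.markdown("# Title\n\nSome *text*.")
    # -> '<h1>Title</h1>\n<p>Some <em>text</em>.</p>'
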
48 |
49 | ##################################
50 | ## 4. Function to import, wrangle, and graph data
51 | ##################################
52 |
53 | # Generate NFL Cumulative Offensive Yards Chart
54 | def generate_offensive_yards_chart():
55 | # Fetch play-by-play data for the 2024 season
56 | df = nfl.import_pbp_data([2024])
57 |
58 | # Filter for rushing and passing plays
59 | rushing_plays = df[df['play_type'] == 'run']
60 | passing_plays = df[df['play_type'] == 'pass']
61 |
62 | # Group by offensive team and week, then sum yards gained
63 | weekly_rushing_yards = rushing_plays.groupby(['posteam', 'week'])['yards_gained'].sum().reset_index()
64 | weekly_passing_yards = passing_plays.groupby(['posteam', 'week'])['yards_gained'].sum().reset_index()
65 |
66 | # Add a 'play_type' column
67 | weekly_rushing_yards['play_type'] = 'Rushing'
68 | weekly_passing_yards['play_type'] = 'Passing'
69 |
70 | # Combine the dataframes
71 | combined_df = pd.concat([weekly_rushing_yards, weekly_passing_yards])
72 |
73 | # Pivot the table to have teams as columns and weeks as rows
74 | pivot_df = combined_df.pivot_table(index='week', columns=['posteam', 'play_type'], values='yards_gained', fill_value=0)
75 |
76 | # Calculate cumulative yards
77 | cumulative_yards = pivot_df.cumsum()
78 |
79 | # Reset index for plotting
80 | cumulative_yards = cumulative_yards.reset_index()
81 | cumulative_yards.columns = ['week'] + [f'{team}_{ptype}' for team, ptype in cumulative_yards.columns[1:]]
82 |
83 | # Melt the dataframe for Plotly Express
84 | melted_df = cumulative_yards.melt(id_vars=['week'], var_name='team_playtype', value_name='cumulative_yards')
85 | melted_df[['team', 'play_type']] = melted_df['team_playtype'].str.split('_', expand=True)
86 |
87 | # Create Plotly Express figure
88 | fig = px.line(melted_df, x='week', y='cumulative_yards', color='team', facet_col='play_type',
89 | title='Cumulative Offensive Yards by Week (2024 Season)',
90 | labels={'week': 'Week', 'cumulative_yards': 'Cumulative Yards'},
91 | category_orders={'play_type': ['Rushing', 'Passing']})
92 |
93 | fig.update_layout(legend_title_text='Team')
94 | fig.update_xaxes(type='category')
95 |
96 | return fig.to_html(full_html=False, include_plotlyjs='cdn')
97 |
98 |
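# A toy, self-contained version of the reshape pipeline above (pivot to wide,
# cumulative-sum down the weeks, then melt back to long for Plotly), using a
# made-up two-week sample for one team so the intermediate shapes are obvious.
# Not called by the app; the expected result is shown in the trailing comment.
def _demo_pivot_cumsum_melt() -> pd.DataFrame:
    toy = pd.DataFrame({
        'posteam':      ['KC', 'KC', 'KC', 'KC'],
        'week':         [1, 1, 2, 2],
        'play_type':    ['Rushing', 'Passing', 'Rushing', 'Passing'],
        'yards_gained': [120, 250, 90, 310],
    })
    wide = toy.pivot_table(index='week', columns=['posteam', 'play_type'],
                           values='yards_gained', fill_value=0)
    cumulative = wide.cumsum().reset_index()          # running totals per column
    cumulative.columns = ['week'] + [f'{team}_{ptype}'
                                     for team, ptype in cumulative.columns[1:]]
    return cumulative.melt(id_vars=['week'], var_name='team_playtype',
                           value_name='cumulative_yards')
    # week  team_playtype  cumulative_yards
    #    1     KC_Passing               250
    #    2     KC_Passing               560
    #    1     KC_Rushing               120
    #    2     KC_Rushing               210
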
99 |
100 | ##################################
101 | ## 5. Homepage Route for Content Layout
102 | ##################################
103 |
104 | @rt('/')
105 | def home():
106 | posts = load_posts()
107 | chart_html = generate_offensive_yards_chart()
108 |
109 | # Create a list of article components for each post
110 | article_posts = [
111 | Article(
112 | H1(post['title'], cls='post-title'),
113 | Div(NotStr(post['content']))
114 | )
115 | for post in posts
116 | ]
117 | return Html(
118 | Head(
119 | Title('Deep Charts: NFL Yards Tracker'),
120 | Link(rel='stylesheet', href='https://cdn.jsdelivr.net/npm/@picocss/pico@latest/css/pico.min.css'),
121 | Style("""
122 | .header {
123 | text-align: center;
124 | padding: 1em;
125 | background-color: #f8f9fa;
126 | position: fixed;
127 | top: 0;
128 | width: 100%;
129 | z-index: 10;
130 | }
131 | .container {
132 | display: flex;
133 | max-width: 100%;
134 | margin-top: 80px; /* Space for the fixed header */
135 | }
136 | .posts {
137 | flex: 2;
138 | overflow-y: auto;
139 | height: calc(100vh - 80px); /* Adjust for header */
140 | padding: 1em;
141 | margin-right: 40%;
142 | box-sizing: border-box;
143 | }
144 | .chart {
145 | flex: 1;
146 | position: fixed;
147 | right: 0;
148 | top: 80px; /* Space for the fixed header */
149 | width: 40%;
150 | height: calc(100vh - 80px); /* Adjust for header */
151 | padding: 1em;
152 | box-sizing: border-box;
153 | }
154 | h1.post-title {
155 | font-size: 1.5em;
156 | font-weight: bold;
157 | }
158 | article {
159 | margin-bottom: 2em;
160 | }
161 | """)
162 | ),
163 | Body(
164 | Div(
165 | H1('Deep Charts: NFL Yards Tracker', cls='header'),
166 | Div(
167 | Div(*article_posts, cls="posts"),
168 | Div(NotStr(chart_html), cls="chart"),
169 | cls="container"
170 | )
171 | )
172 | )
173 | )
174 |
175 |
176 |
177 | ##################################
178 | ## 6. Serve the App
179 | ##################################
180 |
181 | serve()
182 |
--------------------------------------------------------------------------------
/scikit-ollama-tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Source: Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# AI Sentiment Analysis with Ollama and Scikit-Ollama"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Import Libraries"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "from finvizfinance.quote import finvizfinance\n",
32 | "from skollama.models.ollama.classification.zero_shot import ZeroShotOllamaClassifier\n",
33 | "from skollama.models.ollama.classification.few_shot import FewShotOllamaClassifier"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "## Pull Stock News Headline Data"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "# Initialize the finvizfinance object for INTC\n",
50 | "stock = finvizfinance('INTC')\n",
51 | "\n",
52 | "# Fetch the latest news articles\n",
53 | "news_df = stock.ticker_news()\n",
54 | "\n",
55 | "news_df.head()"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "Data Wrangling (reorder the dataframe and drop headlines that do not mention the company)"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "# Reorder Columns\n",
72 | "news_df = news_df[['Date','Link','Title']]\n",
73 | "\n",
74 | "# Define the keywords to filter by\n",
75 | "keywords = ['INTC', 'Intel']\n",
76 | "\n",
77 | "# Create a regex pattern by joining keywords with '|'\n",
78 | "pattern = '|'.join(keywords)\n",
79 | "\n",
80 | "# Filter the DataFrame using str.contains\n",
81 | "filtered_news_df = news_df[news_df['Title'].str.contains(pattern, case=False, na=False)]\n",
82 | "\n",
83 | "filtered_news_df.head()"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "## Run Zero Shot Classifier"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": null,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": [
99 | "# Initialize the ZeroShotOllamaClassifier\n",
100 | "clf = ZeroShotOllamaClassifier(model='llama3')\n",
101 | "\n",
102 | "# Define the candidate labels\n",
103 | "candidate_labels = ['positive', 'negative', 'neutral']\n",
104 | "\n",
105 | "# Fit the classifier (no training data needed for zero-shot)\n",
106 | "clf.fit(None, candidate_labels)\n",
107 | "\n",
108 | "# Predict the sentiment of each news title as a new column in our DataFrame\n",
109 | "filtered_news_df['Sentiment_zero'] = clf.predict(filtered_news_df['Title'])\n"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "filtered_news_df[['Title','Sentiment_zero']]"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "## Train and Run Few Shot Classifier"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "1. Start by randomly selecting a few training examples from the original dataset"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": null,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "# Randomly select 7 headlines for few-shot training and add a training indicator\n",
142 | "few_shot_df = filtered_news_df.sample(n=7, random_state=1)\n",
143 | "filtered_news_df['Few Shot Training Example'] = filtered_news_df.index.isin(few_shot_df.index)\n",
144 | "\n",
145 | "# View training examples\n",
146 | "list(few_shot_df['Title'])"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "2. Manually review the training examples and assign a human-guided label to each"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 7,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "# Manually assigned labels corresponding to the selected headlines\n",
163 | "# Fill in below based on above headlines\n",
164 | "user_labels = [\n",
165 | " 'neutral',\n",
166 | " 'negative',\n",
167 | " 'neutral',\n",
168 | " 'positive',\n",
169 | " 'positive',\n",
170 | " 'neutral',\n",
171 | " 'positive'\n",
172 | "]\n",
173 | "\n",
174 | "# Add the user-provided labels to the few-shot DataFrame\n",
175 | "few_shot_df['User_Sentiment'] = user_labels"
176 | ]
177 | },
178 | {
179 | "cell_type": "markdown",
180 | "metadata": {},
181 | "source": [
182 | "3. Initialize and run the few-shot classifier on the rest of the dataset"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "# Initialize the FewShotOllamaClassifier\n",
192 | "few_shot_clf = FewShotOllamaClassifier(model='llama3')\n",
193 | "\n",
194 | "# Fit the classifier with user-provided examples directly from the DataFrame columns\n",
195 | "few_shot_clf.fit(few_shot_df['Title'], few_shot_df['User_Sentiment'])\n",
196 | "\n",
197 | "# Predict the sentiment of all news titles in the filtered DataFrame\n",
198 | "filtered_news_df['Sentiment_few'] = few_shot_clf.predict(filtered_news_df['Title'])\n"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "filtered_news_df_2 = filtered_news_df[['Title','Sentiment_zero','Sentiment_few','Few Shot Training Example']]\n",
208 | "filtered_news_df_2"
209 | ]
210 | }
211 | ],
212 | "metadata": {
213 | "kernelspec": {
214 | "display_name": "general_env",
215 | "language": "python",
216 | "name": "python3"
217 | },
218 | "language_info": {
219 | "codemirror_mode": {
220 | "name": "ipython",
221 | "version": 3
222 | },
223 | "file_extension": ".py",
224 | "mimetype": "text/x-python",
225 | "name": "python",
226 | "nbconvert_exporter": "python",
227 | "pygments_lexer": "ipython3",
228 | "version": "3.12.3"
229 | }
230 | },
231 | "nbformat": 4,
232 | "nbformat_minor": 2
233 | }
234 |
--------------------------------------------------------------------------------
/stock_sentiment_agents.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Stock Sentiment Agent Workflow"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "## Libraries\n",
24 | "from phi.agent import Agent\n",
25 | "from phi.model.openai import OpenAIChat\n",
26 | "from phi.tools.googlesearch import GoogleSearch\n",
27 | "from phi.tools.yfinance import YFinanceTools"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "## Put Open AI API key into Python environment\n",
37 | "import os\n",
38 | "os.environ[\"OPENAI_API_KEY\"] = 'sk-xxxxxxxx'"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 3,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "application/vnd.jupyter.widget-view+json": {
49 | "model_id": "ed0b90db992446c4a4d7a9625f5ed101",
50 | "version_major": 2,
51 | "version_minor": 0
52 | },
53 | "text/plain": [
54 | "Output()"
55 | ]
56 | },
57 | "metadata": {},
58 | "output_type": "display_data"
59 | },
60 | {
61 | "data": {
62 | "text/html": [
63 | "\n"
64 | ],
65 | "text/plain": []
66 | },
67 | "metadata": {},
68 | "output_type": "display_data"
69 | }
70 | ],
71 | "source": [
72 | "## Create Agents\n",
73 | "\n",
74 | "# Sentiment Agent\n",
75 | "sentiment_agent = Agent(\n",
76 | " name=\"Sentiment Agent\",\n",
77 | " role=\"Search and interpret news articles.\",\n",
78 | " model=OpenAIChat(id=\"gpt-4o\"),\n",
79 | " tools=[GoogleSearch()],\n",
80 | " instructions=[\n",
81 | " \"Find relevant news articles for each company and analyze the sentiment.\",\n",
82 | "        \"Provide sentiment scores from 1 (negative) to 10 (positive) with reasoning and sources.\",\n",
83 | " \"Cite your sources. Be specific and provide links.\"\n",
84 | " ],\n",
85 | " show_tool_calls=True,\n",
86 | " markdown=True,\n",
87 | ")\n",
88 | "\n",
89 | "# Finance Agent\n",
90 | "finance_agent = Agent(\n",
91 | " name=\"Finance Agent\",\n",
92 | " role=\"Get financial data and interpret trends.\",\n",
93 | " model=OpenAIChat(id=\"gpt-4o\"),\n",
94 | " tools=[YFinanceTools(stock_price=True, analyst_recommendations=True, company_info=True)],\n",
95 | " instructions=[\n",
96 | " \"Retrieve stock prices, analyst recommendations, and key financial data.\",\n",
97 | " \"Focus on trends and present the data in tables with key insights.\"\n",
98 | " ],\n",
99 | " show_tool_calls=True,\n",
100 | " markdown=True,\n",
101 | ")\n",
102 | "\n",
103 | "# Analyst Agent\n",
104 | "analyst_agent = Agent(\n",
105 | " name=\"Analyst Agent\",\n",
106 | " role=\"Ensure thoroughness and draw conclusions.\",\n",
107 | " model=OpenAIChat(id=\"gpt-4o\"),\n",
108 | " instructions=[\n",
109 | " \"Check outputs for accuracy and completeness.\",\n",
110 | " \"Synthesize data to provide a final sentiment score (1-10) with justification.\"\n",
111 | " ],\n",
112 | " show_tool_calls=True,\n",
113 | " markdown=True,\n",
114 | ")\n",
115 | "\n",
116 | "# Team of Agents\n",
117 | "agent_team = Agent(\n",
118 | " model=OpenAIChat(id=\"gpt-4o\"),\n",
119 | " team=[sentiment_agent, finance_agent, analyst_agent],\n",
120 | " instructions=[\n",
121 | " \"Combine the expertise of all agents to provide a cohesive, well-supported response.\",\n",
122 | " \"Always include references and dates for all data points and sources.\",\n",
123 | " \"Present all data in structured tables for clarity.\",\n",
124 | " \"Explain the methodology used to arrive at the sentiment scores.\"\n",
125 | " ],\n",
126 | " show_tool_calls=True,\n",
127 | " markdown=True,\n",
128 | ")\n",
129 | "\n",
130 | "## Run Agent Team\n",
131 | "\n",
132 | "# Final Prompt\n",
133 | "agent_team.print_response(\n",
134 | " \"Analyze the sentiment for the following companies during the week of December 2nd-6th, 2024: NVDA, MSFT. \\n\\n\"\n",
135 | "    \"1. **Sentiment Analysis**: Search for relevant news articles and interpret the sentiment for each company. Provide sentiment scores on a scale of 1 to 10, explain your reasoning, and cite your sources.\\n\\n\"\n",
136 | " \"2. **Financial Data**: Analyze stock price movements, analyst recommendations, and any notable financial data. Highlight key trends or events, and present the data in tables.\\n\\n\"\n",
137 | " \"3. **Consolidated Analysis**: Combine the insights from sentiment analysis and financial data to assign a final sentiment score (1-10) for each company. Justify the scores and provide a summary of the most important findings.\\n\\n\"\n",
138 | " \"Ensure your response is accurate, comprehensive, and includes references to sources with publication dates.\",\n",
139 | " stream=True\n",
140 | ")"
141 | ]
142 | }
143 | ],
144 | "metadata": {
145 | "kernelspec": {
146 | "display_name": "general_env",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.12.3"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 2
165 | }
166 |
--------------------------------------------------------------------------------
/stocks_dashboard.py:
--------------------------------------------------------------------------------
1 | # Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)
2 |
3 | import streamlit as st
4 | import plotly.express as px
5 | import plotly.graph_objects as go
6 | import pandas as pd
7 | import yfinance as yf
8 | from datetime import datetime, timedelta
9 | import pytz
10 | import ta
11 |
12 | ##########################################################################################
13 | ## PART 1: Define Functions for Pulling, Processing, and Creating Technical Indicators  ##
14 | ##########################################################################################
15 |
16 | # Fetch stock data based on the ticker, period, and interval
17 | def fetch_stock_data(ticker, period, interval):
18 | end_date = datetime.now()
19 |     if period == '1wk':  # '1wk' is a yfinance interval, not a valid period, so build the week window explicitly
20 | start_date = end_date - timedelta(days=7)
21 | data = yf.download(ticker, start=start_date, end=end_date, interval=interval)
22 | else:
23 | data = yf.download(ticker, period=period, interval=interval)
24 | return data
25 |
26 | # Process data to ensure it is timezone-aware and has the correct format
27 | def process_data(data):
28 | if data.index.tzinfo is None:
29 | data.index = data.index.tz_localize('UTC')
30 | data.index = data.index.tz_convert('US/Eastern')
31 | data.reset_index(inplace=True)
32 | data.rename(columns={'Date': 'Datetime'}, inplace=True)
33 | return data
34 |
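# A toy, self-contained illustration (not used by the dashboard) of the
# timezone step in process_data(): naive timestamps are first declared to be
# UTC with tz_localize, then converted for display with tz_convert
# (e.g. 14:30 UTC renders as 09:30 US/Eastern in January).
def _demo_timezone_handling() -> pd.DataFrame:
    idx = pd.date_range('2024-01-02 14:30', periods=3, freq='h')  # tz-naive
    demo = pd.DataFrame({'Close': [1.0, 2.0, 3.0]}, index=idx)
    demo.index = demo.index.tz_localize('UTC')        # declare the zone they are in
    demo.index = demo.index.tz_convert('US/Eastern')  # then convert for display
    return demo
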
35 | # Calculate basic metrics from the stock data
36 | def calculate_metrics(data):
37 | last_close = data['Close'].iloc[-1]
38 | prev_close = data['Close'].iloc[0]
39 | change = last_close - prev_close
40 | pct_change = (change / prev_close) * 100
41 | high = data['High'].max()
42 | low = data['Low'].min()
43 | volume = data['Volume'].sum()
44 | return last_close, change, pct_change, high, low, volume
45 |
46 | # Add simple moving average (SMA) and exponential moving average (EMA) indicators
47 | def add_technical_indicators(data):
48 | data['SMA_20'] = ta.trend.sma_indicator(data['Close'], window=20)
49 | data['EMA_20'] = ta.trend.ema_indicator(data['Close'], window=20)
50 | return data
51 |
52 | ###############################################
53 | ## PART 2: Creating the Dashboard App layout ##
54 | ###############################################
55 |
56 |
57 | # Set up Streamlit page layout
58 | st.set_page_config(layout="wide")
59 | st.title('Real-Time Stock Dashboard')
60 |
61 |
62 | # 2A: SIDEBAR PARAMETERS ############
63 |
64 | # Sidebar for user input parameters
65 | st.sidebar.header('Chart Parameters')
66 | ticker = st.sidebar.text_input('Ticker', 'ADBE')
67 | time_period = st.sidebar.selectbox('Time Period', ['1d', '1wk', '1mo', '1y', 'max'])
68 | chart_type = st.sidebar.selectbox('Chart Type', ['Candlestick', 'Line'])
69 | indicators = st.sidebar.multiselect('Technical Indicators', ['SMA 20', 'EMA 20'])
70 |
71 | # Mapping of time periods to data intervals
72 | interval_mapping = {
73 | '1d': '1m',
74 | '1wk': '30m',
75 | '1mo': '1d',
76 | '1y': '1wk',
77 | 'max': '1wk'
78 | }
79 |
80 |
81 | # 2B: MAIN CONTENT AREA ############
82 |
83 | # Update the dashboard based on user input
84 | if st.sidebar.button('Update'):
85 | data = fetch_stock_data(ticker, time_period, interval_mapping[time_period])
86 | data = process_data(data)
87 | data = add_technical_indicators(data)
88 |
89 | last_close, change, pct_change, high, low, volume = calculate_metrics(data)
90 |
91 | # Display main metrics
92 | st.metric(label=f"{ticker} Last Price", value=f"{last_close:.2f} USD", delta=f"{change:.2f} ({pct_change:.2f}%)")
93 |
94 | col1, col2, col3 = st.columns(3)
95 | col1.metric("High", f"{high:.2f} USD")
96 | col2.metric("Low", f"{low:.2f} USD")
97 | col3.metric("Volume", f"{volume:,}")
98 |
99 | # Plot the stock price chart
100 | fig = go.Figure()
101 | if chart_type == 'Candlestick':
102 | fig.add_trace(go.Candlestick(x=data['Datetime'],
103 | open=data['Open'],
104 | high=data['High'],
105 | low=data['Low'],
106 | close=data['Close']))
107 | else:
108 | fig = px.line(data, x='Datetime', y='Close')
109 |
110 | # Add selected technical indicators to the chart
111 | for indicator in indicators:
112 | if indicator == 'SMA 20':
113 | fig.add_trace(go.Scatter(x=data['Datetime'], y=data['SMA_20'], name='SMA 20'))
114 | elif indicator == 'EMA 20':
115 | fig.add_trace(go.Scatter(x=data['Datetime'], y=data['EMA_20'], name='EMA 20'))
116 |
117 | # Format graph
118 | fig.update_layout(title=f'{ticker} {time_period.upper()} Chart',
119 | xaxis_title='Time',
120 | yaxis_title='Price (USD)',
121 | height=600)
122 | st.plotly_chart(fig, use_container_width=True)
123 |
124 | # Display historical data and technical indicators
125 | st.subheader('Historical Data')
126 | st.dataframe(data[['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume']])
127 |
128 | st.subheader('Technical Indicators')
129 | st.dataframe(data[['Datetime', 'SMA_20', 'EMA_20']])
130 |
131 |
132 | # 2C: SIDEBAR PRICES ############
133 |
134 | # Sidebar section for real-time stock prices of selected symbols
135 | st.sidebar.header('Real-Time Stock Prices')
136 | stock_symbols = ['AAPL', 'GOOGL', 'AMZN', 'MSFT']
137 | for symbol in stock_symbols:
138 | real_time_data = fetch_stock_data(symbol, '1d', '1m')
139 | if not real_time_data.empty:
140 | real_time_data = process_data(real_time_data)
141 | last_price = real_time_data['Close'].iloc[-1]
142 | change = last_price - real_time_data['Open'].iloc[0]
143 | pct_change = (change / real_time_data['Open'].iloc[0]) * 100
144 | st.sidebar.metric(f"{symbol}", f"{last_price:.2f} USD", f"{change:.2f} ({pct_change:.2f}%)")
145 |
146 | # Sidebar information section
147 | st.sidebar.subheader('About')
148 | st.sidebar.info('This dashboard provides stock data and technical indicators for various time periods. Use the sidebar to customize your view.')
149 |
150 |
151 |
--------------------------------------------------------------------------------
/structured_outputs.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Structured Outputs: From Text to Tabular Data"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "### Import Libraries"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "from ollama import chat\n",
31 | "from pydantic import BaseModel\n",
32 | "import pandas as pd\n",
33 | "from gnews import GNews"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "### Pull News Headline Data"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "\n",
50 | "# Fetch news articles\n",
51 | "google_news = GNews()\n",
52 | "news = google_news.get_news(\"NVDA\")\n",
53 | "\n",
54 | "# Extract top 6 news titles\n",
55 | "news_titles = [article['title'] for article in news[:6]]\n",
56 | "news_titles"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "### LLM Model and Structured Outputs"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "# Define BaseModel for news analysis\n",
73 | "class NewsAnalysis(BaseModel):\n",
74 | " sentiment: str \n",
75 | " future_looking: bool \n",
76 | "\n",
77 | "# Initialize an empty list to store results\n",
78 | "results = []\n",
79 | "\n",
80 | "# Loop through the news titles and analyze each\n",
81 | "for title in news_titles:\n",
82 | " response = chat(\n",
83 | " messages=[\n",
84 | " {\n",
85 | " 'role': 'user',\n",
86 | " 'content': f\"\"\"Analyze the following title for sentiment (positive, negative, or neutral) \n",
87 | " and whether it provides future-looking financial insight, predictions, or \n",
88 | " guidance on whether to buy/hold/sell the stock (True or False): {title}\n",
89 | " \"\"\",\n",
90 | " }\n",
91 | " ],\n",
92 | " model='llama3.2',\n",
93 | " format=NewsAnalysis.model_json_schema(),\n",
94 | " )\n",
95 | "\n",
96 | " # Parse the response into the NewsAnalysis model\n",
97 | " sentiment_analysis = NewsAnalysis.model_validate_json(response['message']['content'])\n",
98 | "\n",
99 | " # Append the results to the list\n",
100 | " results.append({\n",
101 | " 'title': title,\n",
102 | " 'sentiment': sentiment_analysis.sentiment,\n",
103 | " 'future_looking': sentiment_analysis.future_looking\n",
104 | " })\n",
105 | "\n",
106 | "# Convert the results to a DataFrame\n",
107 | "df = pd.DataFrame(results)\n",
108 | "df\n"
109 | ]
110 | }
111 | ],
112 | "metadata": {
113 | "kernelspec": {
114 | "display_name": "structured_output",
115 | "language": "python",
116 | "name": "python3"
117 | },
118 | "language_info": {
119 | "codemirror_mode": {
120 | "name": "ipython",
121 | "version": 3
122 | },
123 | "file_extension": ".py",
124 | "mimetype": "text/x-python",
125 | "name": "python",
126 | "nbconvert_exporter": "python",
127 | "pygments_lexer": "ipython3",
128 | "version": "3.9.21"
129 | }
130 | },
131 | "nbformat": 4,
132 | "nbformat_minor": 2
133 | }
134 |
--------------------------------------------------------------------------------