├── AI_Technical_Analysis.py ├── AutoScraper Tutorial.ipynb ├── LLM_timeseries_crypto.ipynb ├── Pandas_Top_5_Tricks.ipynb ├── README.md ├── Technical_Indicators_For_Machine_Learning.ipynb ├── Yelp API Notebook.ipynb ├── ai_coding_agent_tutorial.ipynb ├── ai_image_generator.py ├── ai_sentiment_analysis_gemini.ipynb ├── ai_stocks_prediction.py ├── app_streamlit_app_builder_ai.py ├── fast.py ├── flight_delay_ML_project.ipynb ├── scikit-ollama-tutorial.ipynb ├── stock_sentiment_agents.ipynb ├── stocks_dashboard.py └── structured_outputs.ipynb /AI_Technical_Analysis.py: -------------------------------------------------------------------------------- 1 | ## Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts) 2 | 3 | #### NOTE: Set yfinance to the following version to get chart working: "pip install yfinance==0.2.40" 4 | 5 | import streamlit as st 6 | import yfinance as yf 7 | import pandas as pd 8 | import plotly.graph_objects as go 9 | import ollama 10 | import tempfile 11 | import base64 12 | import os 13 | 14 | # Set up Streamlit app 15 | st.set_page_config(layout="wide") 16 | st.title("AI-Powered Technical Stock Analysis Dashboard") 17 | st.sidebar.header("Configuration") 18 | 19 | # Input for stock ticker and date range 20 | ticker = st.sidebar.text_input("Enter Stock Ticker (e.g., AAPL):", "AAPL") 21 | start_date = st.sidebar.date_input("Start Date", value=pd.to_datetime("2023-01-01")) 22 | end_date = st.sidebar.date_input("End Date", value=pd.to_datetime("2024-12-14")) 23 | 24 | # Fetch stock data 25 | if st.sidebar.button("Fetch Data"): 26 | st.session_state["stock_data"] = yf.download(ticker, start=start_date, end=end_date) 27 | st.success("Stock data loaded successfully!") 28 | 29 | # Check if data is available 30 | if "stock_data" in st.session_state: 31 | data = st.session_state["stock_data"] 32 | 33 | # Plot candlestick chart 34 | fig = go.Figure(data=[ 35 | go.Candlestick( 36 | x=data.index, 37 | open=data['Open'], 38 | high=data['High'], 39 | low=data['Low'], 40 | close=data['Close'], 41 | name="Candlestick" # Replace "trace 0" with "Candlestick" 42 | ) 43 | ]) 44 | 45 | # Sidebar: Select technical indicators 46 | st.sidebar.subheader("Technical Indicators") 47 | indicators = st.sidebar.multiselect( 48 | "Select Indicators:", 49 | ["20-Day SMA", "20-Day EMA", "20-Day Bollinger Bands", "VWAP"], 50 | default=["20-Day SMA"] 51 | ) 52 | 53 | # Helper function to add indicators to the chart 54 | def add_indicator(indicator): 55 | if indicator == "20-Day SMA": 56 | sma = data['Close'].rolling(window=20).mean() 57 | fig.add_trace(go.Scatter(x=data.index, y=sma, mode='lines', name='SMA (20)')) 58 | elif indicator == "20-Day EMA": 59 | ema = data['Close'].ewm(span=20).mean() 60 | fig.add_trace(go.Scatter(x=data.index, y=ema, mode='lines', name='EMA (20)')) 61 | elif indicator == "20-Day Bollinger Bands": 62 | sma = data['Close'].rolling(window=20).mean() 63 | std = data['Close'].rolling(window=20).std() 64 | bb_upper = sma + 2 * std 65 | bb_lower = sma - 2 * std 66 | fig.add_trace(go.Scatter(x=data.index, y=bb_upper, mode='lines', name='BB Upper')) 67 | fig.add_trace(go.Scatter(x=data.index, y=bb_lower, mode='lines', name='BB Lower')) 68 | elif indicator == "VWAP": 69 | data['VWAP'] = (data['Close'] * data['Volume']).cumsum() / data['Volume'].cumsum() 70 | fig.add_trace(go.Scatter(x=data.index, y=data['VWAP'], mode='lines', name='VWAP')) 71 | 72 | # Add selected indicators to the chart 73 | for indicator in indicators: 74 | add_indicator(indicator) 75 | 76 | 
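    # --- Editor's aside: a minimal, self-contained sketch of the indicator math
    # used by add_indicator() above, run on synthetic data. All names here
    # (_demo, _sma, _ema, _bb_upper, _bb_lower, _vwap) and the synthetic frame
    # are illustrative assumptions, not part of the dashboard; numpy is assumed
    # to be installed alongside pandas.
    import numpy as np
    _demo = pd.DataFrame({
        'Close': np.linspace(100.0, 120.0, 40),      # 40 synthetic closing prices
        'Volume': np.full(40, 1_000_000.0),          # constant volume for simplicity
    })
    _sma = _demo['Close'].rolling(window=20).mean()  # 20-day simple moving average
    _ema = _demo['Close'].ewm(span=20).mean()        # 20-day exponential moving average
    _std = _demo['Close'].rolling(window=20).std()
    _bb_upper = _sma + 2 * _std                      # Bollinger Bands: SMA +/- 2 rolling std
    _bb_lower = _sma - 2 * _std
    _vwap = (_demo['Close'] * _demo['Volume']).cumsum() / _demo['Volume'].cumsum()
    # Note: like the app's version, this VWAP is cumulative over the whole date
    # range; a session-by-session VWAP would reset the cumulative sums each day.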
fig.update_layout(xaxis_rangeslider_visible=False) 77 | st.plotly_chart(fig) 78 | 79 | # Analyze chart with LLaMA 3.2 Vision 80 | st.subheader("AI-Powered Analysis") 81 | if st.button("Run AI Analysis"): 82 | with st.spinner("Analyzing the chart, please wait..."): 83 | # Save chart as a temporary image 84 | with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile: 85 | fig.write_image(tmpfile.name) 86 | tmpfile_path = tmpfile.name 87 | 88 | # Read image and encode to Base64 89 | with open(tmpfile_path, "rb") as image_file: 90 | image_data = base64.b64encode(image_file.read()).decode('utf-8') 91 | 92 | # Prepare AI analysis request 93 | messages = [{ 94 | 'role': 'user', 95 | 'content': """You are a Stock Trader specializing in Technical Analysis at a top financial institution. 96 | Analyze the stock chart's technical indicators and provide a buy/hold/sell recommendation. 97 | Base your recommendation only on the candlestick chart and the displayed technical indicators. 98 | First, provide the recommendation, then, provide your detailed reasoning. 99 | """, 100 | 'images': [image_data] 101 | }] 102 | response = ollama.chat(model='llama3.2-vision', messages=messages) 103 | 104 | # Display AI analysis result 105 | st.write("**AI Analysis Results:**") 106 | st.write(response["message"]["content"]) 107 | 108 | # Clean up temporary file 109 | os.remove(tmpfile_path) 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /AutoScraper Tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# AutoScraper Tutorial" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Example 1: Pulling Text" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from autoscraper import AutoScraper\n", 31 | "\n", 32 | "# Web Page to Scrape from\n", 33 | "url = 'https://www.noaa.gov/media-releases'\n", 34 | "\n", 35 | "# Example Text to Pull\n", 36 | "# Note: Change below to most recent news release headline on 'https://www.noaa.gov/media-releases'\n", 37 | "news_list = [\"Applications now open nationwide for community-led heat-monitoring campaigns\"]\n", 38 | "\n", 39 | "# Initialize AutoScraper\n", 40 | "scraper = AutoScraper()\n", 41 | "\n", 42 | "# Build Model\n", 43 | "news_result = scraper.build(url, news_list)\n", 44 | "\n", 45 | "# Review Results\n", 46 | "news_result" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "### Example 2: Pulling Tabular Data" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from autoscraper import AutoScraper\n", 63 | "\n", 64 | "# Web Page to Scrape from\n", 65 | "url = 'https://en.wikipedia.org/wiki/List_of_counties_in_California'\n", 66 | "\n", 67 | "# Example Text to Pull\n", 68 | "county_list = [\"Alameda County\",\"Yuba County\"]\n", 69 | "est_list = [\"1,622,188\",\"85,722\"]\n", 70 | "\n", 71 | "\n", 72 | "# Initialize AutoScraper\n", 73 | "scraper = AutoScraper()\n", 74 | "\n", 75 | "# Build Model\n", 76 | "county_result = 
scraper.build(url, county_list)\n", 77 | "est_result = scraper.build(url, est_list)\n", 78 | "\n", 79 | "# Review Results\n", 80 | "print(county_result)\n", 81 | "print(est_result)\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "import pandas as pd\n", 91 | "\n", 92 | "# Convert Lists to Dictionary\n", 93 | "data = {'County': county_result, 'Estimated Population': est_result}\n", 94 | "\n", 95 | "# Convert Dictionary to Dataframe\n", 96 | "df = pd.DataFrame(data)\n", 97 | "\n", 98 | "df" 99 | ] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "general_env", 105 | "language": "python", 106 | "name": "python3" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.12.3" 119 | } 120 | }, 121 | "nbformat": 4, 122 | "nbformat_minor": 2 123 | } 124 | -------------------------------------------------------------------------------- /LLM_timeseries_crypto.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Predicting Crypto with LLMs" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 10, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import pandas as pd\n", 24 | "import numpy as np\n", 25 | "from datetime import datetime, timedelta\n", 26 | "import yfinance as yf\n", 27 | "from statsmodels.tsa.arima.model import ARIMA\n", 28 | "from langchain_community.llms import Ollama" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Pull Crypto data" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 11, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# Pull stock data from yfinance for the past month\n", 45 | "def pull_stocks(ticker):\n", 46 | " end_date = datetime.today()\n", 47 | " start_date = end_date - timedelta(days=30)\n", 48 | " stock_data = yf.Ticker(ticker)\n", 49 | " stock_df = stock_data.history(start=start_date, end=end_date)\n", 50 | " stock_df.index = stock_df.index.tz_localize(None) # Ensure stock data is timezone-naive\n", 51 | " stock_df = stock_df.reset_index()\n", 52 | " stock_df['Date'] = stock_df['Date'].dt.strftime('%Y-%m-%d')\n", 53 | "\n", 54 | " stock_df['pct_change'] = stock_df['Close'].pct_change()\n", 55 | "\n", 56 | " stock_df = stock_df[stock_df['pct_change'].notna()==True]\n", 57 | "\n", 58 | " stock_df = stock_df[['Date','pct_change']]\n", 59 | "\n", 60 | " actual_final = stock_df.tail(1)\n", 61 | "\n", 62 | " # stock_df = stock_df.iloc[:-1]\n", 63 | "\n", 64 | " return stock_df, actual_final\n", 65 | "\n", 66 | "btc, btc_final = pull_stocks('BTC-USD')\n", 67 | "eth, eth_final = pull_stocks('ETH-USD')\n", 68 | "xrp, xrp_final = pull_stocks('XRP-USD')\n", 69 | "\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Run ARIMA" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 12, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stderr", 86 | "output_type": "stream", 87 | 
"text": [ 88 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 89 | " self._init_dates(dates, freq)\n", 90 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 91 | " self._init_dates(dates, freq)\n", 92 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 93 | " self._init_dates(dates, freq)\n", 94 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 95 | " self._init_dates(dates, freq)\n", 96 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 97 | " self._init_dates(dates, freq)\n", 98 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 99 | " self._init_dates(dates, freq)\n", 100 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. 
Using zeros as starting parameters.\n", 101 | "  warn('Non-invertible starting MA parameters found.'\n", 102 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 103 | "  self._init_dates(dates, freq)\n", 104 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 105 | "  self._init_dates(dates, freq)\n", 106 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n", 107 | "  self._init_dates(dates, freq)\n" 108 | ] 109 | }, 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "Predicted percentage change for next day: -0.007978\n", 115 | "Predicted percentage change for next day: -0.005279\n", 116 | "Predicted percentage change for next day: -0.006455\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "def arima(timeseries_df):\n", 122 | "    # Ensure 'Date' is the index and in datetime format\n", 123 | "    timeseries_df.set_index('Date', inplace=True)\n", 124 | "    timeseries_df.index = pd.to_datetime(timeseries_df.index)\n", 125 | "\n", 126 | "    # Remove the last row (the held-out actual value for the day being predicted)\n", 127 | "    timeseries_df = timeseries_df[:-1]\n", 128 | "\n", 129 | "    # Convert percentage strings to float if necessary\n", 130 | "    if timeseries_df['pct_change'].dtype == 'object':\n", 131 | "        timeseries_df['pct_change'] = timeseries_df['pct_change'].str.rstrip('%').astype('float') / 100.0\n", 132 | "\n", 133 | "    # Fit ARIMA model\n", 134 | "    model = ARIMA(timeseries_df['pct_change'].dropna(), order=(1, 1, 1))\n", 135 | "    results = model.fit()\n", 136 | "\n", 137 | "    # Predict the next day's percentage change\n", 138 | "    forecast = results.forecast(steps=1)\n", 139 | "    predicted_pct_change = forecast.values[0]\n", 140 | "\n", 141 | "    print(f\"Predicted percentage change for next day: {predicted_pct_change:.6f}\")\n", 142 | "\n", 143 | "arima(btc)\n", 144 | "arima(eth)\n", 145 | "arima(xrp)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Prepare data for LLM" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 13, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "from io import StringIO\n", 162 | "\n", 163 | "def convert_to_csv_string(timeseries):\n", 164 | "\n", 165 | "    timeseries = timeseries.reset_index()\n", 166 | "\n", 167 | "    timeseries['pct_change'] = np.round(timeseries['pct_change'], 6)\n", 168 | "\n", 169 | "    # Remove final row\n", 170 | "    timeseries = timeseries.iloc[:-1]\n", 171 | "\n", 172 | "    # Convert DataFrame to CSV string\n", 173 | "    csv_buffer = StringIO()\n", 174 | "    timeseries.to_csv(csv_buffer, index=False)\n", 175 | "    csv_string = csv_buffer.getvalue()\n", 176 | "\n", 177 | "    return csv_string\n", 178 | "\n", 179 | "\n", 180 | "btc_for_llm = convert_to_csv_string(btc)\n", 181 | "eth_for_llm = convert_to_csv_string(eth)\n", 182 | "xrp_for_llm = convert_to_csv_string(xrp)\n" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 14, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [
194 | "Date,pct_change\n", 195 | "2024-05-31,-0.012778\n", 196 | "2024-06-01,0.003193\n", 197 | "2024-06-02,0.00066\n", 198 | "2024-06-03,0.015545\n", 199 | "2024-06-04,0.025623\n", 200 | "2024-06-05,0.007299\n", 201 | "2024-06-06,-0.004581\n", 202 | "2024-06-07,-0.019992\n", 203 | "2024-06-08,-0.000531\n", 204 | "2024-06-09,0.004938\n", 205 | "2024-06-10,-0.001949\n", 206 | "2024-06-11,-0.031365\n", 207 | "2024-06-12,0.013503\n", 208 | "2024-06-13,-0.021758\n", 209 | "2024-06-14,-0.011165\n", 210 | "2024-06-15,0.002725\n", 211 | "2024-06-16,0.006769\n", 212 | "2024-06-17,-0.002232\n", 213 | "2024-06-18,-0.020297\n", 214 | "2024-06-19,-0.00277\n", 215 | "2024-06-20,-0.002026\n", 216 | "2024-06-21,-0.011298\n", 217 | "2024-06-22,0.00244\n", 218 | "2024-06-23,-0.016681\n", 219 | "2024-06-24,-0.045954\n", 220 | "2024-06-25,0.025337\n", 221 | "2024-06-26,-0.016073\n", 222 | "2024-06-27,0.013049\n", 223 | "\n" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "print(btc_for_llm)" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Run LLM" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "Note: This requires an active local Ollama server running and the llama3, mistral, and gemma2 models installed" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 15, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "## Note: Change the date of the prediction/forecast for your own use (here, 2024-06-28)\n", 252 | "\n", 253 | "def predict_timeseries(timeseries):\n", 254 | "    output = llm.invoke(f\"\"\"\n", 255 | "    You are a large language model with time series forecasting capabilities.\n", 256 | "    Predict the percent change for the day immediately after the end of the provided time series (2024-06-28).\n", 257 | "    Use only your model capabilities, not any other method.\n", 258 | "    The data is in the format of a csv file.\n", 259 | "    The dataset includes:\n", 260 | "    - Date\n", 261 | "    - Percent change in the cryptocurrency from the previous day\n", 262 | "    Provide only the forecasted percent change for 2024-06-28 as a point estimate. 
\n", 263 | " Do not include any other text or context, just the one value:\n", 264 | " {timeseries}\n", 265 | " \"\"\")\n", 266 | " return output.strip()\n" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 16, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "name": "stdout", 276 | "output_type": "stream", 277 | "text": [ 278 | "0.008937 (rounded to four decimal places)\n", 279 | "0.0198 (rounded to two decimal places)\n", 280 | "0.00895 (rounded to four decimal places)\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "llm = Ollama(model=\"mistral\", temperature=0)\n", 286 | "\n", 287 | "print(predict_timeseries(btc_for_llm))\n", 288 | "print(predict_timeseries(eth_for_llm))\n", 289 | "print(predict_timeseries(xrp_for_llm))" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 17, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "0.011345\n", 302 | "0.011345\n", 303 | "0.005211\n" 304 | ] 305 | } 306 | ], 307 | "source": [ 308 | "llm = Ollama(model=\"llama3\", temperature=0)\n", 309 | "\n", 310 | "print(predict_timeseries(btc_for_llm))\n", 311 | "print(predict_timeseries(eth_for_llm))\n", 312 | "print(predict_timeseries(xrp_for_llm))\n" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 18, 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | "0.012778\n", 325 | "0.012345\n", 326 | "0.005678\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "llm = Ollama(model=\"gemma2\", temperature=0)\n", 332 | "\n", 333 | "print(predict_timeseries(btc_for_llm))\n", 334 | "print(predict_timeseries(eth_for_llm))\n", 335 | "print(predict_timeseries(xrp_for_llm))\n" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "## Actual values for predicted day" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 19, 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "data": { 352 | "text/html": [ 353 | "
\n", 354 | "\n", 367 | "\n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | "
Datepct_change
292024-06-28-0.020853
\n", 383 | "
" 384 | ], 385 | "text/plain": [ 386 | " Date pct_change\n", 387 | "29 2024-06-28 -0.020853" 388 | ] 389 | }, 390 | "execution_count": 19, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "btc_final" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 20, 402 | "metadata": {}, 403 | "outputs": [ 404 | { 405 | "data": { 406 | "text/html": [ 407 | "
\n", 408 | "\n", 421 | "\n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | "
Datepct_change
292024-06-28-0.020659
\n", 437 | "
" 438 | ], 439 | "text/plain": [ 440 | " Date pct_change\n", 441 | "29 2024-06-28 -0.020659" 442 | ] 443 | }, 444 | "execution_count": 20, 445 | "metadata": {}, 446 | "output_type": "execute_result" 447 | } 448 | ], 449 | "source": [ 450 | "eth_final" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 21, 456 | "metadata": {}, 457 | "outputs": [ 458 | { 459 | "data": { 460 | "text/html": [ 461 | "
\n", 462 | "\n", 475 | "\n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | "
Datepct_change
292024-06-28-0.007943
\n", 491 | "
" 492 | ], 493 | "text/plain": [ 494 | " Date pct_change\n", 495 | "29 2024-06-28 -0.007943" 496 | ] 497 | }, 498 | "execution_count": 21, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "xrp_final" 505 | ] 506 | } 507 | ], 508 | "metadata": { 509 | "kernelspec": { 510 | "display_name": "general_env", 511 | "language": "python", 512 | "name": "python3" 513 | }, 514 | "language_info": { 515 | "codemirror_mode": { 516 | "name": "ipython", 517 | "version": 3 518 | }, 519 | "file_extension": ".py", 520 | "mimetype": "text/x-python", 521 | "name": "python", 522 | "nbconvert_exporter": "python", 523 | "pygments_lexer": "ipython3", 524 | "version": "3.1.-1" 525 | } 526 | }, 527 | "nbformat": 4, 528 | "nbformat_minor": 2 529 | } 530 | -------------------------------------------------------------------------------- /Pandas_Top_5_Tricks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Source: @DeepCharts Youtube Channel" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Top 5 Pandas Tips and Tricks" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import pandas as pd" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### 1. Merging with the Indicator Argument" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | "
keyvalue1value2_merge
0A1.0NaNleft_only
1B2.04.0both
2C3.05.0both
3DNaN6.0right_only
\n", 96 | "
" 97 | ], 98 | "text/plain": [ 99 | " key value1 value2 _merge\n", 100 | "0 A 1.0 NaN left_only\n", 101 | "1 B 2.0 4.0 both\n", 102 | "2 C 3.0 5.0 both\n", 103 | "3 D NaN 6.0 right_only" 104 | ] 105 | }, 106 | "execution_count": 3, 107 | "metadata": {}, 108 | "output_type": "execute_result" 109 | } 110 | ], 111 | "source": [ 112 | "df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})\n", 113 | "df2 = pd.DataFrame({'key': ['B', 'C', 'D'], 'value2': [4, 5, 6]})\n", 114 | "\n", 115 | "merged = pd.merge(df1, df2, on='key', how='outer', indicator=True)\n", 116 | "merged" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### 2. Custom Chaining with pipe" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/html": [ 134 | "
\n", 135 | "\n", 148 | "\n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | "
QuantityPriceTotal
3202505000
1151502250
2102002000
\n", 178 | "
" 179 | ], 180 | "text/plain": [ 181 | " Quantity Price Total\n", 182 | "3 20 250 5000\n", 183 | "1 15 150 2250\n", 184 | "2 10 200 2000" 185 | ] 186 | }, 187 | "execution_count": 5, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "df = pd.DataFrame({\n", 194 | " 'Quantity': [10, 15, 10, 20],\n", 195 | " 'Price': [100, 150, 200, 250]\n", 196 | "})\n", 197 | "\n", 198 | "\n", 199 | "# Custom function to calculate Total\n", 200 | "def add_total(df):\n", 201 | " df['Total'] = df['Quantity'] * df['Price']\n", 202 | " return df\n", 203 | "\n", 204 | "# Method chaining with pipe\n", 205 | "result = (\n", 206 | " df\n", 207 | " .pipe(add_total)\n", 208 | " .query('Total > 1000')\n", 209 | " .sort_values('Total', ascending=False)\n", 210 | ")\n", 211 | "\n", 212 | "result" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "### 3. Window Functions (Moving Average and Cumulative Sum)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 7, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/html": [ 230 | "
\n", 231 | "\n", 244 | "\n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | "
DateSales3-Day MACumulative Sales
02023-01-01100NaN100.0
12023-01-02150NaN250.0
22023-01-03200150.0450.0
32023-01-04250200.0700.0
42023-01-05300250.01000.0
52023-01-06350300.01350.0
62023-01-07400350.01750.0
\n", 306 | "
" 307 | ], 308 | "text/plain": [ 309 | " Date Sales 3-Day MA Cumulative Sales\n", 310 | "0 2023-01-01 100 NaN 100.0\n", 311 | "1 2023-01-02 150 NaN 250.0\n", 312 | "2 2023-01-03 200 150.0 450.0\n", 313 | "3 2023-01-04 250 200.0 700.0\n", 314 | "4 2023-01-05 300 250.0 1000.0\n", 315 | "5 2023-01-06 350 300.0 1350.0\n", 316 | "6 2023-01-07 400 350.0 1750.0" 317 | ] 318 | }, 319 | "execution_count": 7, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "# Sample DataFrame\n", 326 | "df = pd.DataFrame({\n", 327 | " 'Date': pd.date_range('2023-01-01', periods=7),\n", 328 | " 'Sales': [100, 150, 200, 250, 300, 350, 400]\n", 329 | "})\n", 330 | "\n", 331 | "# Rolling average\n", 332 | "df['3-Day MA'] = df['Sales'].rolling(window=3).mean()\n", 333 | "\n", 334 | "# Cumulative sum\n", 335 | "df['Cumulative Sales'] = df['Sales'].expanding().sum()\n", 336 | "\n", 337 | "df" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "metadata": {}, 343 | "source": [ 344 | "### 4. Identify Duplicates and Drop Duplicates" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 9, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/html": [ 355 | "
\n", 356 | "\n", 369 | "\n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | "
IDName
01Alice
12Bob
22Bob
33Charlie
44David
54David
\n", 410 | "
" 411 | ], 412 | "text/plain": [ 413 | " ID Name\n", 414 | "0 1 Alice\n", 415 | "1 2 Bob\n", 416 | "2 2 Bob\n", 417 | "3 3 Charlie\n", 418 | "4 4 David\n", 419 | "5 4 David" 420 | ] 421 | }, 422 | "execution_count": 9, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "# Sample DataFrame with duplicates\n", 429 | "df = pd.DataFrame({\n", 430 | " 'ID': [1, 2, 2, 3, 4, 4],\n", 431 | " 'Name': ['Alice', 'Bob', 'Bob', 'Charlie', 'David', 'David']\n", 432 | "})\n", 433 | "df" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 11, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/html": [ 444 | "
\n", 445 | "\n", 458 | "\n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | "
IDName
12Bob
22Bob
44David
54David
\n", 489 | "
" 490 | ], 491 | "text/plain": [ 492 | " ID Name\n", 493 | "1 2 Bob\n", 494 | "2 2 Bob\n", 495 | "4 4 David\n", 496 | "5 4 David" 497 | ] 498 | }, 499 | "execution_count": 11, 500 | "metadata": {}, 501 | "output_type": "execute_result" 502 | } 503 | ], 504 | "source": [ 505 | "# Identify duplicates\n", 506 | "duplicates = df[df.duplicated(subset='ID', keep=False)]\n", 507 | "duplicates\n" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 13, 513 | "metadata": {}, 514 | "outputs": [ 515 | { 516 | "data": { 517 | "text/html": [ 518 | "
\n", 519 | "\n", 532 | "\n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | "
IDName
01Alice
12Bob
33Charlie
44David
\n", 563 | "
" 564 | ], 565 | "text/plain": [ 566 | " ID Name\n", 567 | "0 1 Alice\n", 568 | "1 2 Bob\n", 569 | "3 3 Charlie\n", 570 | "4 4 David" 571 | ] 572 | }, 573 | "execution_count": 13, 574 | "metadata": {}, 575 | "output_type": "execute_result" 576 | } 577 | ], 578 | "source": [ 579 | "# Remove duplicates, keep first\n", 580 | "df_cleaned = df.drop_duplicates(subset='ID')\n", 581 | "df_cleaned" 582 | ] 583 | }, 584 | { 585 | "cell_type": "markdown", 586 | "metadata": {}, 587 | "source": [ 588 | "### 5. Binning Data with cut and qcut" 589 | ] 590 | }, 591 | { 592 | "cell_type": "code", 593 | "execution_count": 15, 594 | "metadata": {}, 595 | "outputs": [ 596 | { 597 | "data": { 598 | "text/html": [ 599 | "
\n", 600 | "\n", 613 | "\n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | "
AgeIncome
02225000
12527000
22930000
33432000
44540000
55250000
66160000
77070000
88080000
99090000
\n", 674 | "
" 675 | ], 676 | "text/plain": [ 677 | " Age Income\n", 678 | "0 22 25000\n", 679 | "1 25 27000\n", 680 | "2 29 30000\n", 681 | "3 34 32000\n", 682 | "4 45 40000\n", 683 | "5 52 50000\n", 684 | "6 61 60000\n", 685 | "7 70 70000\n", 686 | "8 80 80000\n", 687 | "9 90 90000" 688 | ] 689 | }, 690 | "execution_count": 15, 691 | "metadata": {}, 692 | "output_type": "execute_result" 693 | } 694 | ], 695 | "source": [ 696 | "# Sample data\n", 697 | "data = {\n", 698 | " 'Age': [22, 25, 29, 34, 45, 52, 61, 70, 80, 90],\n", 699 | " 'Income': [25000, 27000, 30000, 32000, 40000, 50000, 60000, 70000, 80000, 90000]\n", 700 | "}\n", 701 | "\n", 702 | "df = pd.DataFrame(data)\n", 703 | "df" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 17, 709 | "metadata": {}, 710 | "outputs": [ 711 | { 712 | "data": { 713 | "text/html": [ 714 | "
\n", 715 | "\n", 728 | "\n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | "
AgeIncomeAge Group
02225000Young Adult
12527000Young Adult
22930000Young Adult
33432000Young Adult
44540000Adult
55250000Adult
66160000Senior
77070000Senior
88080000Senior
99090000Senior
\n", 800 | "
" 801 | ], 802 | "text/plain": [ 803 | " Age Income Age Group\n", 804 | "0 22 25000 Young Adult\n", 805 | "1 25 27000 Young Adult\n", 806 | "2 29 30000 Young Adult\n", 807 | "3 34 32000 Young Adult\n", 808 | "4 45 40000 Adult\n", 809 | "5 52 50000 Adult\n", 810 | "6 61 60000 Senior\n", 811 | "7 70 70000 Senior\n", 812 | "8 80 80000 Senior\n", 813 | "9 90 90000 Senior" 814 | ] 815 | }, 816 | "execution_count": 17, 817 | "metadata": {}, 818 | "output_type": "execute_result" 819 | } 820 | ], 821 | "source": [ 822 | "# Equal-width binning for Age\n", 823 | "age_bins = [0, 18, 35, 60, 100]\n", 824 | "age_labels = ['Child', 'Young Adult', 'Adult', 'Senior']\n", 825 | "df['Age Group'] = pd.cut(df['Age'], bins=age_bins, labels=age_labels)\n", 826 | "df" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": 19, 832 | "metadata": {}, 833 | "outputs": [ 834 | { 835 | "data": { 836 | "text/html": [ 837 | "
\n", 838 | "\n", 851 | "\n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | "
AgeIncomeAge GroupIncome Quartile
02225000Young AdultQ1
12527000Young AdultQ1
22930000Young AdultQ1
33432000Young AdultQ2
44540000AdultQ2
55250000AdultQ3
66160000SeniorQ3
77070000SeniorQ4
88080000SeniorQ4
99090000SeniorQ4
\n", 934 | "
" 935 | ], 936 | "text/plain": [ 937 | " Age Income Age Group Income Quartile\n", 938 | "0 22 25000 Young Adult Q1\n", 939 | "1 25 27000 Young Adult Q1\n", 940 | "2 29 30000 Young Adult Q1\n", 941 | "3 34 32000 Young Adult Q2\n", 942 | "4 45 40000 Adult Q2\n", 943 | "5 52 50000 Adult Q3\n", 944 | "6 61 60000 Senior Q3\n", 945 | "7 70 70000 Senior Q4\n", 946 | "8 80 80000 Senior Q4\n", 947 | "9 90 90000 Senior Q4" 948 | ] 949 | }, 950 | "execution_count": 19, 951 | "metadata": {}, 952 | "output_type": "execute_result" 953 | } 954 | ], 955 | "source": [ 956 | "# Quantile-based binning for Income\n", 957 | "df['Income Quartile'] = pd.qcut(df['Income'], 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])\n", 958 | "df" 959 | ] 960 | }, 961 | { 962 | "cell_type": "markdown", 963 | "metadata": {}, 964 | "source": [ 965 | "### BONUS. Interpolating Data" 966 | ] 967 | }, 968 | { 969 | "cell_type": "code", 970 | "execution_count": 21, 971 | "metadata": {}, 972 | "outputs": [ 973 | { 974 | "data": { 975 | "text/html": [ 976 | "
\n", 977 | "\n", 990 | "\n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | "
TimeValue
02020-01-011.0
12020-01-02NaN
22020-01-03NaN
32020-01-044.0
42020-01-055.0
\n", 1026 | "
" 1027 | ], 1028 | "text/plain": [ 1029 | " Time Value\n", 1030 | "0 2020-01-01 1.0\n", 1031 | "1 2020-01-02 NaN\n", 1032 | "2 2020-01-03 NaN\n", 1033 | "3 2020-01-04 4.0\n", 1034 | "4 2020-01-05 5.0" 1035 | ] 1036 | }, 1037 | "execution_count": 21, 1038 | "metadata": {}, 1039 | "output_type": "execute_result" 1040 | } 1041 | ], 1042 | "source": [ 1043 | "import numpy as np\n", 1044 | "\n", 1045 | "df = pd.DataFrame({'Time': pd.date_range(start='1/1/2020', periods=5, freq='D'),\n", 1046 | " 'Value': [1, np.nan, np.nan, 4, 5]})\n", 1047 | "df" 1048 | ] 1049 | }, 1050 | { 1051 | "cell_type": "code", 1052 | "execution_count": 23, 1053 | "metadata": {}, 1054 | "outputs": [ 1055 | { 1056 | "data": { 1057 | "text/html": [ 1058 | "
\n", 1059 | "\n", 1072 | "\n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | "
TimeValueInterpolated
02020-01-011.01.0
12020-01-02NaN2.0
22020-01-03NaN3.0
32020-01-044.04.0
42020-01-055.05.0
\n", 1114 | "
" 1115 | ], 1116 | "text/plain": [ 1117 | " Time Value Interpolated\n", 1118 | "0 2020-01-01 1.0 1.0\n", 1119 | "1 2020-01-02 NaN 2.0\n", 1120 | "2 2020-01-03 NaN 3.0\n", 1121 | "3 2020-01-04 4.0 4.0\n", 1122 | "4 2020-01-05 5.0 5.0" 1123 | ] 1124 | }, 1125 | "execution_count": 23, 1126 | "metadata": {}, 1127 | "output_type": "execute_result" 1128 | } 1129 | ], 1130 | "source": [ 1131 | "df['Interpolated'] = df['Value'].interpolate(method='linear')\n", 1132 | "df" 1133 | ] 1134 | } 1135 | ], 1136 | "metadata": { 1137 | "kernelspec": { 1138 | "display_name": "general_env", 1139 | "language": "python", 1140 | "name": "python3" 1141 | }, 1142 | "language_info": { 1143 | "codemirror_mode": { 1144 | "name": "ipython", 1145 | "version": 3 1146 | }, 1147 | "file_extension": ".py", 1148 | "mimetype": "text/x-python", 1149 | "name": "python", 1150 | "nbconvert_exporter": "python", 1151 | "pygments_lexer": "ipython3", 1152 | "version": "3.12.3" 1153 | } 1154 | }, 1155 | "nbformat": 4, 1156 | "nbformat_minor": 2 1157 | } 1158 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # projects -------------------------------------------------------------------------------- /ai_coding_agent_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Coding Agents with smolagents and Gemini Flash" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Libraries" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel\n", 31 | "import os" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "### Gemini API Key" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "os.environ[\"GEMINI_API_KEY\"] = \"API Key Goes Here\"" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# Gemini\n", 57 | "model = LiteLLMModel(model_id=\"gemini/gemini-1.5-flash\")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 14, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# Ollama (Llama3.2)\n", 67 | "# model = LiteLLMModel(model_id=\"ollama/llama3.2\")\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## Agent Creation" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# Define the Feature Selection Agent\n", 84 | "feature_selection_agent = CodeAgent(\n", 85 | " tools=[DuckDuckGoSearchTool], # search internet if necessary\n", 86 | " additional_authorized_imports=['pandas','statsmodels','sklearn','numpy','json'], # packages for code interpreter\n", 87 | " model=model # model set above\n", 88 | ")\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "### Set Task Prompt" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 6, 101 | 
"metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Task for the agent\n", 105 | "task = \"\"\"\n", 106 | "1. Load the Diabetes dataset from the 'sklearn' library using the following code:\n", 107 | " from sklearn.datasets import load_diabetes\n", 108 | " import pandas as pd\n", 109 | "\n", 110 | " # Load the dataset\n", 111 | " data, target = load_diabetes(return_X_y=True, as_frame=False)\n", 112 | "\n", 113 | " # Create a DataFrame\n", 114 | " df = pd.DataFrame(data, columns=load_diabetes().feature_names)\n", 115 | " df['target'] = target\n", 116 | "2. Split data with a train/test split of 75%/25%\n", 117 | "3. Create a linear regression model on the training data predicting the target variable using the \"sklearn\" or \"statsmodels\" library.\n", 118 | "4. Execute on a strategy of combination of up to 3 predictors that attains the lowest root mean square error (RMSE) on the testing data. \n", 119 | " (You can't use the target variable).\n", 120 | "5. Use feature engineering as needed to improve model performance.\n", 121 | "6. Based on the lowest RMSE of each model for the testing data, provide a final list of predictors for the top 5 models\n", 122 | "7. Output as a table\n", 123 | "\"\"\"" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## Execute the agent and task" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "result = feature_selection_agent.run(task)" 140 | ] 141 | } 142 | ], 143 | "metadata": { 144 | "kernelspec": { 145 | "display_name": "smol_env", 146 | "language": "python", 147 | "name": "python3" 148 | }, 149 | "language_info": { 150 | "codemirror_mode": { 151 | "name": "ipython", 152 | "version": 3 153 | }, 154 | "file_extension": ".py", 155 | "mimetype": "text/x-python", 156 | "name": "python", 157 | "nbconvert_exporter": "python", 158 | "pygments_lexer": "ipython3", 159 | "version": "3.12.8" 160 | } 161 | }, 162 | "nbformat": 4, 163 | "nbformat_minor": 2 164 | } 165 | -------------------------------------------------------------------------------- /ai_image_generator.py: -------------------------------------------------------------------------------- 1 | # Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts) 2 | 3 | # PART 1: LIBRARY IMPORTS 4 | 5 | import streamlit as st 6 | import replicate 7 | import os 8 | import requests 9 | from PIL import Image 10 | from io import BytesIO 11 | 12 | 13 | # PART 2: SETUP REPLICATE CREDENTIALS AND AUTHENTICATION 14 | 15 | # Set up your Replicate API key (optionally from environment variable) 16 | REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN") # You can store your API key in an environment variable 17 | 18 | if REPLICATE_API_TOKEN is None: 19 | st.error("Replicate API token not found. 
Please set it in your environment.") 20 | st.stop() 21 | 22 | # Authenticate with Replicate using the API token 23 | replicate.Client(api_token=REPLICATE_API_TOKEN) 24 | 25 | 26 | # PART 3: STREAMLIT WEBAPP 27 | 28 | # Initialize session state for storing the generated image URL 29 | if 'image_url' not in st.session_state: 30 | st.session_state['image_url'] = None 31 | 32 | 33 | # PART 3A: SIDEBAR OPTIONS 34 | 35 | # Sidebar inputs 36 | with st.sidebar: 37 | 38 | # Title of the app 39 | st.title('AI Image Generation: Flux Schnell') 40 | 41 | st.header("Prompt and Options") 42 | 43 | # Input box for the user to type the prompt (using text_area for multiline input) 44 | prompt = st.text_area('Enter a prompt to generate an image', height=50) 45 | 46 | # Checkbox to enable or disable random seed 47 | use_random_seed = st.checkbox('Use Random Seed', value=True) 48 | 49 | # Slider for random seed (only if the checkbox is checked) 50 | if use_random_seed: 51 | random_seed = st.slider('Random Seed', 0, 1000, 435) 52 | else: 53 | random_seed = None 54 | 55 | # Slider for output quality 56 | output_quality = st.slider('Output Quality', 50, 100, 80) 57 | 58 | # Create two columns for Generate and Download buttons 59 | col1, col2 = st.columns([1, 1]) 60 | 61 | # Button to submit the prompt and generate image 62 | generate_button = col1.button('Generate Image') 63 | 64 | 65 | # PART 4A: MAIN CONTENT AREA (IMAGE GENERATION AND ACCESS) 66 | 67 | # Check if the button was pressed and if there is a prompt 68 | if generate_button and prompt: 69 | with st.spinner('Generating image...'): 70 | try: 71 | # Call the Flux Schnell model on Replicate 72 | input_data = { 73 | "prompt": prompt, 74 | "aspect_ratio": '3:2', # Set the aspect ratio 75 | "quality": output_quality # Set the output quality 76 | } 77 | 78 | # Add random seed only if it's enabled 79 | if random_seed is not None: 80 | input_data["seed"] = random_seed 81 | 82 | # Use replicate.run to invoke the model 83 | output = replicate.run( 84 | "black-forest-labs/flux-schnell", # Model name 85 | input=input_data # Input to the model 86 | ) 87 | 88 | # Store the generated image URL in session state 89 | st.session_state['image_url'] = output[0] # Assuming the image is the first element in output 90 | 91 | except Exception as e: 92 | st.error(f"An error occurred: {e}") 93 | 94 | # If an image URL is present in session state, display the image and download button 95 | if st.session_state['image_url']: 96 | # Display the image 97 | st.image(st.session_state['image_url'], caption='Generated Image') 98 | 99 | # Download the image from the URL 100 | response = requests.get(st.session_state['image_url']) 101 | image = Image.open(BytesIO(response.content)) 102 | 103 | # Convert the image to a binary stream and save it as .jpg 104 | img_buffer = BytesIO() 105 | image.save(img_buffer, format="JPEG") 106 | img_buffer.seek(0) 107 | 108 | # Display the download button in the second column 109 | with col2: 110 | st.download_button( 111 | label="Download Image", 112 | data=img_buffer, 113 | file_name="generated_image.jpg", 114 | mime="image/jpeg" 115 | ) -------------------------------------------------------------------------------- /ai_sentiment_analysis_gemini.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# AI Pipeline: Blue Sky Scraper + Gemini Flash Sentiment Analysis" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | 
"metadata": {}, 13 | "source": [ 14 | "### Libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import requests\n", 24 | "import pandas as pd\n", 25 | "import google.generativeai as genai\n", 26 | "import enum\n", 27 | "from typing_extensions import TypedDict\n", 28 | "import json\n", 29 | "import plotly.express as px" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "## 1. Configuration" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### Authentication and API Keys" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Replace with your Bluesky handle and password\n", 53 | "BLUESKY_HANDLE = 'handle goes here'\n", 54 | "BLUESKY_PASSWORD = 'password goes here'\n", 55 | "\n", 56 | "# Replace with your Google AI Studio API key\n", 57 | "genai.configure(api_key='api key goes here')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "### Gemini Model" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "model = genai.GenerativeModel(\"gemini-1.5-flash\") # gemini-2.0-flash-exp" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### Stock (or keyword to analyze)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 4, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "search_term = 'ADBE'" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "### Number of posts to return" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "n = 100 # Number of latest posts to retrieve" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "## 2. 
Blue Sky Web Scraper" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 6, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "# Authenticate and obtain access token\n", 122 | "auth_response = requests.post(\n", 123 | " 'https://bsky.social/xrpc/com.atproto.server.createSession',\n", 124 | " json={'identifier': BLUESKY_HANDLE, 'password': BLUESKY_PASSWORD}\n", 125 | ")\n", 126 | "auth_response.raise_for_status()\n", 127 | "access_token = auth_response.json().get('accessJwt')\n" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 7, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "# Set up the request headers with the access token\n", 137 | "headers = {'Authorization': f'Bearer {access_token}'}\n", 138 | "\n", 139 | "# Define the search parameters\n", 140 | "params = {\n", 141 | " 'q': search_term,\n", 142 | " 'sort': 'latest',\n", 143 | " 'limit': n\n", 144 | "}\n", 145 | "\n", 146 | "# Perform the search request\n", 147 | "search_response = requests.get(\n", 148 | " 'https://bsky.social/xrpc/app.bsky.feed.searchPosts',\n", 149 | " headers=headers,\n", 150 | " params=params\n", 151 | ")\n", 152 | "search_response.raise_for_status()\n", 153 | "posts = search_response.json().get('posts', [])" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 8, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# Extract data and create a list of dictionaries\n", 163 | "data = []\n", 164 | "for post in posts:\n", 165 | " author = post.get('author', {}).get('handle', 'Unknown')\n", 166 | " content = post.get('record', {}).get('text', 'No content')\n", 167 | " created_at = post.get('record', {}).get('createdAt', 'Unknown date')\n", 168 | " data.append({'Date': created_at, 'Content': content, 'Author': author})\n", 169 | "\n", 170 | "# Convert list of dictionaries to DataFrame\n", 171 | "df = pd.DataFrame(data)\n", 172 | "\n", 173 | "# Convert 'Date' column to datetime format for better handling\n", 174 | "df['Date'] = pd.to_datetime(df['Date'], errors='coerce')" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 9, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/html": [ 185 | "
\n", 186 | "\n", 199 | "\n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | "
DateContent
02024-12-31 19:10:07.207000+00:00\\n#MarjorieTaylorGreene Went Christmas Shoppin...
12024-12-31 17:39:40.015000+00:00Over the past year #AJB and #ADBE swapped from...
22024-12-31 17:39:40.014000+00:00The major changes to the port. over the year w...
32024-12-31 16:03:55.965884+00:00📊 ADBE Market Analysis - Dec 31, 2024\\n\\nCurre...
42024-12-30 16:01:16.570000+00:00Adobe knows that #DEI is good for people, good...
.........
932024-12-12 17:19:22.999000+00:00Hello, Investors! 👋\\nStocks were down modestly...
942024-12-12 16:53:12.278000+00:00Adobe posts record-breaking revenue 📈 but inve...
952024-12-12 16:20:44.597227+00:00$ADBE Technical Analysis | Dec 12\\nPrice: $549...
962024-12-12 15:49:51.063000+00:00$ADBE: Adobe shares dropped 13% as its 2025 ou...
972024-12-12 15:38:39.435000+00:00$ADBE FY-2024: Strong Q3 Perf: Rev Surpasses E...
\n", 265 | "

98 rows × 2 columns

\n", 266 | "
" 267 | ], 268 | "text/plain": [ 269 | " Date \\\n", 270 | "0 2024-12-31 19:10:07.207000+00:00 \n", 271 | "1 2024-12-31 17:39:40.015000+00:00 \n", 272 | "2 2024-12-31 17:39:40.014000+00:00 \n", 273 | "3 2024-12-31 16:03:55.965884+00:00 \n", 274 | "4 2024-12-30 16:01:16.570000+00:00 \n", 275 | ".. ... \n", 276 | "93 2024-12-12 17:19:22.999000+00:00 \n", 277 | "94 2024-12-12 16:53:12.278000+00:00 \n", 278 | "95 2024-12-12 16:20:44.597227+00:00 \n", 279 | "96 2024-12-12 15:49:51.063000+00:00 \n", 280 | "97 2024-12-12 15:38:39.435000+00:00 \n", 281 | "\n", 282 | " Content \n", 283 | "0 \\n#MarjorieTaylorGreene Went Christmas Shoppin... \n", 284 | "1 Over the past year #AJB and #ADBE swapped from... \n", 285 | "2 The major changes to the port. over the year w... \n", 286 | "3 📊 ADBE Market Analysis - Dec 31, 2024\\n\\nCurre... \n", 287 | "4 Adobe knows that #DEI is good for people, good... \n", 288 | ".. ... \n", 289 | "93 Hello, Investors! 👋\\nStocks were down modestly... \n", 290 | "94 Adobe posts record-breaking revenue 📈 but inve... \n", 291 | "95 $ADBE Technical Analysis | Dec 12\\nPrice: $549... \n", 292 | "96 $ADBE: Adobe shares dropped 13% as its 2025 ou... \n", 293 | "97 $ADBE FY-2024: Strong Q3 Perf: Rev Surpasses E... \n", 294 | "\n", 295 | "[98 rows x 2 columns]" 296 | ] 297 | }, 298 | "execution_count": 9, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "# Display the DataFrame\n", 305 | "df[['Date','Content']]" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "## 3. Google Gemini Sentiment Analysis" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 10, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "class Sentiment(enum.Enum):\n", 322 | " POSITIVE = \"positive\"\n", 323 | " NEGATIVE = \"negative\"\n", 324 | " NEUTRAL = \"neutral\"\n", 325 | "\n", 326 | "class AnalysisResult(TypedDict):\n", 327 | " is_stock_related: bool\n", 328 | " sentiment: Sentiment\n" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 11, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "\n", 338 | "def analyze_post(content: str) -> AnalysisResult:\n", 339 | " prompt = f\"\"\"\n", 340 | " Analyze the following post and determine:\n", 341 | " 1. Whether it is related to the company, {search_term}, and relates to or discusses \n", 342 | " past, current, or future stock performance of {search_term} explicitly.\n", 343 | " 2. 
If related, classify the sentiment as positive, negative, or neutral.\n",
344 |     "\n",
345 |     "    Post: \"{content}\"\n",
346 |     "    \"\"\"\n",
347 |     "    response = model.generate_content(\n",
348 |     "        prompt,\n",
349 |     "        generation_config=genai.GenerationConfig(\n",
350 |     "            response_mime_type=\"application/json\",\n",
351 |     "            response_schema=AnalysisResult\n",
352 |     "        )\n",
353 |     "    )\n",
354 |     "    if response.candidates:\n",
355 |     "        candidate_content = response.candidates[0].content\n",
356 |     "        result_text = ''.join(part.text for part in candidate_content.parts)\n",
357 |     "        try:\n",
358 |     "            result = json.loads(result_text)\n",
359 |     "            is_stock_related = result.get('is_stock_related')\n",
360 |     "            sentiment = result.get('sentiment')\n",
361 |     "            if is_stock_related is not None and sentiment is not None:\n",
362 |     "                return is_stock_related, sentiment\n",
363 |     "            else:\n",
364 |     "                print(\"Missing expected keys in the response\")\n",
365 |     "                return None, None\n",
366 |     "        except json.JSONDecodeError:\n",
367 |     "            print(\"Failed to decode JSON response\")\n",
368 |     "            return None, None\n",
369 |     "    else:\n",
370 |     "        print(\"No candidates returned\")\n",
371 |     "        return None, None\n"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": 12,
377 |    "metadata": {},
378 |    "outputs": [
379 |     {
380 |      "name": "stdout",
381 |      "output_type": "stream",
382 |      "text": [
383 |       "Missing expected keys in the response\n",
384 |       "Missing expected keys in the response\n"
385 |      ]
386 |     }
387 |    ],
388 |    "source": [
389 |     "# Apply the analysis to each post\n",
390 |     "df[['is_stock_related', 'sentiment']] = df['Content'].apply(\n",
391 |     "    lambda x: pd.Series(analyze_post(x))\n",
392 |     ")" 
393 |    ]
394 |   },
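Editor's note (added): the two "Missing expected keys" lines in the output above mean `analyze_post` returned `(None, None)` for two of the 98 posts, so those rows carry missing values downstream. For reference, a well-formed structured response under the `AnalysisResult` schema is a small JSON object like the sketch below (field order and whitespace may vary):

    # result_text as returned by Gemini (sketch):
    #   '{"is_stock_related": true, "sentiment": "negative"}'
    # which analyze_post unpacks into the tuple (True, 'negative')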
\n", 404 | "\n", 417 | "\n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | "
DateContentis_stock_relatedsentiment
02024-12-31 19:10:07.207000+00:00\\n#MarjorieTaylorGreene Went Christmas Shoppin...Trueneutral
12024-12-31 17:39:40.015000+00:00Over the past year #AJB and #ADBE swapped from...Trueneutral
22024-12-31 17:39:40.014000+00:00The major changes to the port. over the year w...Truenegative
32024-12-31 16:03:55.965884+00:00📊 ADBE Market Analysis - Dec 31, 2024\\n\\nCurre...Truenegative
42024-12-30 16:01:16.570000+00:00Adobe knows that #DEI is good for people, good...Truepositive
...............
932024-12-12 17:19:22.999000+00:00Hello, Investors! 👋\\nStocks were down modestly...Truenegative
942024-12-12 16:53:12.278000+00:00Adobe posts record-breaking revenue 📈 but inve...Truenegative
952024-12-12 16:20:44.597227+00:00$ADBE Technical Analysis | Dec 12\\nPrice: $549...Truepositive
962024-12-12 15:49:51.063000+00:00$ADBE: Adobe shares dropped 13% as its 2025 ou...Truenegative
972024-12-12 15:38:39.435000+00:00$ADBE FY-2024: Strong Q3 Perf: Rev Surpasses E...Truepositive
\n", 507 | "

98 rows × 4 columns

\n", 508 | "
" 509 | ], 510 | "text/plain": [ 511 | " Date \\\n", 512 | "0 2024-12-31 19:10:07.207000+00:00 \n", 513 | "1 2024-12-31 17:39:40.015000+00:00 \n", 514 | "2 2024-12-31 17:39:40.014000+00:00 \n", 515 | "3 2024-12-31 16:03:55.965884+00:00 \n", 516 | "4 2024-12-30 16:01:16.570000+00:00 \n", 517 | ".. ... \n", 518 | "93 2024-12-12 17:19:22.999000+00:00 \n", 519 | "94 2024-12-12 16:53:12.278000+00:00 \n", 520 | "95 2024-12-12 16:20:44.597227+00:00 \n", 521 | "96 2024-12-12 15:49:51.063000+00:00 \n", 522 | "97 2024-12-12 15:38:39.435000+00:00 \n", 523 | "\n", 524 | " Content is_stock_related \\\n", 525 | "0 \\n#MarjorieTaylorGreene Went Christmas Shoppin... True \n", 526 | "1 Over the past year #AJB and #ADBE swapped from... True \n", 527 | "2 The major changes to the port. over the year w... True \n", 528 | "3 📊 ADBE Market Analysis - Dec 31, 2024\\n\\nCurre... True \n", 529 | "4 Adobe knows that #DEI is good for people, good... True \n", 530 | ".. ... ... \n", 531 | "93 Hello, Investors! 👋\\nStocks were down modestly... True \n", 532 | "94 Adobe posts record-breaking revenue 📈 but inve... True \n", 533 | "95 $ADBE Technical Analysis | Dec 12\\nPrice: $549... True \n", 534 | "96 $ADBE: Adobe shares dropped 13% as its 2025 ou... True \n", 535 | "97 $ADBE FY-2024: Strong Q3 Perf: Rev Surpasses E... True \n", 536 | "\n", 537 | " sentiment \n", 538 | "0 neutral \n", 539 | "1 neutral \n", 540 | "2 negative \n", 541 | "3 negative \n", 542 | "4 positive \n", 543 | ".. ... \n", 544 | "93 negative \n", 545 | "94 negative \n", 546 | "95 positive \n", 547 | "96 negative \n", 548 | "97 positive \n", 549 | "\n", 550 | "[98 rows x 4 columns]" 551 | ] 552 | }, 553 | "execution_count": 13, 554 | "metadata": {}, 555 | "output_type": "execute_result" 556 | } 557 | ], 558 | "source": [ 559 | "df.drop(columns='Author',inplace=True)\n", 560 | "df" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 15, 566 | "metadata": {}, 567 | "outputs": [ 568 | { 569 | "name": "stderr", 570 | "output_type": "stream", 571 | "text": [ 572 | "/var/folders/f0/lv4rn9cj3773mrlxlb1vmf380000gn/T/ipykernel_57740/2799276885.py:5: SettingWithCopyWarning:\n", 573 | "\n", 574 | "\n", 575 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 576 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 577 | "\n", 578 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 579 | "\n" 580 | ] 581 | }, 582 | { 583 | "data": { 584 | "application/vnd.plotly.v1+json": { 585 | "config": { 586 | "plotlyServerURL": "https://plot.ly" 587 | }, 588 | "data": [ 589 | { 590 | "hovertemplate": "Date=%{x}
Positive Sentiment Score=%{y}", 591 | "legendgroup": "", 592 | "line": { 593 | "color": "#636efa", 594 | "dash": "solid" 595 | }, 596 | "marker": { 597 | "symbol": "circle" 598 | }, 599 | "mode": "lines+markers", 600 | "name": "", 601 | "orientation": "v", 602 | "showlegend": false, 603 | "type": "scatter", 604 | "x": [ 605 | "2024-12-12", 606 | "2024-12-13", 607 | "2024-12-14", 608 | "2024-12-15", 609 | "2024-12-16", 610 | "2024-12-17", 611 | "2024-12-18", 612 | "2024-12-19", 613 | "2024-12-20", 614 | "2024-12-22", 615 | "2024-12-23", 616 | "2024-12-24", 617 | "2024-12-26", 618 | "2024-12-27", 619 | "2024-12-30", 620 | "2024-12-31" 621 | ], 622 | "xaxis": "x", 623 | "y": [ 624 | 0.18181818181818182, 625 | 0.18181818181818182, 626 | 0, 627 | 0.5, 628 | 0.25, 629 | 0, 630 | 0, 631 | 0.3333333333333333, 632 | 0.75, 633 | 0.6666666666666666, 634 | 1, 635 | 0.5, 636 | 0.5, 637 | 1, 638 | 1, 639 | 0 640 | ], 641 | "yaxis": "y" 642 | } 643 | ], 644 | "layout": { 645 | "legend": { 646 | "tracegroupgap": 0 647 | }, 648 | "template": { 649 | "data": { 650 | "bar": [ 651 | { 652 | "error_x": { 653 | "color": "#2a3f5f" 654 | }, 655 | "error_y": { 656 | "color": "#2a3f5f" 657 | }, 658 | "marker": { 659 | "line": { 660 | "color": "#E5ECF6", 661 | "width": 0.5 662 | }, 663 | "pattern": { 664 | "fillmode": "overlay", 665 | "size": 10, 666 | "solidity": 0.2 667 | } 668 | }, 669 | "type": "bar" 670 | } 671 | ], 672 | "barpolar": [ 673 | { 674 | "marker": { 675 | "line": { 676 | "color": "#E5ECF6", 677 | "width": 0.5 678 | }, 679 | "pattern": { 680 | "fillmode": "overlay", 681 | "size": 10, 682 | "solidity": 0.2 683 | } 684 | }, 685 | "type": "barpolar" 686 | } 687 | ], 688 | "carpet": [ 689 | { 690 | "aaxis": { 691 | "endlinecolor": "#2a3f5f", 692 | "gridcolor": "white", 693 | "linecolor": "white", 694 | "minorgridcolor": "white", 695 | "startlinecolor": "#2a3f5f" 696 | }, 697 | "baxis": { 698 | "endlinecolor": "#2a3f5f", 699 | "gridcolor": "white", 700 | "linecolor": "white", 701 | "minorgridcolor": "white", 702 | "startlinecolor": "#2a3f5f" 703 | }, 704 | "type": "carpet" 705 | } 706 | ], 707 | "choropleth": [ 708 | { 709 | "colorbar": { 710 | "outlinewidth": 0, 711 | "ticks": "" 712 | }, 713 | "type": "choropleth" 714 | } 715 | ], 716 | "contour": [ 717 | { 718 | "colorbar": { 719 | "outlinewidth": 0, 720 | "ticks": "" 721 | }, 722 | "colorscale": [ 723 | [ 724 | 0, 725 | "#0d0887" 726 | ], 727 | [ 728 | 0.1111111111111111, 729 | "#46039f" 730 | ], 731 | [ 732 | 0.2222222222222222, 733 | "#7201a8" 734 | ], 735 | [ 736 | 0.3333333333333333, 737 | "#9c179e" 738 | ], 739 | [ 740 | 0.4444444444444444, 741 | "#bd3786" 742 | ], 743 | [ 744 | 0.5555555555555556, 745 | "#d8576b" 746 | ], 747 | [ 748 | 0.6666666666666666, 749 | "#ed7953" 750 | ], 751 | [ 752 | 0.7777777777777778, 753 | "#fb9f3a" 754 | ], 755 | [ 756 | 0.8888888888888888, 757 | "#fdca26" 758 | ], 759 | [ 760 | 1, 761 | "#f0f921" 762 | ] 763 | ], 764 | "type": "contour" 765 | } 766 | ], 767 | "contourcarpet": [ 768 | { 769 | "colorbar": { 770 | "outlinewidth": 0, 771 | "ticks": "" 772 | }, 773 | "type": "contourcarpet" 774 | } 775 | ], 776 | "heatmap": [ 777 | { 778 | "colorbar": { 779 | "outlinewidth": 0, 780 | "ticks": "" 781 | }, 782 | "colorscale": [ 783 | [ 784 | 0, 785 | "#0d0887" 786 | ], 787 | [ 788 | 0.1111111111111111, 789 | "#46039f" 790 | ], 791 | [ 792 | 0.2222222222222222, 793 | "#7201a8" 794 | ], 795 | [ 796 | 0.3333333333333333, 797 | "#9c179e" 798 | ], 799 | [ 800 | 0.4444444444444444, 801 | "#bd3786" 802 | ], 803 | [ 804 | 
0.5555555555555556, 805 | "#d8576b" 806 | ], 807 | [ 808 | 0.6666666666666666, 809 | "#ed7953" 810 | ], 811 | [ 812 | 0.7777777777777778, 813 | "#fb9f3a" 814 | ], 815 | [ 816 | 0.8888888888888888, 817 | "#fdca26" 818 | ], 819 | [ 820 | 1, 821 | "#f0f921" 822 | ] 823 | ], 824 | "type": "heatmap" 825 | } 826 | ], 827 | "heatmapgl": [ 828 | { 829 | "colorbar": { 830 | "outlinewidth": 0, 831 | "ticks": "" 832 | }, 833 | "colorscale": [ 834 | [ 835 | 0, 836 | "#0d0887" 837 | ], 838 | [ 839 | 0.1111111111111111, 840 | "#46039f" 841 | ], 842 | [ 843 | 0.2222222222222222, 844 | "#7201a8" 845 | ], 846 | [ 847 | 0.3333333333333333, 848 | "#9c179e" 849 | ], 850 | [ 851 | 0.4444444444444444, 852 | "#bd3786" 853 | ], 854 | [ 855 | 0.5555555555555556, 856 | "#d8576b" 857 | ], 858 | [ 859 | 0.6666666666666666, 860 | "#ed7953" 861 | ], 862 | [ 863 | 0.7777777777777778, 864 | "#fb9f3a" 865 | ], 866 | [ 867 | 0.8888888888888888, 868 | "#fdca26" 869 | ], 870 | [ 871 | 1, 872 | "#f0f921" 873 | ] 874 | ], 875 | "type": "heatmapgl" 876 | } 877 | ], 878 | "histogram": [ 879 | { 880 | "marker": { 881 | "pattern": { 882 | "fillmode": "overlay", 883 | "size": 10, 884 | "solidity": 0.2 885 | } 886 | }, 887 | "type": "histogram" 888 | } 889 | ], 890 | "histogram2d": [ 891 | { 892 | "colorbar": { 893 | "outlinewidth": 0, 894 | "ticks": "" 895 | }, 896 | "colorscale": [ 897 | [ 898 | 0, 899 | "#0d0887" 900 | ], 901 | [ 902 | 0.1111111111111111, 903 | "#46039f" 904 | ], 905 | [ 906 | 0.2222222222222222, 907 | "#7201a8" 908 | ], 909 | [ 910 | 0.3333333333333333, 911 | "#9c179e" 912 | ], 913 | [ 914 | 0.4444444444444444, 915 | "#bd3786" 916 | ], 917 | [ 918 | 0.5555555555555556, 919 | "#d8576b" 920 | ], 921 | [ 922 | 0.6666666666666666, 923 | "#ed7953" 924 | ], 925 | [ 926 | 0.7777777777777778, 927 | "#fb9f3a" 928 | ], 929 | [ 930 | 0.8888888888888888, 931 | "#fdca26" 932 | ], 933 | [ 934 | 1, 935 | "#f0f921" 936 | ] 937 | ], 938 | "type": "histogram2d" 939 | } 940 | ], 941 | "histogram2dcontour": [ 942 | { 943 | "colorbar": { 944 | "outlinewidth": 0, 945 | "ticks": "" 946 | }, 947 | "colorscale": [ 948 | [ 949 | 0, 950 | "#0d0887" 951 | ], 952 | [ 953 | 0.1111111111111111, 954 | "#46039f" 955 | ], 956 | [ 957 | 0.2222222222222222, 958 | "#7201a8" 959 | ], 960 | [ 961 | 0.3333333333333333, 962 | "#9c179e" 963 | ], 964 | [ 965 | 0.4444444444444444, 966 | "#bd3786" 967 | ], 968 | [ 969 | 0.5555555555555556, 970 | "#d8576b" 971 | ], 972 | [ 973 | 0.6666666666666666, 974 | "#ed7953" 975 | ], 976 | [ 977 | 0.7777777777777778, 978 | "#fb9f3a" 979 | ], 980 | [ 981 | 0.8888888888888888, 982 | "#fdca26" 983 | ], 984 | [ 985 | 1, 986 | "#f0f921" 987 | ] 988 | ], 989 | "type": "histogram2dcontour" 990 | } 991 | ], 992 | "mesh3d": [ 993 | { 994 | "colorbar": { 995 | "outlinewidth": 0, 996 | "ticks": "" 997 | }, 998 | "type": "mesh3d" 999 | } 1000 | ], 1001 | "parcoords": [ 1002 | { 1003 | "line": { 1004 | "colorbar": { 1005 | "outlinewidth": 0, 1006 | "ticks": "" 1007 | } 1008 | }, 1009 | "type": "parcoords" 1010 | } 1011 | ], 1012 | "pie": [ 1013 | { 1014 | "automargin": true, 1015 | "type": "pie" 1016 | } 1017 | ], 1018 | "scatter": [ 1019 | { 1020 | "fillpattern": { 1021 | "fillmode": "overlay", 1022 | "size": 10, 1023 | "solidity": 0.2 1024 | }, 1025 | "type": "scatter" 1026 | } 1027 | ], 1028 | "scatter3d": [ 1029 | { 1030 | "line": { 1031 | "colorbar": { 1032 | "outlinewidth": 0, 1033 | "ticks": "" 1034 | } 1035 | }, 1036 | "marker": { 1037 | "colorbar": { 1038 | "outlinewidth": 0, 1039 | "ticks": "" 1040 | } 1041 | }, 1042 | 
"type": "scatter3d" 1043 | } 1044 | ], 1045 | "scattercarpet": [ 1046 | { 1047 | "marker": { 1048 | "colorbar": { 1049 | "outlinewidth": 0, 1050 | "ticks": "" 1051 | } 1052 | }, 1053 | "type": "scattercarpet" 1054 | } 1055 | ], 1056 | "scattergeo": [ 1057 | { 1058 | "marker": { 1059 | "colorbar": { 1060 | "outlinewidth": 0, 1061 | "ticks": "" 1062 | } 1063 | }, 1064 | "type": "scattergeo" 1065 | } 1066 | ], 1067 | "scattergl": [ 1068 | { 1069 | "marker": { 1070 | "colorbar": { 1071 | "outlinewidth": 0, 1072 | "ticks": "" 1073 | } 1074 | }, 1075 | "type": "scattergl" 1076 | } 1077 | ], 1078 | "scattermapbox": [ 1079 | { 1080 | "marker": { 1081 | "colorbar": { 1082 | "outlinewidth": 0, 1083 | "ticks": "" 1084 | } 1085 | }, 1086 | "type": "scattermapbox" 1087 | } 1088 | ], 1089 | "scatterpolar": [ 1090 | { 1091 | "marker": { 1092 | "colorbar": { 1093 | "outlinewidth": 0, 1094 | "ticks": "" 1095 | } 1096 | }, 1097 | "type": "scatterpolar" 1098 | } 1099 | ], 1100 | "scatterpolargl": [ 1101 | { 1102 | "marker": { 1103 | "colorbar": { 1104 | "outlinewidth": 0, 1105 | "ticks": "" 1106 | } 1107 | }, 1108 | "type": "scatterpolargl" 1109 | } 1110 | ], 1111 | "scatterternary": [ 1112 | { 1113 | "marker": { 1114 | "colorbar": { 1115 | "outlinewidth": 0, 1116 | "ticks": "" 1117 | } 1118 | }, 1119 | "type": "scatterternary" 1120 | } 1121 | ], 1122 | "surface": [ 1123 | { 1124 | "colorbar": { 1125 | "outlinewidth": 0, 1126 | "ticks": "" 1127 | }, 1128 | "colorscale": [ 1129 | [ 1130 | 0, 1131 | "#0d0887" 1132 | ], 1133 | [ 1134 | 0.1111111111111111, 1135 | "#46039f" 1136 | ], 1137 | [ 1138 | 0.2222222222222222, 1139 | "#7201a8" 1140 | ], 1141 | [ 1142 | 0.3333333333333333, 1143 | "#9c179e" 1144 | ], 1145 | [ 1146 | 0.4444444444444444, 1147 | "#bd3786" 1148 | ], 1149 | [ 1150 | 0.5555555555555556, 1151 | "#d8576b" 1152 | ], 1153 | [ 1154 | 0.6666666666666666, 1155 | "#ed7953" 1156 | ], 1157 | [ 1158 | 0.7777777777777778, 1159 | "#fb9f3a" 1160 | ], 1161 | [ 1162 | 0.8888888888888888, 1163 | "#fdca26" 1164 | ], 1165 | [ 1166 | 1, 1167 | "#f0f921" 1168 | ] 1169 | ], 1170 | "type": "surface" 1171 | } 1172 | ], 1173 | "table": [ 1174 | { 1175 | "cells": { 1176 | "fill": { 1177 | "color": "#EBF0F8" 1178 | }, 1179 | "line": { 1180 | "color": "white" 1181 | } 1182 | }, 1183 | "header": { 1184 | "fill": { 1185 | "color": "#C8D4E3" 1186 | }, 1187 | "line": { 1188 | "color": "white" 1189 | } 1190 | }, 1191 | "type": "table" 1192 | } 1193 | ] 1194 | }, 1195 | "layout": { 1196 | "annotationdefaults": { 1197 | "arrowcolor": "#2a3f5f", 1198 | "arrowhead": 0, 1199 | "arrowwidth": 1 1200 | }, 1201 | "autotypenumbers": "strict", 1202 | "coloraxis": { 1203 | "colorbar": { 1204 | "outlinewidth": 0, 1205 | "ticks": "" 1206 | } 1207 | }, 1208 | "colorscale": { 1209 | "diverging": [ 1210 | [ 1211 | 0, 1212 | "#8e0152" 1213 | ], 1214 | [ 1215 | 0.1, 1216 | "#c51b7d" 1217 | ], 1218 | [ 1219 | 0.2, 1220 | "#de77ae" 1221 | ], 1222 | [ 1223 | 0.3, 1224 | "#f1b6da" 1225 | ], 1226 | [ 1227 | 0.4, 1228 | "#fde0ef" 1229 | ], 1230 | [ 1231 | 0.5, 1232 | "#f7f7f7" 1233 | ], 1234 | [ 1235 | 0.6, 1236 | "#e6f5d0" 1237 | ], 1238 | [ 1239 | 0.7, 1240 | "#b8e186" 1241 | ], 1242 | [ 1243 | 0.8, 1244 | "#7fbc41" 1245 | ], 1246 | [ 1247 | 0.9, 1248 | "#4d9221" 1249 | ], 1250 | [ 1251 | 1, 1252 | "#276419" 1253 | ] 1254 | ], 1255 | "sequential": [ 1256 | [ 1257 | 0, 1258 | "#0d0887" 1259 | ], 1260 | [ 1261 | 0.1111111111111111, 1262 | "#46039f" 1263 | ], 1264 | [ 1265 | 0.2222222222222222, 1266 | "#7201a8" 1267 | ], 1268 | [ 1269 | 
0.3333333333333333, 1270 | "#9c179e" 1271 | ], 1272 | [ 1273 | 0.4444444444444444, 1274 | "#bd3786" 1275 | ], 1276 | [ 1277 | 0.5555555555555556, 1278 | "#d8576b" 1279 | ], 1280 | [ 1281 | 0.6666666666666666, 1282 | "#ed7953" 1283 | ], 1284 | [ 1285 | 0.7777777777777778, 1286 | "#fb9f3a" 1287 | ], 1288 | [ 1289 | 0.8888888888888888, 1290 | "#fdca26" 1291 | ], 1292 | [ 1293 | 1, 1294 | "#f0f921" 1295 | ] 1296 | ], 1297 | "sequentialminus": [ 1298 | [ 1299 | 0, 1300 | "#0d0887" 1301 | ], 1302 | [ 1303 | 0.1111111111111111, 1304 | "#46039f" 1305 | ], 1306 | [ 1307 | 0.2222222222222222, 1308 | "#7201a8" 1309 | ], 1310 | [ 1311 | 0.3333333333333333, 1312 | "#9c179e" 1313 | ], 1314 | [ 1315 | 0.4444444444444444, 1316 | "#bd3786" 1317 | ], 1318 | [ 1319 | 0.5555555555555556, 1320 | "#d8576b" 1321 | ], 1322 | [ 1323 | 0.6666666666666666, 1324 | "#ed7953" 1325 | ], 1326 | [ 1327 | 0.7777777777777778, 1328 | "#fb9f3a" 1329 | ], 1330 | [ 1331 | 0.8888888888888888, 1332 | "#fdca26" 1333 | ], 1334 | [ 1335 | 1, 1336 | "#f0f921" 1337 | ] 1338 | ] 1339 | }, 1340 | "colorway": [ 1341 | "#636efa", 1342 | "#EF553B", 1343 | "#00cc96", 1344 | "#ab63fa", 1345 | "#FFA15A", 1346 | "#19d3f3", 1347 | "#FF6692", 1348 | "#B6E880", 1349 | "#FF97FF", 1350 | "#FECB52" 1351 | ], 1352 | "font": { 1353 | "color": "#2a3f5f" 1354 | }, 1355 | "geo": { 1356 | "bgcolor": "white", 1357 | "lakecolor": "white", 1358 | "landcolor": "#E5ECF6", 1359 | "showlakes": true, 1360 | "showland": true, 1361 | "subunitcolor": "white" 1362 | }, 1363 | "hoverlabel": { 1364 | "align": "left" 1365 | }, 1366 | "hovermode": "closest", 1367 | "mapbox": { 1368 | "style": "light" 1369 | }, 1370 | "paper_bgcolor": "white", 1371 | "plot_bgcolor": "#E5ECF6", 1372 | "polar": { 1373 | "angularaxis": { 1374 | "gridcolor": "white", 1375 | "linecolor": "white", 1376 | "ticks": "" 1377 | }, 1378 | "bgcolor": "#E5ECF6", 1379 | "radialaxis": { 1380 | "gridcolor": "white", 1381 | "linecolor": "white", 1382 | "ticks": "" 1383 | } 1384 | }, 1385 | "scene": { 1386 | "xaxis": { 1387 | "backgroundcolor": "#E5ECF6", 1388 | "gridcolor": "white", 1389 | "gridwidth": 2, 1390 | "linecolor": "white", 1391 | "showbackground": true, 1392 | "ticks": "", 1393 | "zerolinecolor": "white" 1394 | }, 1395 | "yaxis": { 1396 | "backgroundcolor": "#E5ECF6", 1397 | "gridcolor": "white", 1398 | "gridwidth": 2, 1399 | "linecolor": "white", 1400 | "showbackground": true, 1401 | "ticks": "", 1402 | "zerolinecolor": "white" 1403 | }, 1404 | "zaxis": { 1405 | "backgroundcolor": "#E5ECF6", 1406 | "gridcolor": "white", 1407 | "gridwidth": 2, 1408 | "linecolor": "white", 1409 | "showbackground": true, 1410 | "ticks": "", 1411 | "zerolinecolor": "white" 1412 | } 1413 | }, 1414 | "shapedefaults": { 1415 | "line": { 1416 | "color": "#2a3f5f" 1417 | } 1418 | }, 1419 | "ternary": { 1420 | "aaxis": { 1421 | "gridcolor": "white", 1422 | "linecolor": "white", 1423 | "ticks": "" 1424 | }, 1425 | "baxis": { 1426 | "gridcolor": "white", 1427 | "linecolor": "white", 1428 | "ticks": "" 1429 | }, 1430 | "bgcolor": "#E5ECF6", 1431 | "caxis": { 1432 | "gridcolor": "white", 1433 | "linecolor": "white", 1434 | "ticks": "" 1435 | } 1436 | }, 1437 | "title": { 1438 | "x": 0.05 1439 | }, 1440 | "xaxis": { 1441 | "automargin": true, 1442 | "gridcolor": "white", 1443 | "linecolor": "white", 1444 | "ticks": "", 1445 | "title": { 1446 | "standoff": 15 1447 | }, 1448 | "zerolinecolor": "white", 1449 | "zerolinewidth": 2 1450 | }, 1451 | "yaxis": { 1452 | "automargin": true, 1453 | "gridcolor": "white", 1454 | 
"linecolor": "white", 1455 | "ticks": "", 1456 | "title": { 1457 | "standoff": 15 1458 | }, 1459 | "zerolinecolor": "white", 1460 | "zerolinewidth": 2 1461 | } 1462 | } 1463 | }, 1464 | "title": { 1465 | "text": "Daily Positive Sentiment Score" 1466 | }, 1467 | "xaxis": { 1468 | "anchor": "y", 1469 | "domain": [ 1470 | 0, 1471 | 1 1472 | ], 1473 | "dtick": "D", 1474 | "tickformat": "%Y-%m-%d", 1475 | "title": { 1476 | "text": "Date" 1477 | } 1478 | }, 1479 | "yaxis": { 1480 | "anchor": "x", 1481 | "domain": [ 1482 | 0, 1483 | 1 1484 | ], 1485 | "title": { 1486 | "text": "Positive Sentiment Score" 1487 | } 1488 | } 1489 | } 1490 | } 1491 | }, 1492 | "metadata": {}, 1493 | "output_type": "display_data" 1494 | } 1495 | ], 1496 | "source": [ 1497 | "# Filter out neutral sentiment\n", 1498 | "filtered_df = df[df['sentiment'] != 'neutral']\n", 1499 | "\n", 1500 | "# Extract the date (day only) and calculate daily positive sentiment score\n", 1501 | "filtered_df['Day'] = filtered_df['Date'].dt.date\n", 1502 | "daily_sentiment = (\n", 1503 | " filtered_df.groupby('Day')['sentiment']\n", 1504 | " .apply(lambda x: (x == 'positive').sum() / len(x))\n", 1505 | " .reset_index(name='positive_sentiment_score')\n", 1506 | ")\n", 1507 | "\n", 1508 | "# Plot the daily sentiment score\n", 1509 | "fig = px.line(\n", 1510 | " daily_sentiment,\n", 1511 | " x='Day',\n", 1512 | " y='positive_sentiment_score',\n", 1513 | " title='Daily Positive Sentiment Score',\n", 1514 | " labels={'positive_sentiment_score': 'Positive Sentiment Score', 'Day': 'Date'},\n", 1515 | " markers=True,\n", 1516 | ")\n", 1517 | "\n", 1518 | "fig.update_xaxes(dtick=\"D\", tickformat=\"%Y-%m-%d\")\n", 1519 | "\n", 1520 | "\n", 1521 | "fig" 1522 | ] 1523 | } 1524 | ], 1525 | "metadata": { 1526 | "kernelspec": { 1527 | "display_name": "general_env", 1528 | "language": "python", 1529 | "name": "python3" 1530 | }, 1531 | "language_info": { 1532 | "codemirror_mode": { 1533 | "name": "ipython", 1534 | "version": 3 1535 | }, 1536 | "file_extension": ".py", 1537 | "mimetype": "text/x-python", 1538 | "name": "python", 1539 | "nbconvert_exporter": "python", 1540 | "pygments_lexer": "ipython3", 1541 | "version": "3.12.3" 1542 | } 1543 | }, 1544 | "nbformat": 4, 1545 | "nbformat_minor": 2 1546 | } 1547 | -------------------------------------------------------------------------------- /ai_stocks_prediction.py: -------------------------------------------------------------------------------- 1 | ############## 2 | #### Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts 3 | #### Subscribe for more AI/Machine Learning/Quant Finance Tutorials 4 | ############## 5 | 6 | 7 | ############## 8 | ### PART 1 ### 9 | # LIBRARIES ## 10 | # & OLLAMA ## 11 | ############## 12 | 13 | # Data Importing Libraries 14 | import yfinance as yf 15 | from finvizfinance.quote import finvizfinance 16 | 17 | # Data Modeling Library 18 | from statsmodels.tsa.statespace.sarimax import SARIMAX 19 | 20 | # Charts 21 | import plotly.graph_objects as go 22 | 23 | # Data Manipulation 24 | import pandas as pd 25 | import numpy as np 26 | 27 | # Avoid Forecasting on Holidays 28 | import holidays 29 | 30 | # Create Local LLM Server Connection 31 | from langchain_community.llms import Ollama 32 | 33 | # Interactive Web App UI 34 | import streamlit as st 35 | 36 | 37 | # Connect to local Ollama server 38 | llm = Ollama(model='llama3') 39 | 40 | 41 | ############## 42 | ### PART 2 ### 43 | # FUNCTIONS ## 44 | ############## 45 | 46 | # Function to classify sentiment 47 | def 
40 | 
41 | ##############
42 | ### PART 2 ###
43 | # FUNCTIONS ##
44 | ##############
45 | 
46 | # Function to classify sentiment
47 | def classify_sentiment(title):
48 |     output = llm.invoke(f"Classify the sentiment as 'POSITIVE' or 'NEGATIVE' or 'NEUTRAL' with just that one word only, no additional words or reasoning: {title}")
49 |     return output.strip()  # Ensure the response is clean and without extra spaces
50 | 
51 | # Function to get and process news data
52 | def get_news_data(ticker):
53 | 
54 |     # Data Pull
55 |     stock = finvizfinance(ticker)
56 |     news_df = stock.ticker_news()
57 | 
58 |     # Preprocess before putting into LLM
59 |     news_df['Title'] = news_df['Title'].str.lower()
60 | 
61 |     # Classify Sentiment function applied to each row of news_df
62 |     news_df['sentiment'] = news_df['Title'].apply(classify_sentiment)
63 | 
64 |     # Postprocess after putting into LLM
65 |     news_df['sentiment'] = news_df['sentiment'].str.upper()
66 |     news_df = news_df[news_df['sentiment'] != 'NEUTRAL']
67 |     news_df['Date'] = pd.to_datetime(news_df['Date'])
68 |     news_df['DateOnly'] = news_df['Date'].dt.date
69 | 
70 |     return news_df
71 | 
72 | # Function to group and process sentiment data
73 | def process_sentiment_data(news_df):
74 | 
75 |     # Reshape data to have df with columns: Date, # of positive Articles, # of negative Articles
76 |     grouped = news_df.groupby(['DateOnly', 'sentiment']).size().unstack(fill_value=0)
77 |     grouped = grouped.reindex(columns=['POSITIVE', 'NEGATIVE'], fill_value=0)
78 | 
79 |     # Rolling 7-day counts (sums, not averages) of positive and negative sentiment articles
80 |     grouped['7day_avg_positive'] = grouped['POSITIVE'].rolling(window=7, min_periods=1).sum()
81 |     grouped['7day_avg_negative'] = grouped['NEGATIVE'].rolling(window=7, min_periods=1).sum()
82 | 
83 |     # "Percent Positive": share of positive articles over the trailing 7 days, using the rolling counts above (dividing the single-day counts here would ignore the 7-day window the column name promises)
84 |     grouped['7day_pct_positive'] = grouped['7day_avg_positive'] / (grouped['7day_avg_positive'] + grouped['7day_avg_negative'])
85 |     result_df = grouped.reset_index()
86 | 
87 |     return result_df
88 | 
89 | # Function to fetch and process stock data
90 | def get_stock_data(ticker, start_date, end_date):
91 |     stock_data = yf.download(ticker, start=start_date, end=end_date)  # Pull ticker data
92 |     stock_data['Pct_Change'] = stock_data['Close'].pct_change() * 100  # Percent change in closing value since previous day
93 |     return stock_data
94 | 
95 | # Function to combine sentiment and stock data
96 | def combine_data(result_df, stock_data):
97 |     combined_df = result_df.set_index('DateOnly').join(stock_data[['Pct_Change']], how='inner')
98 |     combined_df['lagged_7day_pct_positive'] = combined_df['7day_pct_positive'].shift(1)  # Lag sentiment feature by 1 day for temporal alignment
99 |     return combined_df
100 | 
101 | # Function to calculate Pearson correlation
102 | def calculate_correlation(combined_df):
103 |     correlation_pct_change = combined_df[['lagged_7day_pct_positive', 'Pct_Change']].corr().iloc[0, 1]
104 |     return correlation_pct_change
105 | 
106 | # Function to get future dates excluding weekends and holidays
107 | def get_future_dates(start_date, num_days):
108 |     us_holidays = holidays.US()
109 |     future_dates = []
110 |     current_date = start_date + pd.Timedelta(days=1)  # Start the day AFTER the last observation; starting at start_date itself would re-include it
111 |     while len(future_dates) < num_days:
112 |         if current_date.weekday() < 5 and current_date not in us_holidays:
113 |             future_dates.append(current_date)
114 |         current_date += pd.Timedelta(days=1)
115 |     return future_dates
116 | 
117 | # Function to fit ARIMAX model and forecast
118 | def fit_and_forecast(combined_df, forecast_steps=3):
119 |     endog = combined_df['Pct_Change'].dropna()  # Dependent variable
120 |     exog = combined_df['lagged_7day_pct_positive'].dropna()  # Predictor variable
121 |     endog = endog.loc[exog.index]  # Align variables
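# Editor's note (added): with order=(1, 1, 1) plus an exogenous regressor, SARIMAX
# fits a regression with ARIMA(1,1,1) errors, roughly (statsmodels convention):
#     Pct_Change_t = beta * x_t + eta_t,   (1 - phi*L)(1 - L) * eta_t = (1 + theta*L) * eps_t
# where x_t is the lagged 7-day positive-sentiment share and L is the lag operator.
# Treat this as a sketch of the model form; intercept/trend handling follows statsmodels defaults.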
122 |     model = SARIMAX(endog, exog=exog, order=(1, 1, 1))  # ARIMAX model
123 |     fit = model.fit(disp=False)  # Fit model
124 | 
125 |     future_dates = get_future_dates(combined_df.index[-1], forecast_steps)  # Future dates
126 |     future_exog = combined_df['lagged_7day_pct_positive'][-forecast_steps:].values.reshape(-1, 1)  # Reuse the last observed sentiment values as future exogenous inputs
127 | 
128 |     forecast = fit.get_forecast(steps=forecast_steps, exog=future_exog)  # Get forecast
129 |     forecast_mean = forecast.predicted_mean  # Predicted mean
130 |     forecast_ci = forecast.conf_int()  # Confidence intervals
131 | 
132 |     return forecast_mean, forecast_ci, future_dates  # Return results
133 | 
134 | 
135 | # Function to create and display plot
136 | def create_plot(combined_df, forecast_mean, forecast_ci, forecast_index):
137 |     # Standardize the sentiment proportion (z-score) so it shares a readable scale with returns
138 |     sentiment_std = (combined_df['7day_pct_positive'] - combined_df['7day_pct_positive'].mean()) / combined_df['7day_pct_positive'].std()
139 | 
140 |     fig = go.Figure()
141 | 
142 |     # Add standardized sentiment proportion
143 |     fig.add_trace(go.Scatter(
144 |         x=combined_df.index,
145 |         y=sentiment_std,
146 |         name='Standardized Sentiment Proportion',
147 |         line=dict(color='blue'),
148 |         mode='lines'
149 |     ))
150 | 
151 |     # Add stock percentage change
152 |     fig.add_trace(go.Scatter(
153 |         x=combined_df.index,
154 |         y=combined_df['Pct_Change'],
155 |         name='Stock Pct Change',
156 |         line=dict(color='green'),
157 |         yaxis='y2',
158 |         mode='lines'
159 |     ))
160 | 
161 |     # Add forecasted stock percentage change
162 |     fig.add_trace(go.Scatter(
163 |         x=forecast_index,
164 |         y=forecast_mean,
165 |         name='Forecasted Pct Change',
166 |         line=dict(color='red'),
167 |         mode='lines'
168 |     ))
169 | 
170 |     # Add confidence intervals for the forecast
171 |     fig.add_trace(go.Scatter(
172 |         x=np.concatenate([forecast_index, forecast_index[::-1]]),
173 |         y=np.concatenate([forecast_ci.iloc[:, 0], forecast_ci.iloc[:, 1][::-1]]),
174 |         fill='toself',
175 |         fillcolor='rgba(255,0,0,0.2)',
176 |         line=dict(color='rgba(255,255,255,0)'),
177 |         hoverinfo="skip",
178 |         showlegend=False
179 |     ))
180 | 
181 |     # Update layout with appropriate y-axis ranges
182 |     fig.update_layout(
183 |         title='Sentiment Proportion and Stock Percentage Change with Forecast',
184 |         xaxis_title='Date',
185 |         yaxis=dict(
186 |             title='Standardized Sentiment Proportion',
187 |             titlefont=dict(color='blue')
188 |         ),
189 |         yaxis2=dict(
190 |             title='Stock Pct Change',
191 |             titlefont=dict(color='green'),
192 |             overlaying='y',
193 |             side='right'
194 |         ),
195 |         template='plotly_dark'
196 |     )
197 |     st.plotly_chart(fig)
198 | 
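# Editor's note (added): to launch this dashboard, run
#     streamlit run ai_stocks_prediction.py
# from the repo root (assumes the Part 1 libraries are installed and Ollama is serving llama3).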
199 | 
200 | ##############
201 | ### PART 3 ###
202 | # STREAMLIT ##
203 | ##############
204 | 
205 | # Streamlit app
206 | st.sidebar.title("Predicting Stock Prices by News Sentiment")
207 | ticker = st.sidebar.text_input("Enter stock ticker (e.g., SBUX):", value='SBUX')
208 | run_button = st.sidebar.button("Run Analysis")
209 | 
210 | if run_button:
211 |     news_df = get_news_data(ticker)
212 |     result_df = process_sentiment_data(news_df)
213 |     start_date = result_df['DateOnly'].min().strftime('%Y-%m-%d')
214 |     end_date = result_df['DateOnly'].max().strftime('%Y-%m-%d')
215 |     stock_data = get_stock_data(ticker, start_date, end_date)
216 |     combined_df = combine_data(result_df, stock_data)
217 |     correlation_pct_change = calculate_correlation(combined_df)
218 |     st.write(f'Pearson correlation between lagged sentiment score and stock percentage change: {correlation_pct_change:.3f}')
219 |     forecast_mean, forecast_ci, forecast_index = fit_and_forecast(combined_df)
220 |     create_plot(combined_df, forecast_mean, forecast_ci, forecast_index)
221 | 
222 | 
--------------------------------------------------------------------------------
/app_streamlit_app_builder_ai.py:
--------------------------------------------------------------------------------
1 | # Source: Deep Charts YouTube Channel (https://www.youtube.com/@DeepCharts)
2 | 
3 | # Build Streamlit Apps Within a Streamlit App
4 | # Vibe coded with Gemini 2.5 Pro Experimental
5 | 
6 | # Project Setup:
7 | 
8 | # Create a project folder: e.g., streamlit_ide_prototype
9 | # Create a Python file: app.py inside the folder.
10 | # Create a sub-folder: workspace inside the project folder. This is where the AI will create/edit files.
11 | # Install libraries:
12 | #   pip install streamlit google-generativeai python-dotenv streamlit-option-menu streamlit-ace streamlit-antd-components
13 | # API Key:
14 | #   Get your Gemini API key (from Google AI Studio).
15 | #   Create a file named .env in your project folder.
16 | #   Add your API key to the .env file:
17 | #   GOOGLE_API_KEY="YOUR_API_KEY_HERE"
18 | #   Alternatively, for deployment, use Streamlit Secrets Management. For local testing, .env is often easier.
19 | 
20 | ######
21 | 
22 | import streamlit as st
23 | import google.generativeai as genai
24 | import os
25 | from pathlib import Path
26 | import json
27 | import time
28 | from dotenv import load_dotenv
29 | import subprocess  # Needed to run other Streamlit apps (the preview)
30 | import socket  # Needed to find an open network port for the preview
31 | import sys  # Needed to get the path to the current Python executable
32 | 
33 | # --- UI Components ---
34 | # These libraries provide pre-built UI elements like menus and the code editor.
35 | from streamlit_option_menu import option_menu
36 | from streamlit_ace import st_ace
37 | import streamlit_antd_components as sac  # Using for specific buttons (Save/Delete group)
38 | 
39 | # --- Configuration ---
40 | st.set_page_config(
41 |     layout="wide",
42 |     page_title="AI App Gen"  # Shorter title
43 | )
44 | load_dotenv()  # Load API keys from a file named .env in the same directory
45 | 
46 | # --- Constants ---
47 | # Where generated Python app files will be saved
48 | WORKSPACE_DIR = Path("workspace_st_apps")
49 | WORKSPACE_DIR.mkdir(exist_ok=True)  # Create the directory if it doesn't exist
50 | 
51 | # Code editor appearance settings
52 | ACE_DEFAULT_THEME = "monokai"
53 | ACE_DEFAULT_KEYBINDING = "vscode"
54 | 
55 | # Which Google AI model to use for generating code
56 | GEMINI_MODEL_NAME = "gemini-2.5-pro-exp-03-25"
57 | 
58 | # Instructions for the Google AI model
59 | # This tells the AI how to format its responses (as JSON commands)
60 | GEMINI_SYSTEM_PROMPT = f"""
61 | You are an AI assistant helping create Streamlit applications.
62 | Your goal is to manage Python files in a workspace based on user requests.
63 | Respond *only* with a valid JSON array containing commands. Do not add any explanations before or after the JSON array.
64 | 
65 | Available commands:
66 | 1. `{{"action": "create_update", "filename": "app_name.py", "content": "FULL_PYTHON_CODE_HERE"}}`
67 |    - Use this to create a new Python file or completely overwrite an existing one.
68 |    - Provide the *entire* file content. Escape backslashes (`\\\\`) and double quotes (`\\"`). Ensure newlines are `\\n`. 
69 |    - Do *not* include ```python markdown blocks or shebangs (`#!/usr/bin/env python`) in the "content".
70 | 2. `{{"action": "delete", "filename": "old_app.py"}}`
71 |    - Use this to delete a Python file from the workspace.
72 | 3. `{{"action": "chat", "content": "Your message here."}}`
73 |    - Use this *only* if you need to ask for clarification, report an issue you can't fix with file actions, or confirm understanding.
74 | 
75 | Current Python files in workspace: {', '.join([f.name for f in WORKSPACE_DIR.iterdir() if f.is_file() and f.suffix == '.py']) if WORKSPACE_DIR.exists() else 'None'}
76 | 
77 | Example Interaction:
78 | User: Create a simple hello world app called hello.py
79 | AI: `[{{"action": "create_update", "filename": "hello.py", "content": "import streamlit as st\\n\\nst.title('Hello World!')\\nst.write('This is a simple app.')"}}]`
80 | 
81 | Ensure your entire response is *only* the JSON array `[...]`.
82 | """
83 | 
84 | # --- API Client Setup ---
85 | try:
86 |     google_api_key = os.getenv("GOOGLE_API_KEY")
87 |     if not google_api_key:
88 |         # Stop the app if the API key is missing
89 |         st.error("🔴 Google API Key not found. Please set `GOOGLE_API_KEY` in a `.env` file.")
90 |         st.stop()  # Halt execution
91 |     # Configure the Gemini library with the key
92 |     genai.configure(api_key=google_api_key)
93 |     # Create the AI model object
94 |     model = genai.GenerativeModel(GEMINI_MODEL_NAME)
95 | except Exception as e:
96 |     st.error(f"🔴 Failed to set up Google AI: {e}")
97 |     st.stop()
98 | 
99 | # --- Session State ---
100 | # Streamlit reruns the script on interaction. Session state stores data
101 | # between reruns, like chat history or which file is selected.
102 | def initialize_session_state():
103 |     """Sets up default values in Streamlit's session state dictionary."""
104 |     state_defaults = {
105 |         "messages": [],  # List to store chat messages (user and AI)
106 |         "selected_file": None,  # Name of the file currently shown in the editor
107 |         "file_content_on_load": "",  # Content of the selected file when loaded (read-only)
108 |         "preview_process": None,  # Stores the running preview process object
109 |         "preview_port": None,  # Port number used by the preview
110 |         "preview_url": None,  # URL to access the preview
111 |         "preview_file": None,  # Name of the file being previewed
112 |         "editor_unsaved_content": "",  # Current text typed into the editor
113 |         "last_saved_content": "",  # Content that was last successfully saved to disk
114 |     }
115 |     for key, default_value in state_defaults.items():
116 |         if key not in st.session_state:
117 |             st.session_state[key] = default_value
118 | 
119 | initialize_session_state()  # Run the initialization
120 | 
121 | # --- File System Functions ---
122 | def get_workspace_python_files():
123 |     """Gets a list of all '.py' filenames in the workspace directory."""
124 |     if not WORKSPACE_DIR.is_dir():
125 |         return []  # Return empty list if directory doesn't exist
126 |     try:
127 |         # List files, filter for .py, sort alphabetically
128 |         python_files = sorted([
129 |             f.name for f in WORKSPACE_DIR.iterdir() if f.is_file() and f.suffix == '.py'
130 |         ])
131 |         return python_files
132 |     except Exception as e:
133 |         st.error(f"Error reading workspace directory: {e}")
134 |         return []
135 | 
136 | def read_file(filename):
137 |     """Reads the text content of a file from the workspace."""
138 |     if not filename:  # Check if filename is provided
139 |         return None
140 |     # Prevent accessing files outside the workspace (basic security)
141 |     if ".." in filename or filename.startswith(("/", "\\")):
142 |         st.error(f"Invalid file path: {filename}")
143 |         return None
144 | 
145 |     filepath = WORKSPACE_DIR / filename  # Combine directory and filename
146 |     try:
147 |         with open(filepath, "r", encoding="utf-8") as f:
148 |             return f.read()  # Return the file's text content
149 |     except FileNotFoundError:
150 |         st.warning(f"File not found: {filename}")
151 |         return None  # Indicate file doesn't exist
152 |     except Exception as e:
153 |         st.error(f"Error reading file '{filename}': {e}")
154 |         return None
155 | 
156 | def save_file(filename, content):
157 |     """Writes text content to a file in the workspace."""
158 |     if not filename:
159 |         return False  # Cannot save without a filename
160 |     if ".." in filename or filename.startswith(("/", "\\")):
161 |         st.error(f"Invalid file path: {filename}")
162 |         return False
163 | 
164 |     filepath = WORKSPACE_DIR / filename
165 |     try:
166 |         # Write the content to the file (overwrites if it exists)
167 |         with open(filepath, "w", encoding="utf-8") as f:
168 |             f.write(content)
169 |         return True  # Indicate success
170 |     except Exception as e:
171 |         st.error(f"Error saving file '{filename}': {e}")
172 |         return False  # Indicate failure
173 | 
174 | def delete_file(filename):
175 |     """Deletes a file from the workspace and updates app state."""
176 |     if not filename:
177 |         return False
178 |     if ".." in filename or filename.startswith(("/", "\\")):
179 |         st.error(f"Invalid file path: {filename}")
180 |         return False
181 | 
182 |     filepath = WORKSPACE_DIR / filename
183 |     try:
184 |         if filepath.is_file():
185 |             os.remove(filepath)  # Delete the actual file
186 |             st.toast(f"Deleted: {filename}", icon="🗑️")
187 | 
188 |             # If the deleted file was being previewed, stop the preview
189 |             if st.session_state.preview_file == filename:
190 |                 stop_preview()  # Call the function to stop the process
191 | 
192 |             # If the deleted file was selected in the editor, clear the selection
193 |             if st.session_state.selected_file == filename:
194 |                 st.session_state.selected_file = None
195 |                 st.session_state.file_content_on_load = ""
196 |                 st.session_state.editor_unsaved_content = ""
197 |                 st.session_state.last_saved_content = ""
198 |             return True  # Indicate success
199 |         else:
200 |             st.warning(f"Could not delete: File '{filename}' not found.")
201 |             return False
202 |     except Exception as e:
203 |         st.error(f"Error deleting file '{filename}': {e}")
204 |         return False
205 | 
206 | # --- AI Interaction Functions ---
207 | 
208 | def _clean_ai_response_text(ai_response_text):
209 |     """Removes potential code fences (```json ... ```) from AI response."""
210 |     text = ai_response_text.strip()
211 |     if text.startswith("```json"):
212 |         text = text.removeprefix("```json").removesuffix("```").strip()  # Strip fences without assuming a closing fence is present
213 |     elif text.startswith("```"):
214 |         text = text.removeprefix("```").removesuffix("```").strip()
215 |     return text
216 | 
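# Editor's note (added): example round trip for the helper above:
#     _clean_ai_response_text('```json\n[{"action": "chat", "content": "hi"}]\n```')
#     # -> '[{"action": "chat", "content": "hi"}]'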
217 | def parse_and_execute_ai_commands(ai_response_text):
218 |     """
219 |     Parses the AI's JSON response and performs the requested file actions.
220 |     Returns the list of commands (for chat history display).
221 |     """
222 |     cleaned_text = _clean_ai_response_text(ai_response_text)
223 |     executed_commands_list = []  # To store commands for chat display
224 | 
225 |     try:
226 |         # Attempt to convert the cleaned text into a Python list of dictionaries
227 |         commands = json.loads(cleaned_text)
228 | 
229 |         # Check if the result is actually a list
230 |         if not isinstance(commands, list):
231 |             st.error("AI response was valid JSON, but not a list of commands.")
232 |             # Return a chat message indicating the error for display
233 |             return [{"action": "chat", "content": f"AI Error: Response was not a list. Response: {cleaned_text}"}]
234 | 
235 |         # Process each command dictionary in the list
236 |         for command_data in commands:
237 |             # Ensure the command is a dictionary before processing
238 |             if not isinstance(command_data, dict):
239 |                 st.warning(f"AI sent an invalid command format (not a dict): {command_data}")
240 |                 executed_commands_list.append({"action": "chat", "content": f"AI Error: Invalid command format: {command_data}"})
241 |                 continue  # Skip to the next command
242 | 
243 |             # Add the command to the list we return (used for displaying AI actions)
244 |             executed_commands_list.append(command_data)
245 | 
246 |             # Get action details from the dictionary
247 |             action = command_data.get("action")
248 |             filename = command_data.get("filename")
249 |             content = command_data.get("content")
250 | 
251 |             # --- Execute the action ---
252 |             if action == "create_update":
253 |                 if filename and content is not None:
254 |                     success = save_file(filename, content)
255 |                     if success:
256 |                         st.toast(f"AI saved: {filename}", icon="💾")
257 |                         # If this file is currently open in the editor, update the editor's content
258 |                         if st.session_state.selected_file == filename:
259 |                             st.session_state.file_content_on_load = content
260 |                             st.session_state.last_saved_content = content
261 |                             st.session_state.editor_unsaved_content = content
262 |                     else:
263 |                         st.error(f"AI command failed: Could not save '{filename}'.")
264 |                         # Add error details to chat display list
265 |                         executed_commands_list.append({"action": "chat", "content": f"Error: Failed saving {filename}"})
266 |                 else:
267 |                     st.warning("AI 'create_update' command missing filename or content.")
268 |                     executed_commands_list.append({"action": "chat", "content": "AI Warning: Invalid create_update"})
269 | 
270 |             elif action == "delete":
271 |                 if filename:
272 |                     success = delete_file(filename)
273 |                     if not success:
274 |                         st.error(f"AI command failed: Could not delete '{filename}'.")
275 |                         executed_commands_list.append({"action": "chat", "content": f"Error: Failed deleting {filename}"})
276 |                 else:
277 |                     st.warning("AI 'delete' command missing filename.")
278 |                     executed_commands_list.append({"action": "chat", "content": "AI Warning: Invalid delete"})
279 | 
280 |             elif action == "chat":
281 |                 # No action needed here, the chat message is already in executed_commands_list
282 |                 # and will be displayed in the chat history.
283 |                 pass
284 | 
285 |             else:
286 |                 # Handle unrecognized actions from the AI
287 |                 st.warning(f"AI sent unknown action: '{action}'.")
288 |                 executed_commands_list.append({"action": "chat", "content": f"AI Warning: Unknown action '{action}'"})
289 | 
290 |         return executed_commands_list  # Return the list for chat display
291 | 
292 |     except json.JSONDecodeError:
293 |         st.error(f"AI response was not valid JSON.\nRaw response:\n```\n{cleaned_text}\n```")
294 |         # Return a chat message indicating the JSON error for display
295 |         return [{"action": "chat", "content": f"AI Error: Invalid JSON received. Response: {ai_response_text}"}]
296 |     except Exception as e:
297 |         st.error(f"Error processing AI commands: {e}")
298 |         return [{"action": "chat", "content": f"Error processing commands: {e}"}]
299 | 
300 | def _prepare_gemini_history(chat_history, system_prompt):
301 |     """Formats chat history for the Gemini API call."""
302 |     gemini_history = []
303 |     # Start with the system prompt (instructions for the AI)
304 |     gemini_history.append({"role": "user", "parts": [{"text": system_prompt}]})
305 |     # Gemini requires a model response to start the turn properly after a system prompt
306 |     gemini_history.append({"role": "model", "parts": [{"text": json.dumps([{"action": "chat", "content": "Understood. I will respond only with JSON commands."}])}]})
307 | 
308 |     # Add the actual user/assistant messages from session state
309 |     for msg in chat_history:
310 |         role = msg["role"]  # "user" or "assistant"
311 |         content = msg["content"]
312 |         api_role = "model" if role == "assistant" else "user"  # Map to API roles
313 | 
314 |         # Convert assistant messages (which are lists of commands) back to JSON strings
315 |         if role == "assistant" and isinstance(content, list):
316 |             try:
317 |                 content_str = json.dumps(content)
318 |             except Exception:
319 |                 content_str = str(content)  # Fallback if conversion fails
320 |         else:
321 |             content_str = str(content)  # User messages are already strings
322 | 
323 |         if content_str:  # Avoid sending empty messages
324 |             gemini_history.append({"role": api_role, "parts": [{"text": content_str}]})
325 | 
326 |     return gemini_history
327 | 
328 | def ask_gemini_ai(chat_history):
329 |     """Sends the conversation history to the Gemini AI and returns its response."""
330 | 
331 |     # Get current list of files to include in the prompt context
332 |     current_files = get_workspace_python_files()
333 |     file_list_info = f"Current Python files: {', '.join(current_files) if current_files else 'None'}"
334 |     # Swap the fresh file list into the prompt; the "Current Python files" line there was rendered once at import time, so rebuild line by line
335 |     updated_system_prompt = "\n".join(
336 |         file_list_info if line.startswith("Current Python files") else line
337 |         for line in GEMINI_SYSTEM_PROMPT.splitlines()
338 |     )
339 | 
340 |     # Prepare the history in the format the API expects
341 |     gemini_api_history = _prepare_gemini_history(chat_history, updated_system_prompt)
342 |     response = None  # Defined up front so the safety-feedback check in the except block cannot hit a NameError
343 |     try:
344 |         # Make the API call to Google
345 |         # print(f"DEBUG: Sending history:\n{json.dumps(gemini_api_history, indent=2)}")  # Uncomment for debugging API calls
346 |         response = model.generate_content(gemini_api_history)
347 |         # print(f"DEBUG: Received response:\n{response.text}")  # Uncomment for debugging API calls
348 |         return response.text  # Return the AI's raw text response
349 | 
350 |     except Exception as e:
351 |         # Handle potential errors during the API call
352 |         error_message = f"Gemini API call failed: {type(e).__name__}"
353 |         st.error(f"🔴 {error_message}: {e}")
354 | 
355 |         # Try to give a more user-friendly error message for common
issues 356 | error_content = f"AI Error: {str(e)[:150]}..." # Default message 357 | if "API key not valid" in str(e): 358 | error_content = "AI Error: Invalid Google API Key." 359 | elif "429" in str(e) or "quota" in str(e).lower() or "resource has been exhausted" in str(e).lower(): 360 | error_content = "AI Error: API Quota or Rate Limit Exceeded." 361 | # Handle cases where the AI's response might be blocked for safety 362 | try: 363 | if response and response.prompt_feedback and response.prompt_feedback.block_reason: 364 | error_content = f"AI Error: Input blocked by safety filters ({response.prompt_feedback.block_reason})." 365 | elif response and response.candidates and response.candidates[0].finish_reason != 'STOP': 366 | error_content = f"AI Error: Response stopped ({response.candidates[0].finish_reason}). May be due to safety filters or length limits." 367 | except Exception: 368 | pass # Ignore errors during safety check parsing 369 | 370 | # Return the error as a JSON chat command so it appears in the chat history 371 | return json.dumps([{"action": "chat", "content": error_content}]) 372 | 373 | # --- Live Preview Process Management --- 374 | def _find_available_port(): 375 | """Finds an unused network port.""" 376 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 377 | s.bind(('', 0)) # Bind to port 0 to let the OS choose a free port 378 | return s.getsockname()[1] # Return the chosen port number 379 | 380 | def stop_preview(): 381 | """Stops the currently running Streamlit preview process.""" 382 | process_to_stop = st.session_state.get("preview_process") 383 | pid = getattr(process_to_stop, 'pid', None) # Get process ID if available 384 | 385 | if process_to_stop and pid: 386 | st.info(f"Stopping preview process (PID: {pid})...") 387 | try: 388 | # Check if the process is still running 389 | if process_to_stop.poll() is None: 390 | # Ask the process to terminate gracefully 391 | process_to_stop.terminate() 392 | try: 393 | # Wait up to 3 seconds for it to close 394 | process_to_stop.wait(timeout=3) 395 | st.toast(f"Preview process {pid} stopped.", icon="⏹️") 396 | except subprocess.TimeoutExpired: 397 | # If it didn't stop, force kill it 398 | st.warning(f"Preview process {pid} did not stop gracefully, killing...") 399 | if process_to_stop.poll() is None: # Check again before kill 400 | process_to_stop.kill() 401 | process_to_stop.wait(timeout=1) # Brief wait for kill 402 | st.toast(f"Preview process {pid} killed.", icon="💀") 403 | else: 404 | # Process was already finished 405 | st.warning(f"Preview process {pid} had already stopped.") 406 | except ProcessLookupError: 407 | st.warning(f"Preview process {pid} not found (already gone?).") 408 | except Exception as e: 409 | st.error(f"Error trying to stop preview process {pid}: {e}") 410 | 411 | # Always clear the preview state variables after attempting to stop 412 | st.session_state.preview_process = None 413 | st.session_state.preview_port = None 414 | st.session_state.preview_url = None 415 | st.session_state.preview_file = None 416 | st.rerun() # Update the UI immediately 417 | 418 | def start_preview(python_filename): 419 | """Starts a Streamlit app preview in a separate process.""" 420 | filepath = WORKSPACE_DIR / python_filename 421 | # Basic check: ensure the file exists and is a Python file 422 | if not filepath.is_file() or filepath.suffix != '.py': 423 | st.error(f"Cannot preview: '{python_filename}' is not a valid Python file.") 424 | return False 425 | 426 | # Stop any currently running preview first 
427 | if st.session_state.get("preview_process"): 428 | st.warning("Stopping existing preview first...") 429 | stop_preview() # This function will rerun, so we might need to adjust flow 430 | # Let's add a small delay here AFTER stop_preview (which reruns) handles its part. 431 | # This might mean the button needs to be clicked twice sometimes, but simplifies state. 432 | # A more complex approach would involve flags in session state. 433 | time.sleep(0.5) # Brief pause 434 | 435 | with st.spinner(f"Starting preview for `{python_filename}`..."): 436 | try: 437 | port = _find_available_port() 438 | # Command to run: python -m streamlit run --port [options] 439 | command = [ 440 | sys.executable, # Use the same Python interpreter running this script 441 | "-m", "streamlit", "run", 442 | str(filepath.resolve()), # Use the full path to the file 443 | "--server.port", str(port), 444 | "--server.headless", "true", # Don't open a browser automatically 445 | "--server.runOnSave", "false", # Don't automatically rerun on save 446 | "--server.fileWatcherType", "none" # Don't watch for file changes 447 | ] 448 | 449 | # Start the command as a new process 450 | preview_proc = subprocess.Popen( 451 | command, 452 | stdout=subprocess.PIPE, # Capture output (optional) 453 | stderr=subprocess.PIPE, # Capture errors 454 | text=True, encoding='utf-8' 455 | ) 456 | 457 | # Give Streamlit a moment to start up or fail 458 | time.sleep(2.5) # Wait a bit 459 | 460 | # Check if the process started successfully (is still running) 461 | if preview_proc.poll() is None: 462 | # Success! Store process info in session state 463 | st.session_state.preview_process = preview_proc 464 | st.session_state.preview_port = port 465 | st.session_state.preview_url = f"http://localhost:{port}" 466 | st.session_state.preview_file = python_filename 467 | st.success(f"Preview started: {st.session_state.preview_url}") 468 | st.toast(f"Preview running for {python_filename}", icon="🚀") 469 | return True 470 | else: 471 | # Failure: Process ended quickly, likely an error 472 | st.error(f"Preview failed to start for `{python_filename}`.") 473 | # Try to show error output from the failed process 474 | try: 475 | stderr_output = preview_proc.stderr.read() 476 | if stderr_output: 477 | st.error("Preview Error Output:") 478 | st.code(stderr_output, language=None) 479 | else: # If no stderr, maybe there was stdout? 480 | stdout_output = preview_proc.stdout.read() 481 | if stdout_output: 482 | st.error("Preview Output (may contain errors):") 483 | st.code(stdout_output, language=None) 484 | except Exception as read_e: 485 | st.error(f"Could not read output from failed preview process: {read_e}") 486 | # Clear any partial state 487 | st.session_state.preview_process = None 488 | return False 489 | except Exception as e: 490 | st.error(f"Error trying to start preview process: {e}") 491 | st.session_state.preview_process = None # Ensure clean state 492 | return False 493 | 494 | # --- Streamlit App UI --- 495 | 496 | st.title("🤖 AI Streamlit App Generator") 497 | 498 | # --- Sidebar --- 499 | with st.sidebar: 500 | st.header("💬 Chat & Controls") 501 | st.divider() 502 | 503 | # --- Chat History Display --- 504 | chat_container = st.container(height=400) 505 | with chat_container: 506 | if not st.session_state.messages: 507 | st.info("Chat history is empty. 
508 |         else:
509 |             # Loop through messages stored in session state
510 |             for message in st.session_state.messages:
511 |                 role = message["role"] # "user" or "assistant"
512 |                 content = message["content"]
513 |                 avatar = "🧑‍💻" if role == "user" else "🤖"
514 | 
515 |                 # Display message using Streamlit's chat message element
516 |                 with st.chat_message(role, avatar=avatar):
517 |                     if role == "assistant" and isinstance(content, list):
518 |                         # Assistant message contains commands - format them nicely
519 |                         file_actions_summary = ""
520 |                         chat_responses = []
521 |                         code_snippets = []
522 | 
523 |                         for command in content:
524 |                             if not isinstance(command, dict): continue # Skip malformed
525 | 
526 |                             action = command.get("action")
527 |                             filename = command.get("filename")
528 |                             cmd_content = command.get("content")
529 | 
530 |                             if action == "create_update":
531 |                                 file_actions_summary += f"📝 **Saved:** `{filename}`\n"
532 |                                 if cmd_content:
533 |                                     code_snippets.append({"filename": filename, "content": cmd_content})
534 |                             elif action == "delete":
535 |                                 file_actions_summary += f"🗑️ **Deleted:** `{filename}`\n"
536 |                             elif action == "chat":
537 |                                 chat_responses.append(str(cmd_content or "..."))
538 |                             else:
539 |                                 file_actions_summary += f"⚠️ **Unknown Action:** `{action}`\n"
540 | 
541 |                         # Display the formatted summary and chat responses
542 |                         full_display_text = (file_actions_summary + "\n".join(chat_responses)).strip()
543 |                         if full_display_text:
544 |                             st.markdown(full_display_text)
545 |                         else: # Handle cases where AI might return empty actions
546 |                             st.markdown("(AI performed no displayable actions)")
547 | 
548 |                         # Show code snippets in collapsible sections
549 |                         for snippet in code_snippets:
550 |                             with st.expander(f"View Code for `{snippet['filename']}`", expanded=False):
551 |                                 st.code(snippet['content'], language="python")
552 | 
553 |                     elif isinstance(content, str):
554 |                         # Simple text message (from user or AI chat action)
555 |                         st.write(content)
556 |                     else:
557 |                         # Fallback for unexpected content type
558 |                         st.write(f"Unexpected message format: {content}")
559 | 
560 |     # --- Chat Input Box ---
561 |     user_prompt = st.chat_input("Tell the AI what to do (e.g., 'Create hello.py')")
562 |     if user_prompt:
563 |         # 1. Add user's message to the chat history (in session state)
564 |         st.session_state.messages.append({"role": "user", "content": user_prompt})
565 | 
566 |         # 2. Show a spinner while waiting for the AI
567 |         with st.spinner("🧠 AI Thinking..."):
568 |             # 3. Send the *entire* chat history to the AI
569 |             ai_response_text = ask_gemini_ai(st.session_state.messages)
570 |             # 4. Parse the AI's response and execute file commands
571 |             ai_commands_executed = parse_and_execute_ai_commands(ai_response_text)
572 | 
573 |         # 5. Add the AI's response (the list of executed commands) to chat history
574 |         st.session_state.messages.append({"role": "assistant", "content": ai_commands_executed})
575 | 
576 |         # 6. Rerun the script immediately to show the new messages and update file list/editor
577 |         st.rerun()
578 | 
579 |     st.divider()
580 | 
581 |     # --- Status Info ---
582 |     st.subheader("Status & Info")
583 |     st.success(f"Using AI model: {GEMINI_MODEL_NAME}", icon="✅")
584 |     st.warning(
585 |         "**Notes:** Review AI code before running previews. `create_update` overwrites files.",
586 |     )
587 | 
588 | 
589 | # --- Main Area Tabs ---
590 | selected_tab = option_menu(
591 |     menu_title=None,
592 |     options=["Workspace", "Live Preview"],
593 |     icons=["folder-fill", "play-btn-fill"],
594 |     orientation="horizontal",
595 |     key="main_tab_menu"
596 |     # Removed custom styles for simplicity
597 | )
598 | 
599 | # --- Workspace Tab ---
600 | if selected_tab == "Workspace":
601 |     st.header("📂 Workspace & Editor")
602 |     st.divider()
603 | 
604 |     # Create two columns: one for file list, one for editor
605 |     file_list_col, editor_col = st.columns([0.3, 0.7]) # 30% width for files, 70% for editor
606 | 
607 |     with file_list_col:
608 |         st.subheader("Files")
609 |         python_files = get_workspace_python_files()
610 | 
611 |         # Prepare options for the dropdown menu
612 |         select_options = ["--- Select a file ---"] + python_files
613 |         current_selection_in_state = st.session_state.get("selected_file")
614 | 
615 |         # Find the index of the currently selected file to set the dropdown default
616 |         try:
617 |             current_index = select_options.index(current_selection_in_state) if current_selection_in_state else 0
618 |         except ValueError:
619 |             current_index = 0 # If file in state doesn't exist, default to "Select"
620 | 
621 |         # The dropdown widget
622 |         selected_option = st.selectbox(
623 |             "Edit file:",
624 |             options=select_options,
625 |             index=current_index,
626 |             key="file_selector_dropdown",
627 |             label_visibility="collapsed" # Hide the label "Edit file:"
628 |         )
629 | 
630 |         # --- Handle File Selection Change ---
631 |         # If the dropdown selection is different from what's stored in session state...
632 |         newly_selected_filename = selected_option if selected_option != "--- Select a file ---" else None
633 |         if newly_selected_filename != current_selection_in_state:
634 |             st.session_state.selected_file = newly_selected_filename # Update state
635 |             # Read the content of the newly selected file
636 |             file_content = read_file(newly_selected_filename) if newly_selected_filename else ""
637 |             # Handle case where file read failed (e.g., it was deleted)
638 |             if file_content is None and newly_selected_filename:
639 |                 file_content = f"# ERROR: Could not read file '{newly_selected_filename}'"
640 | 
641 |             # Update session state with the file's content for the editor
642 |             st.session_state.file_content_on_load = file_content
643 |             st.session_state.editor_unsaved_content = file_content # Start editor with file content
644 |             st.session_state.last_saved_content = file_content # Mark as saved initially
645 |             st.rerun() # Rerun script to load the new file into the editor
646 | 
647 |     with editor_col:
648 |         st.subheader("Code Editor")
649 |         selected_filename = st.session_state.selected_file
650 | 
651 |         if selected_filename:
652 |             st.caption(f"Editing: `{selected_filename}`")
653 | 
654 |             # Display the Ace code editor widget
655 |             editor_current_text = st_ace(
656 |                 value=st.session_state.get('editor_unsaved_content', ''), # Show unsaved content
657 |                 language="python",
658 |                 theme=ACE_DEFAULT_THEME,
659 |                 keybinding=ACE_DEFAULT_KEYBINDING,
660 |                 font_size=14, tab_size=4, wrap=True,
661 |                 auto_update=False, # Don't trigger reruns on every keystroke
662 |                 key=f"ace_editor_{selected_filename}" # Unique key helps reset state on file change
663 |             )
664 | 
665 |             # Check if the editor's current text is different from the last saved text
666 |             has_unsaved_changes = (editor_current_text != st.session_state.last_saved_content)
667 | 
668 |             # If the text in the editor box changes, update our 'unsaved' state variable
669 |             if editor_current_text != st.session_state.editor_unsaved_content:
670 |                 st.session_state.editor_unsaved_content = editor_current_text
671 |                 st.rerun() # Rerun to update the 'Save Changes' button state
672 | 
673 |             # --- Editor Action Buttons ---
674 |             # Using sac.buttons here for the nice grouped layout with icons.
675 |             editor_buttons = [
676 |                 sac.ButtonsItem(label="💾 Save Changes", icon="save", disabled=not has_unsaved_changes),
677 |                 sac.ButtonsItem(label="🗑️ Delete File", icon="trash", color="red"),
678 |             ]
679 |             clicked_editor_button = sac.buttons(
680 |                 items=editor_buttons, index=None, format_func='title',
681 |                 align='end', size='small', return_index=False,
682 |                 key="editor_action_buttons"
683 |             )
684 | 
685 |             # --- Handle Button Clicks ---
686 |             if clicked_editor_button == "💾 Save Changes":
687 |                 if save_file(selected_filename, editor_current_text):
688 |                     # Update state to reflect the save
689 |                     st.session_state.file_content_on_load = editor_current_text
690 |                     st.session_state.last_saved_content = editor_current_text
691 |                     st.toast(f"Saved: `{selected_filename}`", icon="💾")
692 |                     time.sleep(0.5) # Let toast message show
693 |                     st.rerun() # Rerun to disable the save button
694 |                 else:
695 |                     st.error(f"Error: Could not save '{selected_filename}'.")
696 | 
697 |             elif clicked_editor_button == "🗑️ Delete File":
698 |                 # Use sac.confirm_button for a confirmation pop-up
699 |                 needs_confirmation = True # Flag to show confirmation
700 |                 if needs_confirmation:
701 |                     confirmed = sac.confirm_button(
702 |                         f"Delete `{selected_filename}`?", # Confirmation message
703 |                         color="error", key="confirm_delete_button"
704 |                     )
705 |                     if confirmed:
706 |                         if delete_file(selected_filename):
707 |                             # Deletion successful, file list and editor will update on rerun
708 |                             st.rerun()
709 |                         # No 'else' needed, delete_file shows errors
710 | 
711 |             # Show a warning if there are unsaved changes
712 |             if has_unsaved_changes:
713 |                 st.warning("You have unsaved changes.")
714 | 
715 |         else:
716 |             # Show a placeholder message if no file is selected
717 |             st.info("Select a Python file from the list on the left to view or edit.")
718 |             st_ace(value="# Select a file...", language="python", readonly=True, key="ace_placeholder")
719 | 
720 | # --- Live Preview Tab ---
721 | elif selected_tab == "Live Preview":
722 |     st.header("▶️ Live Preview")
723 |     st.divider()
724 |     st.warning("⚠️ Running AI-generated code can have unintended consequences. Review code first!")
725 | 
726 |     # Get preview status from session state
727 |     is_preview_running = st.session_state.get("preview_process") is not None
728 |     file_being_previewed = st.session_state.get("preview_file")
729 |     preview_url = st.session_state.get("preview_url")
730 |     selected_file_for_preview = st.session_state.get("selected_file") # File selected in Workspace
731 | 
732 |     # --- Preview Controls ---
733 |     st.subheader("Controls")
734 |     if not selected_file_for_preview:
735 |         st.info("Select a file in the 'Workspace' tab to enable preview controls.")
736 |         # Allow stopping a preview even if no file is selected
737 |         if is_preview_running:
738 |             st.warning(f"Preview is running for: `{file_being_previewed}`")
739 |             if st.button(f"⏹️ Stop Preview ({file_being_previewed})", key="stop_other_preview"):
740 |                 stop_preview() # Will stop and rerun
741 |     else:
742 |         # Controls for the file selected in the Workspace
743 |         st.write(f"File selected for preview: `{selected_file_for_preview}`")
744 |         is_python = selected_file_for_preview.endswith(".py")
745 | 
746 |         if not is_python:
747 |             st.error("Cannot preview: Selected file is not a Python (.py) file.")
748 |         else:
749 |             # Layout Run and Stop buttons side-by-side
750 |             run_col, stop_col = st.columns(2)
751 |             with run_col:
752 |                 # Disable Run button if a preview is already running
753 |                 run_disabled = is_preview_running
754 |                 if st.button("🚀 Run Preview", disabled=run_disabled, type="primary", use_container_width=True):
755 |                     if start_preview(selected_file_for_preview):
756 |                         st.rerun() # Rerun to show the preview iframe
757 |             with stop_col:
758 |                 # Disable Stop button if no preview is running OR if the running preview
759 |                 # is for a DIFFERENT file than the one currently selected in the workspace.
760 |                 stop_disabled = not is_preview_running or (file_being_previewed != selected_file_for_preview)
761 |                 if st.button("⏹️ Stop Preview", disabled=stop_disabled, use_container_width=True):
762 |                     stop_preview() # Will stop and rerun
763 | 
764 |     st.divider()
765 | 
766 |     # --- Preview Display ---
767 |     st.subheader("Preview Window")
768 |     if is_preview_running:
769 |         # Check if the running preview matches the file selected in the workspace
770 |         if file_being_previewed == selected_file_for_preview:
771 |             st.info(f"Showing preview for `{file_being_previewed}`")
772 |             st.caption(f"URL: {preview_url}")
773 |             # Check if the process is still alive before showing iframe
774 |             live_process = st.session_state.preview_process
775 |             if live_process and live_process.poll() is None:
776 |                 # Display the running Streamlit app in an iframe
777 |                 st.components.v1.iframe(preview_url, height=600, scrolling=True)
778 |             else:
779 |                 # The process died unexpectedly
780 |                 st.warning(f"Preview for `{file_being_previewed}` stopped unexpectedly.")
781 |                 # Attempt to show error output if available
782 |                 if live_process:
783 |                     try:
784 |                         stderr = live_process.stderr.read()
785 |                         if stderr:
786 |                             with st.expander("Show error output from stopped process"): st.code(stderr)
787 |                     except Exception: pass # Ignore errors reading output
788 |                 # Clear the dead process state (stop_preview handles this and reruns)
789 |                 if live_process: # Check again in case state changed
790 |                     stop_preview()
791 |         else:
792 |             # A preview is running, but not for the file selected in the workspace
793 |             st.warning(f"Preview is running for `{file_being_previewed}`. Select that file in the Workspace to see it here, or stop it using the controls above.")
794 |     else:
795 |         # No preview is currently running
796 |         st.info("Click 'Run Preview' on a selected Python file to see it here.")

--------------------------------------------------------------------------------
/fast.py:
--------------------------------------------------------------------------------
  1 | ## Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts
  2 | ## Subscribe for more AI/Machine Learning/Data Science Tutorials
  3 | 
  4 | ##################################
  5 | ## 1. Data Import
  6 | ##################################
  7 | 
  8 | import os
  9 | import markdown
 10 | import pandas as pd
 11 | from fasthtml.common import *
 12 | from fastcore.basics import NotStr
 13 | import plotly.express as px
 14 | import nfl_data_py as nfl
 15 | 
 16 | 
 17 | 
 18 | ##################################
 19 | ## 2. Initialize FastHTML app
 20 | ##################################
 21 | 
 22 | app, rt = fast_app()
 23 | 
 24 | 
 25 | 
 26 | ##################################
 27 | ## 3. Input and Process Markdown Blog Files
 28 | ##################################
 29 | 
 30 | # Directory containing Markdown files
 31 | POSTS_DIR = 'posts'
 32 | 
 33 | # Load and convert Markdown files to HTML
 34 | def load_posts():
 35 |     posts = []
 36 |     # List all Markdown files with their full paths
 37 |     md_files = [os.path.join(POSTS_DIR, f) for f in os.listdir(POSTS_DIR) if f.endswith('.md')]
 38 |     # Sort files by last modified time in descending order
 39 |     md_files.sort(key=os.path.getmtime, reverse=True)
 40 |     for filepath in md_files:
 41 |         with open(filepath, 'r', encoding='utf-8') as file:
 42 |             html_content = markdown.markdown(file.read())
 43 |             title = os.path.basename(filepath).replace('_', ' ').replace('.md', '').title()
 44 |             posts.append({"title": title, "content": html_content})
 45 |     return posts
 46 | 
 47 | 
 48 | 
 49 | ##################################
 50 | ## 4. Function to import, wrangle, and graph data
 51 | ##################################
 52 | 
 53 | # Generate NFL Cumulative Offensive Yards Chart
 54 | def generate_offensive_yards_chart():
 55 |     # Fetch play-by-play data for the 2024 season
 56 |     df = nfl.import_pbp_data([2024])
 57 | 
 58 |     # Filter for rushing and passing plays
 59 |     rushing_plays = df[df['play_type'] == 'run']
 60 |     passing_plays = df[df['play_type'] == 'pass']
 61 | 
 62 |     # Group by offensive team and week, then sum yards gained
 63 |     weekly_rushing_yards = rushing_plays.groupby(['posteam', 'week'])['yards_gained'].sum().reset_index()
 64 |     weekly_passing_yards = passing_plays.groupby(['posteam', 'week'])['yards_gained'].sum().reset_index()
 65 | 
 66 |     # Add a 'play_type' column
 67 |     weekly_rushing_yards['play_type'] = 'Rushing'
 68 |     weekly_passing_yards['play_type'] = 'Passing'
 69 | 
 70 |     # Combine the dataframes
 71 |     combined_df = pd.concat([weekly_rushing_yards, weekly_passing_yards])
 72 | 
 73 |     # Pivot the table to have teams as columns and weeks as rows
 74 |     pivot_df = combined_df.pivot_table(index='week', columns=['posteam', 'play_type'], values='yards_gained', fill_value=0)
 75 | 
 76 |     # Calculate cumulative yards
 77 |     cumulative_yards = pivot_df.cumsum()
 78 | 
 79 |     # Reset index for plotting
 80 |     cumulative_yards = cumulative_yards.reset_index()
 81 |     cumulative_yards.columns = ['week'] + [f'{team}_{ptype}' for team, ptype in cumulative_yards.columns[1:]]
 82 | 
 83 |     # Melt the dataframe for Plotly Express
 84 |     melted_df = cumulative_yards.melt(id_vars=['week'], var_name='team_playtype', value_name='cumulative_yards')
 85 |     melted_df[['team', 'play_type']] = melted_df['team_playtype'].str.split('_', expand=True)
 86 | 
 87 |     # Create Plotly Express figure
 88 |     fig = px.line(melted_df, x='week', y='cumulative_yards', color='team', facet_col='play_type',
 89 |                   title='Cumulative Offensive Yards by Week (2024 Season)',
 90 |                   labels={'week': 'Week', 'cumulative_yards': 'Cumulative Yards'},
 91 |                   category_orders={'play_type': ['Rushing', 'Passing']})
 92 | 
 93 |     fig.update_layout(legend_title_text='Team')
 94 |     fig.update_xaxes(type='category')
 95 | 
 96 |     return fig.to_html(full_html=False, include_plotlyjs='cdn')
 97 | 
 98 | 
 99 | 
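# Illustration (hypothetical data, not part of the original file): the
# pivot_table/cumsum pair above is easiest to see on a tiny made-up frame
# with two teams and two weeks of rushing yards:
#
#     toy = pd.DataFrame({
#         'posteam': ['SF', 'SF', 'KC', 'KC'],
#         'week': [1, 2, 1, 2],
#         'play_type': ['Rushing'] * 4,
#         'yards_gained': [120, 95, 110, 140],
#     })
#     wide = toy.pivot_table(index='week', columns=['posteam', 'play_type'],
#                            values='yards_gained', fill_value=0)
#     wide.cumsum()
#
# The week-2 row then holds season-to-date totals (SF: 120+95 = 215,
# KC: 110+140 = 250), which is exactly the quantity plotted per team and
# play type in the figure built above.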
100 | ##################################
101 | ## 5. Homepage Route for Content Layout
102 | ##################################
103 | 
104 | @rt('/')
105 | def home():
106 |     posts = load_posts()
107 |     chart_html = generate_offensive_yards_chart()
108 | 
109 |     # Create a list of article components for each post
110 |     article_posts = [
111 |         Article(
112 |             H1(post['title'], cls='post-title'),
113 |             Div(NotStr(post['content']))
114 |         )
115 |         for post in posts
116 |     ]
117 |     return Html(
118 |         Head(
119 |             Title('Deep Charts: NFL Yards Tracker'),
120 |             Link(rel='stylesheet', href='https://cdn.jsdelivr.net/npm/@picocss/pico@latest/css/pico.min.css'),
121 |             Style("""
122 |                 .header {
123 |                     text-align: center;
124 |                     padding: 1em;
125 |                     background-color: #f8f9fa;
126 |                     position: fixed;
127 |                     top: 0;
128 |                     width: 100%;
129 |                     z-index: 10;
130 |                 }
131 |                 .container {
132 |                     display: flex;
133 |                     max-width: 100%;
134 |                     margin-top: 80px; /* Space for the fixed header */
135 |                 }
136 |                 .posts {
137 |                     flex: 2;
138 |                     overflow-y: auto;
139 |                     height: calc(100vh - 80px); /* Adjust for header */
140 |                     padding: 1em;
141 |                     margin-right: 40%;
142 |                     box-sizing: border-box;
143 |                 }
144 |                 .chart {
145 |                     flex: 1;
146 |                     position: fixed;
147 |                     right: 0;
148 |                     top: 80px; /* Space for the fixed header */
149 |                     width: 40%;
150 |                     height: calc(100vh - 80px); /* Adjust for header */
151 |                     padding: 1em;
152 |                     box-sizing: border-box;
153 |                 }
154 |                 h1.post-title {
155 |                     font-size: 1.5em;
156 |                     font-weight: bold;
157 |                 }
158 |                 article {
159 |                     margin-bottom: 2em;
160 |                 }
161 |             """)
162 |         ),
163 |         Body(
164 |             Div(
165 |                 H1('Deep Charts: NFL Yards Tracker', cls='header'),
166 |                 Div(
167 |                     Div(*article_posts, cls="posts"),
168 |                     Div(NotStr(chart_html), cls="chart"),
169 |                     cls="container"
170 |                 )
171 |             )
172 |         )
173 |     )
174 | 
175 | 
176 | 
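# A hedged aside on extending the app (not in the original file): FastHTML
# binds one function to one URL through the rt decorator returned by
# fast_app(), so additional pages follow the same pattern as home(). A
# minimal sketch for a hypothetical second page, reusing the component
# helpers already imported from fasthtml.common:
#
#     @rt('/about')
#     def about():
#         return Html(
#             Head(Title('About')),
#             Body(H1('About this site'), P('Built with FastHTML and Plotly.'))
#         )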
177 | ##################################
178 | ## 6. Serve the App
179 | ##################################
180 | 
181 | serve()
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 | 
189 | 
190 | 
--------------------------------------------------------------------------------
/scikit-ollama-tutorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Source: Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "# AI Sentiment Analysis with Ollama and Scikit-Ollama"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "## Import Libraries"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "import pandas as pd\n",
 31 |     "from finvizfinance.quote import finvizfinance\n",
 32 |     "from skollama.models.ollama.classification.zero_shot import ZeroShotOllamaClassifier\n",
 33 |     "from skollama.models.ollama.classification.few_shot import FewShotOllamaClassifier"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "## Pull Stock News Headline Data"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": null,
 46 |    "metadata": {},
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "# Initialize the finvizfinance object for INTC\n",
 50 |     "stock = finvizfinance('INTC')\n",
 51 |     "\n",
 52 |     "# Fetch the latest news articles\n",
 53 |     "news_df = stock.ticker_news()\n",
 54 |     "\n",
 55 |     "news_df.head()"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "Data Wrangling (Reorder dataframe, remove headlines without the company name)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "# Reorder Columns\n",
 72 |     "news_df = news_df[['Date','Link','Title']]\n",
 73 |     "\n",
 74 |     "# Define the keywords to filter by\n",
 75 |     "keywords = ['INTC', 'Intel']\n",
 76 |     "\n",
 77 |     "# Create a regex pattern by joining keywords with '|'\n",
 78 |     "pattern = '|'.join(keywords)\n",
 79 |     "\n",
 80 |     "# Filter the DataFrame using str.contains\n",
 81 |     "filtered_news_df = news_df[news_df['Title'].str.contains(pattern, case=False, na=False)]\n",
 82 |     "\n",
 83 |     "filtered_news_df.head()"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "## Run Zero Shot Classifier"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": null,
 96 |    "metadata": {},
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "# Initialize the ZeroShotOllamaClassifier\n",
100 |     "clf = ZeroShotOllamaClassifier(model='llama3')\n",
101 |     "\n",
102 |     "# Define the candidate labels\n",
103 |     "candidate_labels = ['positive', 'negative', 'neutral']\n",
104 |     "\n",
105 |     "# Fit the classifier (no training data needed for zero-shot)\n",
106 |     "clf.fit(None, candidate_labels)\n",
107 |     "\n",
108 |     "# Predict the sentiment of each news title as a new column in our DataFrame\n",
109 |     "filtered_news_df['Sentiment_zero'] = clf.predict(filtered_news_df['Title'])\n"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "filtered_news_df[['Title','Sentiment_zero']]"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "## Train and Run Few Shot Classifier"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "1. Start by randomly selecting a few training examples from the original dataset"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {},
139 |    "outputs": [],
140 |    "source": [
141 |     "# Randomly select 7 headlines for few-shot training and add a training indicator\n",
142 |     "few_shot_df = filtered_news_df.sample(n=7, random_state=1)\n",
143 |     "filtered_news_df['Few Shot Training Example'] = filtered_news_df.index.isin(few_shot_df.index)\n",
144 |     "\n",
145 |     "# View training examples\n",
146 |     "list(few_shot_df['Title'])"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "2. Manually review each training example and assign a human-guided label"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 7,
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "# Manually assigned labels corresponding to the selected headlines\n",
163 |     "# Fill in below based on above headlines\n",
164 |     "user_labels = [\n",
165 |     "    'neutral',\n",
166 |     "    'negative',\n",
167 |     "    'neutral',\n",
168 |     "    'positive',\n",
169 |     "    'positive',\n",
170 |     "    'neutral',\n",
171 |     "    'positive'\n",
172 |     "]\n",
173 |     "\n",
174 |     "# Add the user-provided labels to the few-shot DataFrame\n",
175 |     "few_shot_df['User_Sentiment'] = user_labels"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "markdown",
180 |    "metadata": {},
181 |    "source": [
182 |     "3. Initialize and run the few-shot classifier on the rest of the dataset"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "# Initialize the FewShotOllamaClassifier\n",
192 |     "few_shot_clf = FewShotOllamaClassifier(model='llama3')\n",
193 |     "\n",
194 |     "# Fit the classifier with user-provided examples directly from the DataFrame columns\n",
195 |     "few_shot_clf.fit(few_shot_df['Title'], few_shot_df['User_Sentiment'])\n",
196 |     "\n",
197 |     "# Predict the sentiment of all news titles in the filtered DataFrame\n",
198 |     "filtered_news_df['Sentiment_few'] = few_shot_clf.predict(filtered_news_df['Title'])\n"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": null,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
207 |     "filtered_news_df_2 = filtered_news_df[['Title','Sentiment_zero','Sentiment_few','Few Shot Training Example']]\n",
208 |     "filtered_news_df_2"
209 |    ]
210 |   }
211 |  ],
212 |  "metadata": {
213 |   "kernelspec": {
214 |    "display_name": "general_env",
215 |    "language": "python",
216 |    "name": "python3"
217 |   },
218 |   "language_info": {
219 |    "codemirror_mode": {
220 |     "name": "ipython",
221 |     "version": 3
222 |    },
223 |    "file_extension": ".py",
224 |    "mimetype": "text/x-python",
225 |    "name": "python",
226 |    "nbconvert_exporter": "python",
227 |    "pygments_lexer": "ipython3",
228 |    "version": "3.12.3"
229 |   }
230 |  },
231 |  "nbformat": 4,
232 |  "nbformat_minor": 2
233 | }
234 | 
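A quick follow-on idea (a sketch, not in the notebook above): with both label columns in place, one line of pandas measures how often the zero-shot and few-shot classifiers agree, which is a cheap sanity check on the few-shot labels:

    # Share of headlines where both classifiers assign the same label
    agreement = (filtered_news_df['Sentiment_zero'] == filtered_news_df['Sentiment_few']).mean()
    print(f"Zero-shot vs few-shot agreement: {agreement:.0%}")
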
"source": [ 125 | "## Train and Run Few Shot Classifier" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "1. Start by randomly selecting a few training examples from the original dataset" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "# Randomly select 6 headlines for few-shot training and add a training indicator\n", 142 | "few_shot_df = filtered_news_df.sample(n=7, random_state=1)\n", 143 | "filtered_news_df['Few Shot Training Example'] = filtered_news_df.index.isin(few_shot_df.index)\n", 144 | "\n", 145 | "# View training examples\n", 146 | "list(few_shot_df['Title'])" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "2. Manually review each training example and give human guided label assignment" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 7, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# Manually assigned labels corresponding to the selected headlines\n", 163 | "# Fill in below based on above headlines\n", 164 | "user_labels = [\n", 165 | " 'neutral',\n", 166 | " 'negative',\n", 167 | " 'neutral',\n", 168 | " 'positive',\n", 169 | " 'positive',\n", 170 | " 'neutral',\n", 171 | " 'positive'\n", 172 | "]\n", 173 | "\n", 174 | "# Add the user-provided labels to the few-shot DataFrame\n", 175 | "few_shot_df['User_Sentiment'] = user_labels" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "3. Initialize and run few shot classifier on the rest of the dataset" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "# Initialize the FewShotOllamaClassifier\n", 192 | "few_shot_clf = FewShotOllamaClassifier(model='llama3')\n", 193 | "\n", 194 | "# Fit the classifier with user-provided examples directly from the DataFrame columns\n", 195 | "few_shot_clf.fit(few_shot_df['Title'], few_shot_df['User_Sentiment'])\n", 196 | "\n", 197 | "# Predict the sentiment of all news titles in the filtered DataFrame\n", 198 | "filtered_news_df['Sentiment_few'] = few_shot_clf.predict(filtered_news_df['Title'])\n" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "filtered_news_df_2 = filtered_news_df[['Title','Sentiment_zero','Sentiment_few','Few Shot Training Example']]\n", 208 | "filtered_news_df_2" 209 | ] 210 | } 211 | ], 212 | "metadata": { 213 | "kernelspec": { 214 | "display_name": "general_env", 215 | "language": "python", 216 | "name": "python3" 217 | }, 218 | "language_info": { 219 | "codemirror_mode": { 220 | "name": "ipython", 221 | "version": 3 222 | }, 223 | "file_extension": ".py", 224 | "mimetype": "text/x-python", 225 | "name": "python", 226 | "nbconvert_exporter": "python", 227 | "pygments_lexer": "ipython3", 228 | "version": "3.12.3" 229 | } 230 | }, 231 | "nbformat": 4, 232 | "nbformat_minor": 2 233 | } 234 | -------------------------------------------------------------------------------- /stock_sentiment_agents.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Stock Sentiment Agent Workflow" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | 
"metadata": {}, 13 | "source": [ 14 | "Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "## LIbraries\n", 24 | "from phi.agent import Agent\n", 25 | "from phi.model.openai import OpenAIChat\n", 26 | "from phi.tools.googlesearch import GoogleSearch\n", 27 | "from phi.tools.yfinance import YFinanceTools" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "## Put Open AI API key into Python environment\n", 37 | "import os\n", 38 | "os.environ[\"OPENAI_API_KEY\"] = 'sk-xxxxxxxx'" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "application/vnd.jupyter.widget-view+json": { 49 | "model_id": "ed0b90db992446c4a4d7a9625f5ed101", 50 | "version_major": 2, 51 | "version_minor": 0 52 | }, 53 | "text/plain": [ 54 | "Output()" 55 | ] 56 | }, 57 | "metadata": {}, 58 | "output_type": "display_data" 59 | }, 60 | { 61 | "data": { 62 | "text/html": [ 63 | "
\n"
 64 |       ],
 65 |       "text/plain": []
 66 |      },
 67 |      "metadata": {},
 68 |      "output_type": "display_data"
 69 |     }
 70 |    ],
 71 |    "source": [
 72 |     "## Create Agents\n",
 73 |     "\n",
 74 |     "# Sentiment Agent\n",
 75 |     "sentiment_agent = Agent(\n",
 76 |     "    name=\"Sentiment Agent\",\n",
 77 |     "    role=\"Search and interpret news articles.\",\n",
 78 |     "    model=OpenAIChat(id=\"gpt-4o\"),\n",
 79 |     "    tools=[GoogleSearch()],\n",
 80 |     "    instructions=[\n",
 81 |     "        \"Find relevant news articles for each company and analyze the sentiment.\",\n",
 82 |     "        \"Provide sentiment scores from 1 (negative) to 10 (positive) with reasoning and sources.\"\n",
 83 |     "        \"Cite your sources. Be specific and provide links.\"\n",
 84 |     "    ],\n",
 85 |     "    show_tool_calls=True,\n",
 86 |     "    markdown=True,\n",
 87 |     ")\n",
 88 |     "\n",
 89 |     "# Finance Agent\n",
 90 |     "finance_agent = Agent(\n",
 91 |     "    name=\"Finance Agent\",\n",
 92 |     "    role=\"Get financial data and interpret trends.\",\n",
 93 |     "    model=OpenAIChat(id=\"gpt-4o\"),\n",
 94 |     "    tools=[YFinanceTools(stock_price=True, analyst_recommendations=True, company_info=True)],\n",
 95 |     "    instructions=[\n",
 96 |     "        \"Retrieve stock prices, analyst recommendations, and key financial data.\",\n",
 97 |     "        \"Focus on trends and present the data in tables with key insights.\"\n",
 98 |     "    ],\n",
 99 |     "    show_tool_calls=True,\n",
100 |     "    markdown=True,\n",
101 |     ")\n",
102 |     "\n",
103 |     "# Analyst Agent\n",
104 |     "analyst_agent = Agent(\n",
105 |     "    name=\"Analyst Agent\",\n",
106 |     "    role=\"Ensure thoroughness and draw conclusions.\",\n",
107 |     "    model=OpenAIChat(id=\"gpt-4o\"),\n",
108 |     "    instructions=[\n",
109 |     "        \"Check outputs for accuracy and completeness.\",\n",
110 |     "        \"Synthesize data to provide a final sentiment score (1-10) with justification.\"\n",
111 |     "    ],\n",
112 |     "    show_tool_calls=True,\n",
113 |     "    markdown=True,\n",
114 |     ")\n",
115 |     "\n",
116 |     "# Team of Agents\n",
117 |     "agent_team = Agent(\n",
118 |     "    model=OpenAIChat(id=\"gpt-4o\"),\n",
119 |     "    team=[sentiment_agent, finance_agent, analyst_agent],\n",
120 |     "    instructions=[\n",
121 |     "        \"Combine the expertise of all agents to provide a cohesive, well-supported response.\",\n",
122 |     "        \"Always include references and dates for all data points and sources.\",\n",
123 |     "        \"Present all data in structured tables for clarity.\",\n",
124 |     "        \"Explain the methodology used to arrive at the sentiment scores.\"\n",
125 |     "    ],\n",
126 |     "    show_tool_calls=True,\n",
127 |     "    markdown=True,\n",
128 |     ")\n",
129 |     "\n",
130 |     "## Run Agent Team\n",
131 |     "\n",
132 |     "# Final Prompt\n",
133 |     "agent_team.print_response(\n",
134 |     "    \"Analyze the sentiment for the following companies during the week of December 2nd-6th, 2024: NVDA, MSFT. \\n\\n\"\n",
135 |     "    \"1. **Sentiment Analysis**: Search for relevant news articles and interpret th–e sentiment for each company. Provide sentiment scores on a scale of 1 to 10, explain your reasoning, and cite your sources.\\n\\n\"\n",
136 |     "    \"2. **Financial Data**: Analyze stock price movements, analyst recommendations, and any notable financial data. Highlight key trends or events, and present the data in tables.\\n\\n\"\n",
137 |     "    \"3. **Consolidated Analysis**: Combine the insights from sentiment analysis and financial data to assign a final sentiment score (1-10) for each company. Justify the scores and provide a summary of the most important findings.\\n\\n\"\n",
138 |     "    \"Ensure your response is accurate, comprehensive, and includes references to sources with publication dates.\",\n",
139 |     "    stream=True\n",
140 |     ")"
141 |    ]
142 |   }
143 |  ],
144 |  "metadata": {
145 |   "kernelspec": {
146 |    "display_name": "general_env",
147 |    "language": "python",
148 |    "name": "python3"
149 |   },
150 |   "language_info": {
151 |    "codemirror_mode": {
152 |     "name": "ipython",
153 |     "version": 3
154 |    },
155 |    "file_extension": ".py",
156 |    "mimetype": "text/x-python",
157 |    "name": "python",
158 |    "nbconvert_exporter": "python",
159 |    "pygments_lexer": "ipython3",
160 |    "version": "3.12.3"
161 |   }
162 |  },
163 |  "nbformat": 4,
164 |  "nbformat_minor": 2
165 | }
166 | 
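A quick way to smoke-test the pieces before running the full team (a sketch, not part of the notebook; it reuses only the constructor arguments and the print_response call shown above, and assumes OPENAI_API_KEY is set):

    from phi.agent import Agent
    from phi.model.openai import OpenAIChat
    from phi.tools.googlesearch import GoogleSearch

    # Run the sentiment agent alone on a single ticker so failures are easy to localize
    solo_sentiment = Agent(
        name="Sentiment Agent",
        role="Search and interpret news articles.",
        model=OpenAIChat(id="gpt-4o"),
        tools=[GoogleSearch()],
        markdown=True,
    )
    solo_sentiment.print_response("Find this week's news for NVDA and give a 1-10 sentiment score.", stream=True)
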


--------------------------------------------------------------------------------
/stocks_dashboard.py:
--------------------------------------------------------------------------------
  1 | # Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)
  2 | 
  3 | import streamlit as st
  4 | import plotly.express as px
  5 | import plotly.graph_objects as go
  6 | import pandas as pd
  7 | import yfinance as yf
  8 | from datetime import datetime, timedelta
  9 | import pytz
 10 | import ta
 11 | 
 12 | ##########################################################################################
 13 | ## PART 1: Define Functions for Pulling, Processing, and Creating Technical Indicators ##
 14 | ##########################################################################################
 15 | 
 16 | # Fetch stock data based on the ticker, period, and interval
 17 | def fetch_stock_data(ticker, period, interval):
 18 |     end_date = datetime.now()
 19 |     if period == '1wk':
 20 |         start_date = end_date - timedelta(days=7)
 21 |         data = yf.download(ticker, start=start_date, end=end_date, interval=interval)
 22 |     else:
 23 |         data = yf.download(ticker, period=period, interval=interval)
 24 |     return data
 25 | 
 26 | # Process data to ensure it is timezone-aware and has the correct format
 27 | def process_data(data):
 28 |     if data.index.tzinfo is None:
 29 |         data.index = data.index.tz_localize('UTC')
 30 |     data.index = data.index.tz_convert('US/Eastern')
 31 |     data.reset_index(inplace=True)
 32 |     data.rename(columns={'Date': 'Datetime'}, inplace=True)
 33 |     return data
 34 | 
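# Note (added for clarity, behavior unchanged): yf.download returns a 'Date'
# index for daily bars and a 'Datetime' index for intraday bars; the rename
# above normalizes the daily case so downstream code can always refer to a
# 'Datetime' column. For intraday data the rename is simply a no-op.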
 35 | # Calculate basic metrics from the stock data
 36 | def calculate_metrics(data):
 37 |     last_close = data['Close'].iloc[-1]
 38 |     prev_close = data['Close'].iloc[0]
 39 |     change = last_close - prev_close
 40 |     pct_change = (change / prev_close) * 100
 41 |     high = data['High'].max()
 42 |     low = data['Low'].min()
 43 |     volume = data['Volume'].sum()
 44 |     return last_close, change, pct_change, high, low, volume
 45 | 
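# Worked example (illustrative numbers, not from the app): if the first close
# of the selected period is 100.00 and the last close is 103.50, the function
# above returns change = 3.50 and pct_change = (3.50 / 100.00) * 100 = 3.5%,
# i.e. the change is measured across the whole period, not just the last bar.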
 46 | # Add simple moving average (SMA) and exponential moving average (EMA) indicators
 47 | def add_technical_indicators(data):
 48 |     data['SMA_20'] = ta.trend.sma_indicator(data['Close'], window=20)
 49 |     data['EMA_20'] = ta.trend.ema_indicator(data['Close'], window=20)
 50 |     return data
 51 | 
 52 | ###############################################
 53 | ## PART 2: Creating the Dashboard App layout ##
 54 | ###############################################
 55 | 
 56 | 
 57 | # Set up Streamlit page layout
 58 | st.set_page_config(layout="wide")
 59 | st.title('Real Time Stock Dashboard')
 60 | 
 61 | 
 62 | # 2A: SIDEBAR PARAMETERS ############
 63 | 
 64 | # Sidebar for user input parameters
 65 | st.sidebar.header('Chart Parameters')
 66 | ticker = st.sidebar.text_input('Ticker', 'ADBE')
 67 | time_period = st.sidebar.selectbox('Time Period', ['1d', '1wk', '1mo', '1y', 'max'])
 68 | chart_type = st.sidebar.selectbox('Chart Type', ['Candlestick', 'Line'])
 69 | indicators = st.sidebar.multiselect('Technical Indicators', ['SMA 20', 'EMA 20'])
 70 | 
 71 | # Mapping of time periods to data intervals
 72 | interval_mapping = {
 73 |     '1d': '1m',
 74 |     '1wk': '30m',
 75 |     '1mo': '1d',
 76 |     '1y': '1wk',
 77 |     'max': '1wk'
 78 | }
 79 | 
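# Reading the mapping above (comment added for clarity): shorter windows get
# finer bars -- a '1d' view plots 1-minute candles, '1wk' uses 30-minute
# candles, and anything from a month up falls back to daily or weekly bars so
# the chart stays legible.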
 80 | 
 81 | # 2B: MAIN CONTENT AREA ############
 82 | 
 83 | # Update the dashboard based on user input
 84 | if st.sidebar.button('Update'):
 85 |     data = fetch_stock_data(ticker, time_period, interval_mapping[time_period])
 86 |     data = process_data(data)
 87 |     data = add_technical_indicators(data)
 88 |     
 89 |     last_close, change, pct_change, high, low, volume = calculate_metrics(data)
 90 |     
 91 |     # Display main metrics
 92 |     st.metric(label=f"{ticker} Last Price", value=f"{last_close:.2f} USD", delta=f"{change:.2f} ({pct_change:.2f}%)")
 93 |     
 94 |     col1, col2, col3 = st.columns(3)
 95 |     col1.metric("High", f"{high:.2f} USD")
 96 |     col2.metric("Low", f"{low:.2f} USD")
 97 |     col3.metric("Volume", f"{volume:,}")
 98 |     
 99 |     # Plot the stock price chart
100 |     fig = go.Figure()
101 |     if chart_type == 'Candlestick':
102 |         fig.add_trace(go.Candlestick(x=data['Datetime'],
103 |                                      open=data['Open'],
104 |                                      high=data['High'],
105 |                                      low=data['Low'],
106 |                                      close=data['Close']))
107 |     else:
108 |         fig = px.line(data, x='Datetime', y='Close')
109 |     
110 |     # Add selected technical indicators to the chart
111 |     for indicator in indicators:
112 |         if indicator == 'SMA 20':
113 |             fig.add_trace(go.Scatter(x=data['Datetime'], y=data['SMA_20'], name='SMA 20'))
114 |         elif indicator == 'EMA 20':
115 |             fig.add_trace(go.Scatter(x=data['Datetime'], y=data['EMA_20'], name='EMA 20'))
116 |     
117 |     # Format graph
118 |     fig.update_layout(title=f'{ticker} {time_period.upper()} Chart',
119 |                       xaxis_title='Time',
120 |                       yaxis_title='Price (USD)',
121 |                       height=600)
122 |     st.plotly_chart(fig, use_container_width=True)
123 |     
124 |     # Display historical data and technical indicators
125 |     st.subheader('Historical Data')
126 |     st.dataframe(data[['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume']])
127 |     
128 |     st.subheader('Technical Indicators')
129 |     st.dataframe(data[['Datetime', 'SMA_20', 'EMA_20']])
130 | 
131 | 
132 | # 2C: SIDEBAR PRICES ############
133 | 
134 | # Sidebar section for real-time stock prices of selected symbols
135 | st.sidebar.header('Real-Time Stock Prices')
136 | stock_symbols = ['AAPL', 'GOOGL', 'AMZN', 'MSFT']
137 | for symbol in stock_symbols:
138 |     real_time_data = fetch_stock_data(symbol, '1d', '1m')
139 |     if not real_time_data.empty:
140 |         real_time_data = process_data(real_time_data)
141 |         last_price = real_time_data['Close'].iloc[-1]
142 |         change = last_price - real_time_data['Open'].iloc[0]
143 |         pct_change = (change / real_time_data['Open'].iloc[0]) * 100
144 |         st.sidebar.metric(f"{symbol}", f"{last_price:.2f} USD", f"{change:.2f} ({pct_change:.2f}%)")
145 | 
146 | # Sidebar information section
147 | st.sidebar.subheader('About')
148 | st.sidebar.info('This dashboard provides stock data and technical indicators for various time periods. Use the sidebar to customize your view.')
149 | 
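# A hedged extension sketch (not part of the original dashboard): the `ta`
# package exposes further indicators in the same one-liner style used in
# add_technical_indicators() above, e.g. a 14-period RSI via its momentum
# module. Wiring it in would also require adding 'RSI 14' to the sidebar
# multiselect and an extra branch in the plotting loop:
#
#     data['RSI_14'] = ta.momentum.rsi(data['Close'], window=14)  # assumed helper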
150 | 
151 | 


--------------------------------------------------------------------------------
/structured_outputs.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Structured Outputs: From Text to Tabular Data"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "### Import Libraries"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 2,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "from ollama import chat\n",
 31 |     "from pydantic import BaseModel\n",
 32 |     "import pandas as pd\n",
 33 |     "from gnews import GNews"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "### Pull News Headline Data"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": null,
 46 |    "metadata": {},
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "\n",
 50 |     "# Fetch news articles\n",
 51 |     "google_news = GNews()\n",
 52 |     "news = google_news.get_news(\"NVDA\")\n",
 53 |     "\n",
 54 |     "# Extract top 6 news titles\n",
 55 |     "news_titles = [article['title'] for article in news[:6]]\n",
 56 |     "news_titles"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "### LLM Model and Structured Outputs"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "# Define BaseModel for news analysis\n",
 73 |     "class NewsAnalysis(BaseModel):\n",
 74 |     "    sentiment: str  \n",
 75 |     "    future_looking: bool  \n",
 76 |     "\n",
 77 |     "# Initialize an empty list to store results\n",
 78 |     "results = []\n",
 79 |     "\n",
 80 |     "# Loop through the news titles and analyze each\n",
 81 |     "for title in news_titles:\n",
 82 |     "    response = chat(\n",
 83 |     "        messages=[\n",
 84 |     "            {\n",
 85 |     "                'role': 'user',\n",
 86 |     "                'content': f\"\"\"Analyze the following title for sentiment (positive, negative, or neutral) \n",
 87 |     "                               and whether it provides future-looking financial insight, predictions, or \n",
 88 |     "                               guidance on whether to buy/hold/sell the stock (True or False): {title}\n",
 89 |     "                \"\"\",\n",
 90 |     "            }\n",
 91 |     "        ],\n",
 92 |     "        model='llama3.2',\n",
 93 |     "        format=NewsAnalysis.model_json_schema(),\n",
 94 |     "    )\n",
 95 |     "\n",
 96 |     "    # Parse the response into the NewsAnalysis model\n",
 97 |     "    sentiment_analysis = NewsAnalysis.model_validate_json(response['message']['content'])\n",
 98 |     "\n",
 99 |     "    # Append the results to the list\n",
100 |     "    results.append({\n",
101 |     "        'title': title,\n",
102 |     "        'sentiment': sentiment_analysis.sentiment,\n",
103 |     "        'future_looking': sentiment_analysis.future_looking\n",
104 |     "    })\n",
105 |     "\n",
106 |     "# Convert the results to a DataFrame\n",
107 |     "df = pd.DataFrame(results)\n",
108 |     "df\n"
109 |    ]
110 |   }
111 |  ],
112 |  "metadata": {
113 |   "kernelspec": {
114 |    "display_name": "structured_output",
115 |    "language": "python",
116 |    "name": "python3"
117 |   },
118 |   "language_info": {
119 |    "codemirror_mode": {
120 |     "name": "ipython",
121 |     "version": 3
122 |    },
123 |    "file_extension": ".py",
124 |    "mimetype": "text/x-python",
125 |    "name": "python",
126 |    "nbconvert_exporter": "python",
127 |    "pygments_lexer": "ipython3",
128 |    "version": "3.9.21"
129 |   }
130 |  },
131 |  "nbformat": 4,
132 |  "nbformat_minor": 2
133 | }
134 | 
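A small follow-on (not in the notebook above): because the structured outputs land in an ordinary DataFrame, standard pandas operations summarize them directly, e.g. tallying sentiment labels and filtering to forward-looking headlines:

    # Count sentiment labels and keep only future-looking headlines
    sentiment_counts = df['sentiment'].value_counts()
    forward_looking = df[df['future_looking']]
    print(sentiment_counts)
    print(forward_looking[['title', 'sentiment']])
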


--------------------------------------------------------------------------------