├── AI_Technical_Analysis.py
├── AutoScraper Tutorial.ipynb
├── LLM_timeseries_crypto.ipynb
├── Pandas_Top_5_Tricks.ipynb
├── README.md
├── Technical_Indicators_For_Machine_Learning.ipynb
├── Yelp API Notebook.ipynb
├── ai_coding_agent_tutorial.ipynb
├── ai_image_generator.py
├── ai_sentiment_analysis_gemini.ipynb
├── ai_stocks_prediction.py
├── app_streamlit_app_builder_ai.py
├── fast.py
├── flight_delay_ML_project.ipynb
├── scikit-ollama-tutorial.ipynb
├── stock_sentiment_agents.ipynb
├── stocks_dashboard.py
└── structured_outputs.ipynb
/AI_Technical_Analysis.py:
--------------------------------------------------------------------------------
1 | ## Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)
2 |
3 | #### NOTE: Pin yfinance to this version for the chart code to work as written: "pip install yfinance==0.2.40"
4 |
5 | import streamlit as st
6 | import yfinance as yf
7 | import pandas as pd
8 | import plotly.graph_objects as go
9 | import ollama
10 | import tempfile
11 | import base64
12 | import os
13 |
14 | # Set up Streamlit app
15 | st.set_page_config(layout="wide")
16 | st.title("AI-Powered Technical Stock Analysis Dashboard")
17 | st.sidebar.header("Configuration")
18 |
19 | # Input for stock ticker and date range
20 | ticker = st.sidebar.text_input("Enter Stock Ticker (e.g., AAPL):", "AAPL")
21 | start_date = st.sidebar.date_input("Start Date", value=pd.to_datetime("2023-01-01"))
22 | end_date = st.sidebar.date_input("End Date", value=pd.to_datetime("2024-12-14"))
23 |
24 | # Fetch stock data
25 | if st.sidebar.button("Fetch Data"):
26 | st.session_state["stock_data"] = yf.download(ticker, start=start_date, end=end_date)
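  | # Hedged sketch, not part of the original app: yfinance releases newer than the
  | # pinned 0.2.40 can return MultiIndex columns (e.g. ('Close', 'AAPL')) even for a
  | # single ticker, which breaks data['Open'] below; flattening keeps the chart working.
  | if isinstance(st.session_state["stock_data"].columns, pd.MultiIndex):
  |     st.session_state["stock_data"].columns = st.session_state["stock_data"].columns.get_level_values(0)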
27 | st.success("Stock data loaded successfully!")
28 |
29 | # Check if data is available
30 | if "stock_data" in st.session_state:
31 | data = st.session_state["stock_data"]
32 |
33 | # Plot candlestick chart
34 | fig = go.Figure(data=[
35 | go.Candlestick(
36 | x=data.index,
37 | open=data['Open'],
38 | high=data['High'],
39 | low=data['Low'],
40 | close=data['Close'],
41 | name="Candlestick" # label the trace (the default legend name would be "trace 0")
42 | )
43 | ])
44 |
45 | # Sidebar: Select technical indicators
46 | st.sidebar.subheader("Technical Indicators")
47 | indicators = st.sidebar.multiselect(
48 | "Select Indicators:",
49 | ["20-Day SMA", "20-Day EMA", "20-Day Bollinger Bands", "VWAP"],
50 | default=["20-Day SMA"]
51 | )
52 |
53 | # Helper function to add indicators to the chart
54 | def add_indicator(indicator):
55 | if indicator == "20-Day SMA":
56 | sma = data['Close'].rolling(window=20).mean()
57 | fig.add_trace(go.Scatter(x=data.index, y=sma, mode='lines', name='SMA (20)'))
58 | elif indicator == "20-Day EMA":
59 | ema = data['Close'].ewm(span=20).mean()
60 | fig.add_trace(go.Scatter(x=data.index, y=ema, mode='lines', name='EMA (20)'))
61 | elif indicator == "20-Day Bollinger Bands":
62 | sma = data['Close'].rolling(window=20).mean()
63 | std = data['Close'].rolling(window=20).std()
64 | bb_upper = sma + 2 * std
65 | bb_lower = sma - 2 * std
66 | fig.add_trace(go.Scatter(x=data.index, y=bb_upper, mode='lines', name='BB Upper'))
67 | fig.add_trace(go.Scatter(x=data.index, y=bb_lower, mode='lines', name='BB Lower'))
68 | elif indicator == "VWAP":
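  | # Note: this is a cumulative VWAP over the whole selected date range; classic
  | # VWAP resets each trading session, so treat this as an approximation.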
69 | data['VWAP'] = (data['Close'] * data['Volume']).cumsum() / data['Volume'].cumsum()
70 | fig.add_trace(go.Scatter(x=data.index, y=data['VWAP'], mode='lines', name='VWAP'))
71 |
72 | # Add selected indicators to the chart
73 | for indicator in indicators:
74 | add_indicator(indicator)
75 |
76 | fig.update_layout(xaxis_rangeslider_visible=False)
77 | st.plotly_chart(fig)
78 |
79 | # Analyze chart with LLaMA 3.2 Vision
80 | st.subheader("AI-Powered Analysis")
81 | if st.button("Run AI Analysis"):
82 | with st.spinner("Analyzing the chart, please wait..."):
83 | # Save chart as a temporary image
84 | with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
85 | fig.write_image(tmpfile.name)
86 | tmpfile_path = tmpfile.name
87 |
88 | # Read image and encode to Base64
89 | with open(tmpfile_path, "rb") as image_file:
90 | image_data = base64.b64encode(image_file.read()).decode('utf-8')
91 |
92 | # Prepare AI analysis request
93 | messages = [{
94 | 'role': 'user',
95 | 'content': """You are a Stock Trader specializing in Technical Analysis at a top financial institution.
96 | Analyze the stock chart's technical indicators and provide a buy/hold/sell recommendation.
97 | Base your recommendation only on the candlestick chart and the displayed technical indicators.
98 | First, provide the recommendation; then provide your detailed reasoning.
99 | """,
100 | 'images': [image_data]
101 | }]
102 | response = ollama.chat(model='llama3.2-vision', messages=messages)
103 |
104 | # Display AI analysis result
105 | st.write("**AI Analysis Results:**")
106 | st.write(response["message"]["content"])
107 |
108 | # Clean up temporary file
109 | os.remove(tmpfile_path)
110 |
111 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/AutoScraper Tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# AutoScraper Tutorial"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "### Example 1: Pulling Text"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "from autoscraper import AutoScraper\n",
31 | "\n",
32 | "# Web Page to Scrape from\n",
33 | "url = 'https://www.noaa.gov/media-releases'\n",
34 | "\n",
35 | "# Example Text to Pull\n",
36 | "# Note: Change below to most recent news release headline on 'https://www.noaa.gov/media-releases'\n",
37 | "news_list = [\"Applications now open nationwide for community-led heat-monitoring campaigns\"]\n",
38 | "\n",
39 | "# Initialize AutoScraper\n",
40 | "scraper = AutoScraper()\n",
41 | "\n",
42 | "# Build Model\n",
43 | "news_result = scraper.build(url, news_list)\n",
44 | "\n",
45 | "# Review Results\n",
46 | "news_result"
47 | ]
48 | },
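  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "Once built, the learned scraping rules can be saved and reloaded later instead of rebuilding from an example list each time. A minimal sketch ('noaa_scraper' is just an example file name):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Save the learned rules to disk\n",
  | "scraper.save('noaa_scraper')\n",
  | "\n",
  | "# Later: reload the rules and reapply them to the same or a similar page\n",
  | "reloaded = AutoScraper()\n",
  | "reloaded.load('noaa_scraper')\n",
  | "reloaded.get_result_similar(url)"
  | ]
  | },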
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "### Example 2: Pulling Tabular Data"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "from autoscraper import AutoScraper\n",
63 | "\n",
64 | "# Web Page to Scrape from\n",
65 | "url = 'https://en.wikipedia.org/wiki/List_of_counties_in_California'\n",
66 | "\n",
67 | "# Example Text to Pull\n",
68 | "county_list = [\"Alameda County\",\"Yuba County\"]\n",
69 | "est_list = [\"1,622,188\",\"85,722\"]\n",
70 | "\n",
71 | "\n",
72 | "# Initialize AutoScraper\n",
73 | "scraper = AutoScraper()\n",
74 | "\n",
75 | "# Build Model\n",
76 | "county_result = scraper.build(url, county_list)\n",
77 | "est_result = scraper.build(url, est_list)\n",
78 | "\n",
79 | "# Review Results\n",
80 | "print(county_result)\n",
81 | "print(est_result)\n"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "import pandas as pd\n",
91 | "\n",
92 | "# Convert Lists to Dictionary\n",
93 | "data = {'County': county_result, 'Estimated Population': est_result}\n",
94 | "\n",
95 | "# Convert Dictionary to Dataframe\n",
96 | "df = pd.DataFrame(data)\n",
97 | "\n",
98 | "df"
99 | ]
100 | }
101 | ],
102 | "metadata": {
103 | "kernelspec": {
104 | "display_name": "general_env",
105 | "language": "python",
106 | "name": "python3"
107 | },
108 | "language_info": {
109 | "codemirror_mode": {
110 | "name": "ipython",
111 | "version": 3
112 | },
113 | "file_extension": ".py",
114 | "mimetype": "text/x-python",
115 | "name": "python",
116 | "nbconvert_exporter": "python",
117 | "pygments_lexer": "ipython3",
118 | "version": "3.12.3"
119 | }
120 | },
121 | "nbformat": 4,
122 | "nbformat_minor": 2
123 | }
124 |
--------------------------------------------------------------------------------
/LLM_timeseries_crypto.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Predicting Crypto with LLMs"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Libraries"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 10,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import pandas as pd\n",
24 | "import numpy as np\n",
25 | "from datetime import datetime, timedelta\n",
26 | "import yfinance as yf\n",
27 | "from statsmodels.tsa.arima.model import ARIMA\n",
28 | "from langchain_community.llms import Ollama"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "## Pull Crypto data"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 11,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "# Pull stock data from yfinance for the past month\n",
45 | "def pull_stocks(ticker):\n",
46 | " end_date = datetime.today()\n",
47 | " start_date = end_date - timedelta(days=30)\n",
48 | " stock_data = yf.Ticker(ticker)\n",
49 | " stock_df = stock_data.history(start=start_date, end=end_date)\n",
50 | " stock_df.index = stock_df.index.tz_localize(None) # Ensure stock data is timezone-naive\n",
51 | " stock_df = stock_df.reset_index()\n",
52 | " stock_df['Date'] = stock_df['Date'].dt.strftime('%Y-%m-%d')\n",
53 | "\n",
54 | " stock_df['pct_change'] = stock_df['Close'].pct_change()\n",
55 | "\n",
56 | " stock_df = stock_df[stock_df['pct_change'].notna()==True]\n",
57 | "\n",
58 | " stock_df = stock_df[['Date','pct_change']]\n",
59 | "\n",
60 | " actual_final = stock_df.tail(1)\n",
61 | "\n",
62 | " # stock_df = stock_df.iloc[:-1]\n",
63 | "\n",
64 | " return stock_df, actual_final\n",
65 | "\n",
66 | "btc, btc_final = pull_stocks('BTC-USD')\n",
67 | "eth, eth_final = pull_stocks('ETH-USD')\n",
68 | "xrp, xrp_final = pull_stocks('XRP-USD')\n",
69 | "\n"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "## Run ARIMA"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 12,
82 | "metadata": {},
83 | "outputs": [
84 | {
85 | "name": "stderr",
86 | "output_type": "stream",
87 | "text": [
88 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
89 | " self._init_dates(dates, freq)\n",
90 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
91 | " self._init_dates(dates, freq)\n",
92 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
93 | " self._init_dates(dates, freq)\n",
94 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
95 | " self._init_dates(dates, freq)\n",
96 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
97 | " self._init_dates(dates, freq)\n",
98 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
99 | " self._init_dates(dates, freq)\n",
100 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n",
101 | " warn('Non-invertible starting MA parameters found.'\n",
102 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
103 | " self._init_dates(dates, freq)\n",
104 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
105 | " self._init_dates(dates, freq)\n",
106 | "/opt/homebrew/Caskroom/miniforge/base/envs/general_env/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency D will be used.\n",
107 | " self._init_dates(dates, freq)\n"
108 | ]
109 | },
110 | {
111 | "name": "stdout",
112 | "output_type": "stream",
113 | "text": [
114 | "Predicted percentage change for next day: -0.007978\n",
115 | "Predicted percentage change for next day: -0.005279\n",
116 | "Predicted percentage change for next day: -0.006455\n"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "def arima(timeseries_df):\n",
122 | " # Ensure 'Date' is the index and in datetime format\n",
123 | " timeseries_df.set_index('Date', inplace=True)\n",
124 | " timeseries_df.index = pd.to_datetime(timeseries_df.index)\n",
125 | "\n",
126 | " # Remove the last row (assumed to be NaN)\n",
127 | " timeseries_df = timeseries_df[:-1]\n",
128 | "\n",
129 | " # Convert percentage strings to float if necessary\n",
130 | " if timeseries_df['pct_change'].dtype == 'object':\n",
131 | " timeseries_df['pct_change'] = timeseries_df['pct_change'].str.rstrip('%').astype('float') / 100.0\n",
132 | "\n",
133 | " # Fit ARIMA model\n",
134 | " model = ARIMA(timeseries_df['pct_change'].dropna(), order=(1, 1, 1))\n",
135 | " results = model.fit()\n",
136 | "\n",
137 | " # Predict the next day's percentage change\n",
138 | " forecast = results.forecast(steps=1)\n",
139 | " predicted_pct_change = forecast.values[0]\n",
140 | "\n",
141 | " print(f\"Predicted percentage change for next day: {predicted_pct_change:.6f}\")\n",
142 | "\n",
143 | "arima(btc)\n",
144 | "arima(eth)\n",
145 | "arima(xrp)"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "## Prepare data for LLM"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 13,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "from io import StringIO\n",
162 | "\n",
163 | "def convert_to_csv_string(timeseries):\n",
164 | "\n",
165 | " timeseries = timeseries.reset_index()\n",
166 | "\n",
167 | " timeseries['pct_change'] = np.round(timeseries['pct_change'], 6)\n",
168 | "\n",
169 | " # Remove final row\n",
170 | " timeseries = timeseries.iloc[:-1]\n",
171 | "\n",
172 | " # Convert DataFrame to CSV string\n",
173 | " csv_buffer = StringIO()\n",
174 | " timeseries.to_csv(csv_buffer, index=False)\n",
175 | " csv_string = csv_buffer.getvalue()\n",
176 | "\n",
177 | " return csv_string\n",
178 | "\n",
179 | "\n",
180 | "btc_for_llm = convert_to_csv_string(btc)\n",
181 | "eth_for_llm = convert_to_csv_string(eth)\n",
182 | "xrp_for_llm = convert_to_csv_string(xrp)\n"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 14,
188 | "metadata": {},
189 | "outputs": [
190 | {
191 | "name": "stdout",
192 | "output_type": "stream",
193 | "text": [
194 | "Date,pct_change\n",
195 | "2024-05-31,-0.012778\n",
196 | "2024-06-01,0.003193\n",
197 | "2024-06-02,0.00066\n",
198 | "2024-06-03,0.015545\n",
199 | "2024-06-04,0.025623\n",
200 | "2024-06-05,0.007299\n",
201 | "2024-06-06,-0.004581\n",
202 | "2024-06-07,-0.019992\n",
203 | "2024-06-08,-0.000531\n",
204 | "2024-06-09,0.004938\n",
205 | "2024-06-10,-0.001949\n",
206 | "2024-06-11,-0.031365\n",
207 | "2024-06-12,0.013503\n",
208 | "2024-06-13,-0.021758\n",
209 | "2024-06-14,-0.011165\n",
210 | "2024-06-15,0.002725\n",
211 | "2024-06-16,0.006769\n",
212 | "2024-06-17,-0.002232\n",
213 | "2024-06-18,-0.020297\n",
214 | "2024-06-19,-0.00277\n",
215 | "2024-06-20,-0.002026\n",
216 | "2024-06-21,-0.011298\n",
217 | "2024-06-22,0.00244\n",
218 | "2024-06-23,-0.016681\n",
219 | "2024-06-24,-0.045954\n",
220 | "2024-06-25,0.025337\n",
221 | "2024-06-26,-0.016073\n",
222 | "2024-06-27,0.013049\n",
223 | "\n"
224 | ]
225 | }
226 | ],
227 | "source": [
228 | "print(btc_for_llm)"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "## Run LLM"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "Note: This requires having an active local Ollama server connection running and installing the llama3, mistral, and gemma3 models"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 15,
248 | "metadata": {},
249 | "outputs": [],
250 | "source": [
251 | "## Note: Change the date of the prediction/forecast for your own use (here, 2024-06-28)\n",
252 | "\n",
253 | "def predict_timeseries(timeseries):\n",
254 | " output = llm.invoke(f\"\"\"\n",
255 | " You are a large language model with time series forecasting capabilities.\n",
256 | " Predict the percent change for the day immediately after the end of the provided time series (2024-06-28).\n",
257 | " Use only your model capabilities, not any other method.\n",
258 | " The data is in the format of a csv file.\n",
259 | " The dataset includes:\n",
260 | " - Date\n",
261 | " - Percent change in the cryptocurrency from the previous day\n",
262 | " Provide only the forecasted percent change for 2024-06-28 as a point estimate. \n",
263 | " Do not include any other text or context, just the one value:\n",
264 | " {timeseries}\n",
265 | " \"\"\")\n",
266 | " return output.strip()\n"
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": 16,
272 | "metadata": {},
273 | "outputs": [
274 | {
275 | "name": "stdout",
276 | "output_type": "stream",
277 | "text": [
278 | "0.008937 (rounded to four decimal places)\n",
279 | "0.0198 (rounded to two decimal places)\n",
280 | "0.00895 (rounded to four decimal places)\n"
281 | ]
282 | }
283 | ],
284 | "source": [
285 | "llm = Ollama(model=\"mistral\", temperature=0)\n",
286 | "\n",
287 | "print(predict_timeseries(btc_for_llm))\n",
288 | "print(predict_timeseries(eth_for_llm))\n",
289 | "print(predict_timeseries(xrp_for_llm))"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 17,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "name": "stdout",
299 | "output_type": "stream",
300 | "text": [
301 | "0.011345\n",
302 | "0.011345\n",
303 | "0.005211\n"
304 | ]
305 | }
306 | ],
307 | "source": [
308 | "llm = Ollama(model=\"llama3\", temperature=0)\n",
309 | "\n",
310 | "print(predict_timeseries(btc_for_llm))\n",
311 | "print(predict_timeseries(eth_for_llm))\n",
312 | "print(predict_timeseries(xrp_for_llm))\n"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 18,
318 | "metadata": {},
319 | "outputs": [
320 | {
321 | "name": "stdout",
322 | "output_type": "stream",
323 | "text": [
324 | "0.012778\n",
325 | "0.012345\n",
326 | "0.005678\n"
327 | ]
328 | }
329 | ],
330 | "source": [
331 | "llm = Ollama(model=\"gemma2\", temperature=0)\n",
332 | "\n",
333 | "print(predict_timeseries(btc_for_llm))\n",
334 | "print(predict_timeseries(eth_for_llm))\n",
335 | "print(predict_timeseries(xrp_for_llm))\n"
336 | ]
337 | },
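  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "One way to judge the LLMs against the ARIMA baseline is the absolute error of each point forecast versus the held-out day (the actuals are shown in the next section). A minimal sketch using the BTC forecasts printed above:"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Scorecard: absolute error of each BTC point forecast vs. the held-out actual\n",
  | "# (forecast values copied from the model runs above)\n",
  | "actual_btc = btc_final['pct_change'].iloc[0]\n",
  | "btc_forecasts = {'ARIMA': -0.007978, 'mistral': 0.008937, 'llama3': 0.011345, 'gemma2': 0.012778}\n",
  | "for name, pred in btc_forecasts.items():\n",
  | "    print(f\"{name}: absolute error = {abs(pred - actual_btc):.6f}\")"
  | ]
  | },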
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 | "## Actual values for predicted day"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 19,
348 | "metadata": {},
349 | "outputs": [
350 | {
351 | "data": {
352 | "text/html": [
353 | "
\n",
354 | "\n",
367 | "
\n",
368 | " \n",
369 | " \n",
370 | " | \n",
371 | " Date | \n",
372 | " pct_change | \n",
373 | "
\n",
374 | " \n",
375 | " \n",
376 | " \n",
377 | " 29 | \n",
378 | " 2024-06-28 | \n",
379 | " -0.020853 | \n",
380 | "
\n",
381 | " \n",
382 | "
\n",
383 | "
"
384 | ],
385 | "text/plain": [
386 | " Date pct_change\n",
387 | "29 2024-06-28 -0.020853"
388 | ]
389 | },
390 | "execution_count": 19,
391 | "metadata": {},
392 | "output_type": "execute_result"
393 | }
394 | ],
395 | "source": [
396 | "btc_final"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": 20,
402 | "metadata": {},
403 | "outputs": [
404 | {
405 | "data": {
406 | "text/html": [
407 | "\n",
408 | "\n",
421 | "
\n",
422 | " \n",
423 | " \n",
424 | " | \n",
425 | " Date | \n",
426 | " pct_change | \n",
427 | "
\n",
428 | " \n",
429 | " \n",
430 | " \n",
431 | " 29 | \n",
432 | " 2024-06-28 | \n",
433 | " -0.020659 | \n",
434 | "
\n",
435 | " \n",
436 | "
\n",
437 | "
"
438 | ],
439 | "text/plain": [
440 | " Date pct_change\n",
441 | "29 2024-06-28 -0.020659"
442 | ]
443 | },
444 | "execution_count": 20,
445 | "metadata": {},
446 | "output_type": "execute_result"
447 | }
448 | ],
449 | "source": [
450 | "eth_final"
451 | ]
452 | },
453 | {
454 | "cell_type": "code",
455 | "execution_count": 21,
456 | "metadata": {},
457 | "outputs": [
458 | {
459 | "data": {
460 | "text/html": [
461 | "\n",
462 | "\n",
475 | "
\n",
476 | " \n",
477 | " \n",
478 | " | \n",
479 | " Date | \n",
480 | " pct_change | \n",
481 | "
\n",
482 | " \n",
483 | " \n",
484 | " \n",
485 | " 29 | \n",
486 | " 2024-06-28 | \n",
487 | " -0.007943 | \n",
488 | "
\n",
489 | " \n",
490 | "
\n",
491 | "
"
492 | ],
493 | "text/plain": [
494 | " Date pct_change\n",
495 | "29 2024-06-28 -0.007943"
496 | ]
497 | },
498 | "execution_count": 21,
499 | "metadata": {},
500 | "output_type": "execute_result"
501 | }
502 | ],
503 | "source": [
504 | "xrp_final"
505 | ]
506 | }
507 | ],
508 | "metadata": {
509 | "kernelspec": {
510 | "display_name": "general_env",
511 | "language": "python",
512 | "name": "python3"
513 | },
514 | "language_info": {
515 | "codemirror_mode": {
516 | "name": "ipython",
517 | "version": 3
518 | },
519 | "file_extension": ".py",
520 | "mimetype": "text/x-python",
521 | "name": "python",
522 | "nbconvert_exporter": "python",
523 | "pygments_lexer": "ipython3",
524 | "version": "3.1.-1"
525 | }
526 | },
527 | "nbformat": 4,
528 | "nbformat_minor": 2
529 | }
530 |
--------------------------------------------------------------------------------
/Pandas_Top_5_Tricks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Source: @DeepCharts Youtube Channel"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# Top 5 Pandas Tips and Tricks"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import pandas as pd"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "### 1. Merging with the Indicator Argument"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 3,
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " key | \n",
60 | " value1 | \n",
61 | " value2 | \n",
62 | " _merge | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " 0 | \n",
68 | " A | \n",
69 | " 1.0 | \n",
70 | " NaN | \n",
71 | " left_only | \n",
72 | "
\n",
73 | " \n",
74 | " 1 | \n",
75 | " B | \n",
76 | " 2.0 | \n",
77 | " 4.0 | \n",
78 | " both | \n",
79 | "
\n",
80 | " \n",
81 | " 2 | \n",
82 | " C | \n",
83 | " 3.0 | \n",
84 | " 5.0 | \n",
85 | " both | \n",
86 | "
\n",
87 | " \n",
88 | " 3 | \n",
89 | " D | \n",
90 | " NaN | \n",
91 | " 6.0 | \n",
92 | " right_only | \n",
93 | "
\n",
94 | " \n",
95 | "
\n",
96 | "
"
97 | ],
98 | "text/plain": [
99 | " key value1 value2 _merge\n",
100 | "0 A 1.0 NaN left_only\n",
101 | "1 B 2.0 4.0 both\n",
102 | "2 C 3.0 5.0 both\n",
103 | "3 D NaN 6.0 right_only"
104 | ]
105 | },
106 | "execution_count": 3,
107 | "metadata": {},
108 | "output_type": "execute_result"
109 | }
110 | ],
111 | "source": [
112 | "df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})\n",
113 | "df2 = pd.DataFrame({'key': ['B', 'C', 'D'], 'value2': [4, 5, 6]})\n",
114 | "\n",
115 | "merged = pd.merge(df1, df2, on='key', how='outer', indicator=True)\n",
116 | "merged"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "### 2. Custom Chaining with pipe"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 5,
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/html": [
134 | "\n",
135 | "\n",
148 | "
\n",
149 | " \n",
150 | " \n",
151 | " | \n",
152 | " Quantity | \n",
153 | " Price | \n",
154 | " Total | \n",
155 | "
\n",
156 | " \n",
157 | " \n",
158 | " \n",
159 | " 3 | \n",
160 | " 20 | \n",
161 | " 250 | \n",
162 | " 5000 | \n",
163 | "
\n",
164 | " \n",
165 | " 1 | \n",
166 | " 15 | \n",
167 | " 150 | \n",
168 | " 2250 | \n",
169 | "
\n",
170 | " \n",
171 | " 2 | \n",
172 | " 10 | \n",
173 | " 200 | \n",
174 | " 2000 | \n",
175 | "
\n",
176 | " \n",
177 | "
\n",
178 | "
"
179 | ],
180 | "text/plain": [
181 | " Quantity Price Total\n",
182 | "3 20 250 5000\n",
183 | "1 15 150 2250\n",
184 | "2 10 200 2000"
185 | ]
186 | },
187 | "execution_count": 5,
188 | "metadata": {},
189 | "output_type": "execute_result"
190 | }
191 | ],
192 | "source": [
193 | "df = pd.DataFrame({\n",
194 | " 'Quantity': [10, 15, 10, 20],\n",
195 | " 'Price': [100, 150, 200, 250]\n",
196 | "})\n",
197 | "\n",
198 | "\n",
199 | "# Custom function to calculate Total\n",
200 | "def add_total(df):\n",
201 | " df['Total'] = df['Quantity'] * df['Price']\n",
202 | " return df\n",
203 | "\n",
204 | "# Method chaining with pipe\n",
205 | "result = (\n",
206 | " df\n",
207 | " .pipe(add_total)\n",
208 | " .query('Total > 1000')\n",
209 | " .sort_values('Total', ascending=False)\n",
210 | ")\n",
211 | "\n",
212 | "result"
213 | ]
214 | },
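  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "`pipe` also forwards extra arguments to the function, which keeps parameterized steps chainable. A small sketch (the `add_discounted_total` helper and the 10% rate are purely illustrative):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# pipe passes extra positional/keyword arguments through to the function\n",
  | "def add_discounted_total(df, rate):\n",
  | "    df['Discounted'] = df['Total'] * (1 - rate)\n",
  | "    return df\n",
  | "\n",
  | "result.pipe(add_discounted_total, rate=0.1)"
  | ]
  | },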
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "### 3. Window Functions (Moving Average and Cumulative Sum)"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 7,
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "data": {
229 | "text/html": [
230 | "\n",
231 | "\n",
244 | "
\n",
245 | " \n",
246 | " \n",
247 | " | \n",
248 | " Date | \n",
249 | " Sales | \n",
250 | " 3-Day MA | \n",
251 | " Cumulative Sales | \n",
252 | "
\n",
253 | " \n",
254 | " \n",
255 | " \n",
256 | " 0 | \n",
257 | " 2023-01-01 | \n",
258 | " 100 | \n",
259 | " NaN | \n",
260 | " 100.0 | \n",
261 | "
\n",
262 | " \n",
263 | " 1 | \n",
264 | " 2023-01-02 | \n",
265 | " 150 | \n",
266 | " NaN | \n",
267 | " 250.0 | \n",
268 | "
\n",
269 | " \n",
270 | " 2 | \n",
271 | " 2023-01-03 | \n",
272 | " 200 | \n",
273 | " 150.0 | \n",
274 | " 450.0 | \n",
275 | "
\n",
276 | " \n",
277 | " 3 | \n",
278 | " 2023-01-04 | \n",
279 | " 250 | \n",
280 | " 200.0 | \n",
281 | " 700.0 | \n",
282 | "
\n",
283 | " \n",
284 | " 4 | \n",
285 | " 2023-01-05 | \n",
286 | " 300 | \n",
287 | " 250.0 | \n",
288 | " 1000.0 | \n",
289 | "
\n",
290 | " \n",
291 | " 5 | \n",
292 | " 2023-01-06 | \n",
293 | " 350 | \n",
294 | " 300.0 | \n",
295 | " 1350.0 | \n",
296 | "
\n",
297 | " \n",
298 | " 6 | \n",
299 | " 2023-01-07 | \n",
300 | " 400 | \n",
301 | " 350.0 | \n",
302 | " 1750.0 | \n",
303 | "
\n",
304 | " \n",
305 | "
\n",
306 | "
"
307 | ],
308 | "text/plain": [
309 | " Date Sales 3-Day MA Cumulative Sales\n",
310 | "0 2023-01-01 100 NaN 100.0\n",
311 | "1 2023-01-02 150 NaN 250.0\n",
312 | "2 2023-01-03 200 150.0 450.0\n",
313 | "3 2023-01-04 250 200.0 700.0\n",
314 | "4 2023-01-05 300 250.0 1000.0\n",
315 | "5 2023-01-06 350 300.0 1350.0\n",
316 | "6 2023-01-07 400 350.0 1750.0"
317 | ]
318 | },
319 | "execution_count": 7,
320 | "metadata": {},
321 | "output_type": "execute_result"
322 | }
323 | ],
324 | "source": [
325 | "# Sample DataFrame\n",
326 | "df = pd.DataFrame({\n",
327 | " 'Date': pd.date_range('2023-01-01', periods=7),\n",
328 | " 'Sales': [100, 150, 200, 250, 300, 350, 400]\n",
329 | "})\n",
330 | "\n",
331 | "# Rolling average\n",
332 | "df['3-Day MA'] = df['Sales'].rolling(window=3).mean()\n",
333 | "\n",
334 | "# Cumulative sum\n",
335 | "df['Cumulative Sales'] = df['Sales'].expanding().sum()\n",
336 | "\n",
337 | "df"
338 | ]
339 | },
340 | {
341 | "cell_type": "markdown",
342 | "metadata": {},
343 | "source": [
344 | "### 4. Identify Duplicates and Drop Duplicates"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 9,
350 | "metadata": {},
351 | "outputs": [
352 | {
353 | "data": {
354 | "text/html": [
355 | "\n",
356 | "\n",
369 | "
\n",
370 | " \n",
371 | " \n",
372 | " | \n",
373 | " ID | \n",
374 | " Name | \n",
375 | "
\n",
376 | " \n",
377 | " \n",
378 | " \n",
379 | " 0 | \n",
380 | " 1 | \n",
381 | " Alice | \n",
382 | "
\n",
383 | " \n",
384 | " 1 | \n",
385 | " 2 | \n",
386 | " Bob | \n",
387 | "
\n",
388 | " \n",
389 | " 2 | \n",
390 | " 2 | \n",
391 | " Bob | \n",
392 | "
\n",
393 | " \n",
394 | " 3 | \n",
395 | " 3 | \n",
396 | " Charlie | \n",
397 | "
\n",
398 | " \n",
399 | " 4 | \n",
400 | " 4 | \n",
401 | " David | \n",
402 | "
\n",
403 | " \n",
404 | " 5 | \n",
405 | " 4 | \n",
406 | " David | \n",
407 | "
\n",
408 | " \n",
409 | "
\n",
410 | "
"
411 | ],
412 | "text/plain": [
413 | " ID Name\n",
414 | "0 1 Alice\n",
415 | "1 2 Bob\n",
416 | "2 2 Bob\n",
417 | "3 3 Charlie\n",
418 | "4 4 David\n",
419 | "5 4 David"
420 | ]
421 | },
422 | "execution_count": 9,
423 | "metadata": {},
424 | "output_type": "execute_result"
425 | }
426 | ],
427 | "source": [
428 | "# Sample DataFrame with duplicates\n",
429 | "df = pd.DataFrame({\n",
430 | " 'ID': [1, 2, 2, 3, 4, 4],\n",
431 | " 'Name': ['Alice', 'Bob', 'Bob', 'Charlie', 'David', 'David']\n",
432 | "})\n",
433 | "df"
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": 11,
439 | "metadata": {},
440 | "outputs": [
441 | {
442 | "data": {
443 | "text/html": [
444 | "\n",
445 | "\n",
458 | "
\n",
459 | " \n",
460 | " \n",
461 | " | \n",
462 | " ID | \n",
463 | " Name | \n",
464 | "
\n",
465 | " \n",
466 | " \n",
467 | " \n",
468 | " 1 | \n",
469 | " 2 | \n",
470 | " Bob | \n",
471 | "
\n",
472 | " \n",
473 | " 2 | \n",
474 | " 2 | \n",
475 | " Bob | \n",
476 | "
\n",
477 | " \n",
478 | " 4 | \n",
479 | " 4 | \n",
480 | " David | \n",
481 | "
\n",
482 | " \n",
483 | " 5 | \n",
484 | " 4 | \n",
485 | " David | \n",
486 | "
\n",
487 | " \n",
488 | "
\n",
489 | "
"
490 | ],
491 | "text/plain": [
492 | " ID Name\n",
493 | "1 2 Bob\n",
494 | "2 2 Bob\n",
495 | "4 4 David\n",
496 | "5 4 David"
497 | ]
498 | },
499 | "execution_count": 11,
500 | "metadata": {},
501 | "output_type": "execute_result"
502 | }
503 | ],
504 | "source": [
505 | "# Identify duplicates\n",
506 | "duplicates = df[df.duplicated(subset='ID', keep=False)]\n",
507 | "duplicates\n"
508 | ]
509 | },
510 | {
511 | "cell_type": "code",
512 | "execution_count": 13,
513 | "metadata": {},
514 | "outputs": [
515 | {
516 | "data": {
517 | "text/html": [
518 | "\n",
519 | "\n",
532 | "
\n",
533 | " \n",
534 | " \n",
535 | " | \n",
536 | " ID | \n",
537 | " Name | \n",
538 | "
\n",
539 | " \n",
540 | " \n",
541 | " \n",
542 | " 0 | \n",
543 | " 1 | \n",
544 | " Alice | \n",
545 | "
\n",
546 | " \n",
547 | " 1 | \n",
548 | " 2 | \n",
549 | " Bob | \n",
550 | "
\n",
551 | " \n",
552 | " 3 | \n",
553 | " 3 | \n",
554 | " Charlie | \n",
555 | "
\n",
556 | " \n",
557 | " 4 | \n",
558 | " 4 | \n",
559 | " David | \n",
560 | "
\n",
561 | " \n",
562 | "
\n",
563 | "
"
564 | ],
565 | "text/plain": [
566 | " ID Name\n",
567 | "0 1 Alice\n",
568 | "1 2 Bob\n",
569 | "3 3 Charlie\n",
570 | "4 4 David"
571 | ]
572 | },
573 | "execution_count": 13,
574 | "metadata": {},
575 | "output_type": "execute_result"
576 | }
577 | ],
578 | "source": [
579 | "# Remove duplicates, keep first\n",
580 | "df_cleaned = df.drop_duplicates(subset='ID')\n",
581 | "df_cleaned"
582 | ]
583 | },
584 | {
585 | "cell_type": "markdown",
586 | "metadata": {},
587 | "source": [
588 | "### 5. Binning Data with cut and qcut"
589 | ]
590 | },
591 | {
592 | "cell_type": "code",
593 | "execution_count": 15,
594 | "metadata": {},
595 | "outputs": [
596 | {
597 | "data": {
598 | "text/html": [
599 | "\n",
600 | "\n",
613 | "
\n",
614 | " \n",
615 | " \n",
616 | " | \n",
617 | " Age | \n",
618 | " Income | \n",
619 | "
\n",
620 | " \n",
621 | " \n",
622 | " \n",
623 | " 0 | \n",
624 | " 22 | \n",
625 | " 25000 | \n",
626 | "
\n",
627 | " \n",
628 | " 1 | \n",
629 | " 25 | \n",
630 | " 27000 | \n",
631 | "
\n",
632 | " \n",
633 | " 2 | \n",
634 | " 29 | \n",
635 | " 30000 | \n",
636 | "
\n",
637 | " \n",
638 | " 3 | \n",
639 | " 34 | \n",
640 | " 32000 | \n",
641 | "
\n",
642 | " \n",
643 | " 4 | \n",
644 | " 45 | \n",
645 | " 40000 | \n",
646 | "
\n",
647 | " \n",
648 | " 5 | \n",
649 | " 52 | \n",
650 | " 50000 | \n",
651 | "
\n",
652 | " \n",
653 | " 6 | \n",
654 | " 61 | \n",
655 | " 60000 | \n",
656 | "
\n",
657 | " \n",
658 | " 7 | \n",
659 | " 70 | \n",
660 | " 70000 | \n",
661 | "
\n",
662 | " \n",
663 | " 8 | \n",
664 | " 80 | \n",
665 | " 80000 | \n",
666 | "
\n",
667 | " \n",
668 | " 9 | \n",
669 | " 90 | \n",
670 | " 90000 | \n",
671 | "
\n",
672 | " \n",
673 | "
\n",
674 | "
"
675 | ],
676 | "text/plain": [
677 | " Age Income\n",
678 | "0 22 25000\n",
679 | "1 25 27000\n",
680 | "2 29 30000\n",
681 | "3 34 32000\n",
682 | "4 45 40000\n",
683 | "5 52 50000\n",
684 | "6 61 60000\n",
685 | "7 70 70000\n",
686 | "8 80 80000\n",
687 | "9 90 90000"
688 | ]
689 | },
690 | "execution_count": 15,
691 | "metadata": {},
692 | "output_type": "execute_result"
693 | }
694 | ],
695 | "source": [
696 | "# Sample data\n",
697 | "data = {\n",
698 | " 'Age': [22, 25, 29, 34, 45, 52, 61, 70, 80, 90],\n",
699 | " 'Income': [25000, 27000, 30000, 32000, 40000, 50000, 60000, 70000, 80000, 90000]\n",
700 | "}\n",
701 | "\n",
702 | "df = pd.DataFrame(data)\n",
703 | "df"
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": 17,
709 | "metadata": {},
710 | "outputs": [
711 | {
712 | "data": {
713 | "text/html": [
714 | "\n",
715 | "\n",
728 | "
\n",
729 | " \n",
730 | " \n",
731 | " | \n",
732 | " Age | \n",
733 | " Income | \n",
734 | " Age Group | \n",
735 | "
\n",
736 | " \n",
737 | " \n",
738 | " \n",
739 | " 0 | \n",
740 | " 22 | \n",
741 | " 25000 | \n",
742 | " Young Adult | \n",
743 | "
\n",
744 | " \n",
745 | " 1 | \n",
746 | " 25 | \n",
747 | " 27000 | \n",
748 | " Young Adult | \n",
749 | "
\n",
750 | " \n",
751 | " 2 | \n",
752 | " 29 | \n",
753 | " 30000 | \n",
754 | " Young Adult | \n",
755 | "
\n",
756 | " \n",
757 | " 3 | \n",
758 | " 34 | \n",
759 | " 32000 | \n",
760 | " Young Adult | \n",
761 | "
\n",
762 | " \n",
763 | " 4 | \n",
764 | " 45 | \n",
765 | " 40000 | \n",
766 | " Adult | \n",
767 | "
\n",
768 | " \n",
769 | " 5 | \n",
770 | " 52 | \n",
771 | " 50000 | \n",
772 | " Adult | \n",
773 | "
\n",
774 | " \n",
775 | " 6 | \n",
776 | " 61 | \n",
777 | " 60000 | \n",
778 | " Senior | \n",
779 | "
\n",
780 | " \n",
781 | " 7 | \n",
782 | " 70 | \n",
783 | " 70000 | \n",
784 | " Senior | \n",
785 | "
\n",
786 | " \n",
787 | " 8 | \n",
788 | " 80 | \n",
789 | " 80000 | \n",
790 | " Senior | \n",
791 | "
\n",
792 | " \n",
793 | " 9 | \n",
794 | " 90 | \n",
795 | " 90000 | \n",
796 | " Senior | \n",
797 | "
\n",
798 | " \n",
799 | "
\n",
800 | "
"
801 | ],
802 | "text/plain": [
803 | " Age Income Age Group\n",
804 | "0 22 25000 Young Adult\n",
805 | "1 25 27000 Young Adult\n",
806 | "2 29 30000 Young Adult\n",
807 | "3 34 32000 Young Adult\n",
808 | "4 45 40000 Adult\n",
809 | "5 52 50000 Adult\n",
810 | "6 61 60000 Senior\n",
811 | "7 70 70000 Senior\n",
812 | "8 80 80000 Senior\n",
813 | "9 90 90000 Senior"
814 | ]
815 | },
816 | "execution_count": 17,
817 | "metadata": {},
818 | "output_type": "execute_result"
819 | }
820 | ],
821 | "source": [
822 | "# Equal-width binning for Age\n",
823 | "age_bins = [0, 18, 35, 60, 100]\n",
824 | "age_labels = ['Child', 'Young Adult', 'Adult', 'Senior']\n",
825 | "df['Age Group'] = pd.cut(df['Age'], bins=age_bins, labels=age_labels)\n",
826 | "df"
827 | ]
828 | },
829 | {
830 | "cell_type": "code",
831 | "execution_count": 19,
832 | "metadata": {},
833 | "outputs": [
834 | {
835 | "data": {
836 | "text/html": [
837 | "\n",
838 | "\n",
851 | "
\n",
852 | " \n",
853 | " \n",
854 | " | \n",
855 | " Age | \n",
856 | " Income | \n",
857 | " Age Group | \n",
858 | " Income Quartile | \n",
859 | "
\n",
860 | " \n",
861 | " \n",
862 | " \n",
863 | " 0 | \n",
864 | " 22 | \n",
865 | " 25000 | \n",
866 | " Young Adult | \n",
867 | " Q1 | \n",
868 | "
\n",
869 | " \n",
870 | " 1 | \n",
871 | " 25 | \n",
872 | " 27000 | \n",
873 | " Young Adult | \n",
874 | " Q1 | \n",
875 | "
\n",
876 | " \n",
877 | " 2 | \n",
878 | " 29 | \n",
879 | " 30000 | \n",
880 | " Young Adult | \n",
881 | " Q1 | \n",
882 | "
\n",
883 | " \n",
884 | " 3 | \n",
885 | " 34 | \n",
886 | " 32000 | \n",
887 | " Young Adult | \n",
888 | " Q2 | \n",
889 | "
\n",
890 | " \n",
891 | " 4 | \n",
892 | " 45 | \n",
893 | " 40000 | \n",
894 | " Adult | \n",
895 | " Q2 | \n",
896 | "
\n",
897 | " \n",
898 | " 5 | \n",
899 | " 52 | \n",
900 | " 50000 | \n",
901 | " Adult | \n",
902 | " Q3 | \n",
903 | "
\n",
904 | " \n",
905 | " 6 | \n",
906 | " 61 | \n",
907 | " 60000 | \n",
908 | " Senior | \n",
909 | " Q3 | \n",
910 | "
\n",
911 | " \n",
912 | " 7 | \n",
913 | " 70 | \n",
914 | " 70000 | \n",
915 | " Senior | \n",
916 | " Q4 | \n",
917 | "
\n",
918 | " \n",
919 | " 8 | \n",
920 | " 80 | \n",
921 | " 80000 | \n",
922 | " Senior | \n",
923 | " Q4 | \n",
924 | "
\n",
925 | " \n",
926 | " 9 | \n",
927 | " 90 | \n",
928 | " 90000 | \n",
929 | " Senior | \n",
930 | " Q4 | \n",
931 | "
\n",
932 | " \n",
933 | "
\n",
934 | "
"
935 | ],
936 | "text/plain": [
937 | " Age Income Age Group Income Quartile\n",
938 | "0 22 25000 Young Adult Q1\n",
939 | "1 25 27000 Young Adult Q1\n",
940 | "2 29 30000 Young Adult Q1\n",
941 | "3 34 32000 Young Adult Q2\n",
942 | "4 45 40000 Adult Q2\n",
943 | "5 52 50000 Adult Q3\n",
944 | "6 61 60000 Senior Q3\n",
945 | "7 70 70000 Senior Q4\n",
946 | "8 80 80000 Senior Q4\n",
947 | "9 90 90000 Senior Q4"
948 | ]
949 | },
950 | "execution_count": 19,
951 | "metadata": {},
952 | "output_type": "execute_result"
953 | }
954 | ],
955 | "source": [
956 | "# Quantile-based binning for Income\n",
957 | "df['Income Quartile'] = pd.qcut(df['Income'], 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])\n",
958 | "df"
959 | ]
960 | },
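  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "The difference between the two: `cut` bins by value edges (fixed or explicit), while `qcut` bins by sample quantiles so each bin holds roughly the same number of rows. Inspecting the computed interval edges makes this concrete (a quick sketch):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Compare bin edges: cut uses equal-width value ranges, qcut uses quantiles\n",
  | "print(pd.cut(df['Income'], 4).cat.categories)\n",
  | "print(pd.qcut(df['Income'], 4).cat.categories)"
  | ]
  | },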
961 | {
962 | "cell_type": "markdown",
963 | "metadata": {},
964 | "source": [
965 | "### BONUS. Interpolating Data"
966 | ]
967 | },
968 | {
969 | "cell_type": "code",
970 | "execution_count": 21,
971 | "metadata": {},
972 | "outputs": [
973 | {
974 | "data": {
975 | "text/html": [
976 | "\n",
977 | "\n",
990 | "
\n",
991 | " \n",
992 | " \n",
993 | " | \n",
994 | " Time | \n",
995 | " Value | \n",
996 | "
\n",
997 | " \n",
998 | " \n",
999 | " \n",
1000 | " 0 | \n",
1001 | " 2020-01-01 | \n",
1002 | " 1.0 | \n",
1003 | "
\n",
1004 | " \n",
1005 | " 1 | \n",
1006 | " 2020-01-02 | \n",
1007 | " NaN | \n",
1008 | "
\n",
1009 | " \n",
1010 | " 2 | \n",
1011 | " 2020-01-03 | \n",
1012 | " NaN | \n",
1013 | "
\n",
1014 | " \n",
1015 | " 3 | \n",
1016 | " 2020-01-04 | \n",
1017 | " 4.0 | \n",
1018 | "
\n",
1019 | " \n",
1020 | " 4 | \n",
1021 | " 2020-01-05 | \n",
1022 | " 5.0 | \n",
1023 | "
\n",
1024 | " \n",
1025 | "
\n",
1026 | "
"
1027 | ],
1028 | "text/plain": [
1029 | " Time Value\n",
1030 | "0 2020-01-01 1.0\n",
1031 | "1 2020-01-02 NaN\n",
1032 | "2 2020-01-03 NaN\n",
1033 | "3 2020-01-04 4.0\n",
1034 | "4 2020-01-05 5.0"
1035 | ]
1036 | },
1037 | "execution_count": 21,
1038 | "metadata": {},
1039 | "output_type": "execute_result"
1040 | }
1041 | ],
1042 | "source": [
1043 | "import numpy as np\n",
1044 | "\n",
1045 | "df = pd.DataFrame({'Time': pd.date_range(start='1/1/2020', periods=5, freq='D'),\n",
1046 | " 'Value': [1, np.nan, np.nan, 4, 5]})\n",
1047 | "df"
1048 | ]
1049 | },
1050 | {
1051 | "cell_type": "code",
1052 | "execution_count": 23,
1053 | "metadata": {},
1054 | "outputs": [
1055 | {
1056 | "data": {
1057 | "text/html": [
1058 | "\n",
1059 | "\n",
1072 | "
\n",
1073 | " \n",
1074 | " \n",
1075 | " | \n",
1076 | " Time | \n",
1077 | " Value | \n",
1078 | " Interpolated | \n",
1079 | "
\n",
1080 | " \n",
1081 | " \n",
1082 | " \n",
1083 | " 0 | \n",
1084 | " 2020-01-01 | \n",
1085 | " 1.0 | \n",
1086 | " 1.0 | \n",
1087 | "
\n",
1088 | " \n",
1089 | " 1 | \n",
1090 | " 2020-01-02 | \n",
1091 | " NaN | \n",
1092 | " 2.0 | \n",
1093 | "
\n",
1094 | " \n",
1095 | " 2 | \n",
1096 | " 2020-01-03 | \n",
1097 | " NaN | \n",
1098 | " 3.0 | \n",
1099 | "
\n",
1100 | " \n",
1101 | " 3 | \n",
1102 | " 2020-01-04 | \n",
1103 | " 4.0 | \n",
1104 | " 4.0 | \n",
1105 | "
\n",
1106 | " \n",
1107 | " 4 | \n",
1108 | " 2020-01-05 | \n",
1109 | " 5.0 | \n",
1110 | " 5.0 | \n",
1111 | "
\n",
1112 | " \n",
1113 | "
\n",
1114 | "
"
1115 | ],
1116 | "text/plain": [
1117 | " Time Value Interpolated\n",
1118 | "0 2020-01-01 1.0 1.0\n",
1119 | "1 2020-01-02 NaN 2.0\n",
1120 | "2 2020-01-03 NaN 3.0\n",
1121 | "3 2020-01-04 4.0 4.0\n",
1122 | "4 2020-01-05 5.0 5.0"
1123 | ]
1124 | },
1125 | "execution_count": 23,
1126 | "metadata": {},
1127 | "output_type": "execute_result"
1128 | }
1129 | ],
1130 | "source": [
1131 | "df['Interpolated'] = df['Value'].interpolate(method='linear')\n",
1132 | "df"
1133 | ]
1134 | }
1135 | ],
1136 | "metadata": {
1137 | "kernelspec": {
1138 | "display_name": "general_env",
1139 | "language": "python",
1140 | "name": "python3"
1141 | },
1142 | "language_info": {
1143 | "codemirror_mode": {
1144 | "name": "ipython",
1145 | "version": 3
1146 | },
1147 | "file_extension": ".py",
1148 | "mimetype": "text/x-python",
1149 | "name": "python",
1150 | "nbconvert_exporter": "python",
1151 | "pygments_lexer": "ipython3",
1152 | "version": "3.12.3"
1153 | }
1154 | },
1155 | "nbformat": 4,
1156 | "nbformat_minor": 2
1157 | }
1158 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # projects
2 |
3 | Code and notebooks from the @DeepCharts YouTube channel (https://www.youtube.com/@DeepCharts).
--------------------------------------------------------------------------------
/ai_coding_agent_tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Coding Agents with smolagents and Gemini Flash"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Setup"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "### Libraries"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 3,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel\n",
31 | "import os"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "### Gemini API Key"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 1,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "os.environ[\"GEMINI_API_KEY\"] = \"API Key Goes Here\""
48 | ]
49 | },
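  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "Hardcoding keys in notebooks makes them easy to leak. A safer sketch reads the key from the environment instead (this assumes you exported GEMINI_API_KEY before launching Jupyter):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Safer alternative: rely on an already-exported environment variable\n",
  | "assert os.getenv(\"GEMINI_API_KEY\"), \"export GEMINI_API_KEY before running this notebook\""
  | ]
  | },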
50 | {
51 | "cell_type": "code",
52 | "execution_count": 4,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "# Gemini\n",
57 | "model = LiteLLMModel(model_id=\"gemini/gemini-1.5-flash\")"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 14,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "# Ollama (Llama3.2)\n",
67 | "# model = LiteLLMModel(model_id=\"ollama/llama3.2\")\n"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "## Agent Creation"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 5,
80 | "metadata": {},
81 | "outputs": [],
82 | "source": [
83 | "# Define the Feature Selection Agent\n",
84 | "feature_selection_agent = CodeAgent(\n",
85 | " tools=[DuckDuckGoSearchTool], # search internet if necessary\n",
86 | " additional_authorized_imports=['pandas','statsmodels','sklearn','numpy','json'], # packages for code interpreter\n",
87 | " model=model # model set above\n",
88 | ")\n"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "### Set Task Prompt"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 6,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "# Task for the agent\n",
105 | "task = \"\"\"\n",
106 | "1. Load the Diabetes dataset from the 'sklearn' library using the following code:\n",
107 | " from sklearn.datasets import load_diabetes\n",
108 | " import pandas as pd\n",
109 | "\n",
110 | " # Load the dataset\n",
111 | " data, target = load_diabetes(return_X_y=True, as_frame=False)\n",
112 | "\n",
113 | " # Create a DataFrame\n",
114 | " df = pd.DataFrame(data, columns=load_diabetes().feature_names)\n",
115 | " df['target'] = target\n",
116 | "2. Split data with a train/test split of 75%/25%\n",
117 | "3. Create a linear regression model on the training data predicting the target variable using the \"sklearn\" or \"statsmodels\" library.\n",
118 | "4. Execute on a strategy of combination of up to 3 predictors that attains the lowest root mean square error (RMSE) on the testing data. \n",
119 | " (You can't use the target variable).\n",
120 | "5. Use feature engineering as needed to improve model performance.\n",
121 | "6. Based on the lowest RMSE of each model for the testing data, provide a final list of predictors for the top 5 models\n",
122 | "7. Output as a table\n",
123 | "\"\"\""
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "metadata": {},
129 | "source": [
130 | "## Execute the agent and task"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": null,
136 | "metadata": {},
137 | "outputs": [],
138 | "source": [
139 | "result = feature_selection_agent.run(task)"
140 | ]
141 | }
142 | ],
143 | "metadata": {
144 | "kernelspec": {
145 | "display_name": "smol_env",
146 | "language": "python",
147 | "name": "python3"
148 | },
149 | "language_info": {
150 | "codemirror_mode": {
151 | "name": "ipython",
152 | "version": 3
153 | },
154 | "file_extension": ".py",
155 | "mimetype": "text/x-python",
156 | "name": "python",
157 | "nbconvert_exporter": "python",
158 | "pygments_lexer": "ipython3",
159 | "version": "3.12.8"
160 | }
161 | },
162 | "nbformat": 4,
163 | "nbformat_minor": 2
164 | }
165 |
--------------------------------------------------------------------------------
/ai_image_generator.py:
--------------------------------------------------------------------------------
1 | # Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)
2 |
3 | # PART 1: LIBRARY IMPORTS
4 |
5 | import streamlit as st
6 | import replicate
7 | import os
8 | import requests
9 | from PIL import Image
10 | from io import BytesIO
11 |
12 |
13 | # PART 2: SETUP REPLICATE CREDENTIALS AND AUTHENTICATION
14 |
15 | # Set up your Replicate API key (optionally from environment variable)
16 | REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN") # You can store your API key in an environment variable
17 |
18 | if REPLICATE_API_TOKEN is None:
19 | st.error("Replicate API token not found. Please set it in your environment.")
20 | st.stop()
21 |
22 | # No explicit client object is needed: replicate.run() below uses the default
23 | # client, which reads REPLICATE_API_TOKEN from the environment
24 |
25 |
26 | # PART 3: STREAMLIT WEBAPP
27 |
28 | # Initialize session state for storing the generated image URL
29 | if 'image_url' not in st.session_state:
30 | st.session_state['image_url'] = None
31 |
32 |
33 | # PART 3A: SIDEBAR OPTIONS
34 |
35 | # Sidebar inputs
36 | with st.sidebar:
37 |
38 | # Title of the app
39 | st.title('AI Image Generation: Flux Schnell')
40 |
41 | st.header("Prompt and Options")
42 |
43 | # Input box for the user to type the prompt (using text_area for multiline input)
44 | prompt = st.text_area('Enter a prompt to generate an image', height=50)
45 |
46 | # Checkbox to enable or disable random seed
47 | use_random_seed = st.checkbox('Use Random Seed', value=True)
48 |
49 | # Slider for random seed (only if the checkbox is checked)
50 | if use_random_seed:
51 | random_seed = st.slider('Random Seed', 0, 1000, 435)
52 | else:
53 | random_seed = None
54 |
55 | # Slider for output quality
56 | output_quality = st.slider('Output Quality', 50, 100, 80)
57 |
58 | # Create two columns for Generate and Download buttons
59 | col1, col2 = st.columns([1, 1])
60 |
61 | # Button to submit the prompt and generate image
62 | generate_button = col1.button('Generate Image')
63 |
64 |
65 | # PART 4A: MAIN CONTENT AREA (IMAGE GENERATION AND ACCESS)
66 |
67 | # Check if the button was pressed and if there is a prompt
68 | if generate_button and prompt:
69 | with st.spinner('Generating image...'):
70 | try:
71 | # Call the Flux Schnell model on Replicate
72 | input_data = {
73 | "prompt": prompt,
74 | "aspect_ratio": '3:2', # Set the aspect ratio
75 | "quality": output_quality # Set the output quality
76 | }
77 |
78 | # Add random seed only if it's enabled
79 | if random_seed is not None:
80 | input_data["seed"] = random_seed
81 |
82 | # Use replicate.run to invoke the model
83 | output = replicate.run(
84 | "black-forest-labs/flux-schnell", # Model name
85 | input=input_data # Input to the model
86 | )
87 |
88 | # Store the generated image URL in session state
89 | st.session_state['image_url'] = str(output[0]) # first element of output; str() also covers newer replicate versions that return FileOutput objects
90 |
91 | except Exception as e:
92 | st.error(f"An error occurred: {e}")
93 |
94 | # If an image URL is present in session state, display the image and download button
95 | if st.session_state['image_url']:
96 | # Display the image
97 | st.image(st.session_state['image_url'], caption='Generated Image')
98 |
99 | # Download the image from the URL
100 | response = requests.get(st.session_state['image_url'])
101 | image = Image.open(BytesIO(response.content)).convert("RGB") # JPEG can't store alpha, so normalize to RGB
102 |
103 | # Convert the image to a binary stream and save it as .jpg
104 | img_buffer = BytesIO()
105 | image.save(img_buffer, format="JPEG")
106 | img_buffer.seek(0)
107 |
108 | # Display the download button in the second column
109 | with col2:
110 | st.download_button(
111 | label="Download Image",
112 | data=img_buffer,
113 | file_name="generated_image.jpg",
114 | mime="image/jpeg"
115 | )
--------------------------------------------------------------------------------
/ai_sentiment_analysis_gemini.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# AI Pipeline: Blue Sky Scraper + Gemini Flash Sentiment Analysis"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "### Libraries"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import requests\n",
24 | "import pandas as pd\n",
25 | "import google.generativeai as genai\n",
26 | "import enum\n",
27 | "from typing_extensions import TypedDict\n",
28 | "import json\n",
29 | "import plotly.express as px"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "## 1. Configuration"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "### Authentication and API Keys"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 2,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "# Replace with your Bluesky handle and password\n",
53 | "BLUESKY_HANDLE = 'handle goes here'\n",
54 | "BLUESKY_PASSWORD = 'password goes here'\n",
55 | "\n",
56 | "# Replace with your Google AI Studio API key\n",
57 | "genai.configure(api_key='api key goes here')"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "### Gemini Model"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 3,
70 | "metadata": {},
71 | "outputs": [],
72 | "source": [
73 | "model = genai.GenerativeModel(\"gemini-1.5-flash\") # gemini-2.0-flash-exp"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "### Stock (or keyword to analyze)"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 4,
86 | "metadata": {},
87 | "outputs": [],
88 | "source": [
89 | "search_term = 'ADBE'"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "### Number of posts to return"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 5,
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "n = 100 # Number of latest posts to retrieve"
106 | ]
107 | },
108 | {
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 | "## 2. Blue Sky Web Scraper"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 6,
118 | "metadata": {},
119 | "outputs": [],
120 | "source": [
121 | "# Authenticate and obtain access token\n",
122 | "auth_response = requests.post(\n",
123 | " 'https://bsky.social/xrpc/com.atproto.server.createSession',\n",
124 | " json={'identifier': BLUESKY_HANDLE, 'password': BLUESKY_PASSWORD}\n",
125 | ")\n",
126 | "auth_response.raise_for_status()\n",
127 | "access_token = auth_response.json().get('accessJwt')\n"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 7,
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "# Set up the request headers with the access token\n",
137 | "headers = {'Authorization': f'Bearer {access_token}'}\n",
138 | "\n",
139 | "# Define the search parameters\n",
140 | "params = {\n",
141 | " 'q': search_term,\n",
142 | " 'sort': 'latest',\n",
143 | " 'limit': n\n",
144 | "}\n",
145 | "\n",
146 | "# Perform the search request\n",
147 | "search_response = requests.get(\n",
148 | " 'https://bsky.social/xrpc/app.bsky.feed.searchPosts',\n",
149 | " headers=headers,\n",
150 | " params=params\n",
151 | ")\n",
152 | "search_response.raise_for_status()\n",
153 | "posts = search_response.json().get('posts', [])"
154 | ]
155 | },
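  | {
  | "cell_type": "markdown",
  | "metadata": {},
  | "source": [
  | "searchPosts caps `limit` at 100 per request; for more posts, the response includes a `cursor` that can be passed back to page through results. A minimal sketch (the 200-post target is arbitrary):"
  | ]
  | },
  | {
  | "cell_type": "code",
  | "execution_count": null,
  | "metadata": {},
  | "outputs": [],
  | "source": [
  | "# Pagination sketch: follow the cursor to fetch more than one page of results\n",
  | "all_posts, cursor = [], None\n",
  | "while len(all_posts) < 200:\n",
  | "    page_params = dict(params)\n",
  | "    if cursor:\n",
  | "        page_params['cursor'] = cursor\n",
  | "    resp = requests.get('https://bsky.social/xrpc/app.bsky.feed.searchPosts',\n",
  | "                        headers=headers, params=page_params)\n",
  | "    resp.raise_for_status()\n",
  | "    body = resp.json()\n",
  | "    all_posts.extend(body.get('posts', []))\n",
  | "    cursor = body.get('cursor')\n",
  | "    if not cursor:\n",
  | "        break"
  | ]
  | },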
156 | {
157 | "cell_type": "code",
158 | "execution_count": 8,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "# Extract data and create a list of dictionaries\n",
163 | "data = []\n",
164 | "for post in posts:\n",
165 | " author = post.get('author', {}).get('handle', 'Unknown')\n",
166 | " content = post.get('record', {}).get('text', 'No content')\n",
167 | " created_at = post.get('record', {}).get('createdAt', 'Unknown date')\n",
168 | " data.append({'Date': created_at, 'Content': content, 'Author': author})\n",
169 | "\n",
170 | "# Convert list of dictionaries to DataFrame\n",
171 | "df = pd.DataFrame(data)\n",
172 | "\n",
173 | "# Convert 'Date' column to datetime format for better handling\n",
174 | "df['Date'] = pd.to_datetime(df['Date'], errors='coerce')"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 9,
180 | "metadata": {},
181 | "outputs": [
182 | {
183 | "data": {
268 | "text/plain": [
269 | " Date \\\n",
270 | "0 2024-12-31 19:10:07.207000+00:00 \n",
271 | "1 2024-12-31 17:39:40.015000+00:00 \n",
272 | "2 2024-12-31 17:39:40.014000+00:00 \n",
273 | "3 2024-12-31 16:03:55.965884+00:00 \n",
274 | "4 2024-12-30 16:01:16.570000+00:00 \n",
275 | ".. ... \n",
276 | "93 2024-12-12 17:19:22.999000+00:00 \n",
277 | "94 2024-12-12 16:53:12.278000+00:00 \n",
278 | "95 2024-12-12 16:20:44.597227+00:00 \n",
279 | "96 2024-12-12 15:49:51.063000+00:00 \n",
280 | "97 2024-12-12 15:38:39.435000+00:00 \n",
281 | "\n",
282 | " Content \n",
283 | "0 \\n#MarjorieTaylorGreene Went Christmas Shoppin... \n",
284 | "1 Over the past year #AJB and #ADBE swapped from... \n",
285 | "2 The major changes to the port. over the year w... \n",
286 | "3 📊 ADBE Market Analysis - Dec 31, 2024\\n\\nCurre... \n",
287 | "4 Adobe knows that #DEI is good for people, good... \n",
288 | ".. ... \n",
289 | "93 Hello, Investors! 👋\\nStocks were down modestly... \n",
290 | "94 Adobe posts record-breaking revenue 📈 but inve... \n",
291 | "95 $ADBE Technical Analysis | Dec 12\\nPrice: $549... \n",
292 | "96 $ADBE: Adobe shares dropped 13% as its 2025 ou... \n",
293 | "97 $ADBE FY-2024: Strong Q3 Perf: Rev Surpasses E... \n",
294 | "\n",
295 | "[98 rows x 2 columns]"
296 | ]
297 | },
298 | "execution_count": 9,
299 | "metadata": {},
300 | "output_type": "execute_result"
301 | }
302 | ],
303 | "source": [
304 | "# Display the DataFrame\n",
305 | "df[['Date','Content']]"
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {},
311 | "source": [
312 | "## 3. Google Gemini Sentiment Analysis"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 10,
318 | "metadata": {},
319 | "outputs": [],
320 | "source": [
321 | "class Sentiment(enum.Enum):\n",
322 | " POSITIVE = \"positive\"\n",
323 | " NEGATIVE = \"negative\"\n",
324 | " NEUTRAL = \"neutral\"\n",
325 | "\n",
326 | "class AnalysisResult(TypedDict):\n",
327 | " is_stock_related: bool\n",
328 | " sentiment: Sentiment\n"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 11,
334 | "metadata": {},
335 | "outputs": [],
336 | "source": [
337 |     "# Returns (is_stock_related, sentiment), or (None, None) on failure\n",
338 |     "def analyze_post(content: str) -> tuple:\n",
339 | " prompt = f\"\"\"\n",
340 | " Analyze the following post and determine:\n",
341 | " 1. Whether it is related to the company, {search_term}, and relates to or discusses \n",
342 | " past, current, or future stock performance of {search_term} explicitly.\n",
343 | " 2. If related, classify the sentiment as positive, negative, or neutral.\n",
344 | "\n",
345 | " Post: \"{content}\"\n",
346 | " \"\"\"\n",
347 | " response = model.generate_content(\n",
348 | " prompt,\n",
349 | " generation_config=genai.GenerationConfig(\n",
350 | " response_mime_type=\"application/json\",\n",
351 | " response_schema=AnalysisResult\n",
352 | " )\n",
353 | " )\n",
354 | " if response.candidates:\n",
355 | " candidate_content = response.candidates[0].content\n",
356 | " result_text = ''.join(part.text for part in candidate_content.parts)\n",
357 | " try:\n",
358 | " result = json.loads(result_text)\n",
359 | " is_stock_related = result.get('is_stock_related')\n",
360 | " sentiment = result.get('sentiment')\n",
361 | " if is_stock_related is not None and sentiment is not None:\n",
362 | " return is_stock_related, sentiment\n",
363 | " else:\n",
364 | " print(\"Missing expected keys in the response\")\n",
365 | " return None, None\n",
366 | " except json.JSONDecodeError:\n",
367 | " print(\"Failed to decode JSON response\")\n",
368 | " return None, None\n",
369 | " else:\n",
370 | " print(\"No candidates returned\")\n",
371 | " return None, None\n"
372 | ]
373 | },
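374 |   {
375 |    "cell_type": "markdown",
376 |    "metadata": {},
377 |    "source": [
378 |     "*Editor's note: one API call per post can hit free-tier rate limits. A minimal retry wrapper with exponential backoff (the `max_retries` value is an arbitrary choice):*"
379 |    ]
380 |   },
381 |   {
382 |    "cell_type": "code",
383 |    "execution_count": null,
384 |    "metadata": {},
385 |    "outputs": [],
386 |    "source": [
387 |     "import time\n",
388 |     "\n",
389 |     "# Hedged sketch: retry analyze_post with exponential backoff on transient errors\n",
390 |     "def analyze_post_with_retry(content: str, max_retries: int = 3) -> tuple:\n",
391 |     "    for attempt in range(max_retries):\n",
392 |     "        try:\n",
393 |     "            return analyze_post(content)\n",
394 |     "        except Exception as e:  # e.g. quota or transient network errors\n",
395 |     "            wait = 2 ** attempt\n",
396 |     "            print(f'Attempt {attempt + 1} failed ({e}); retrying in {wait}s')\n",
397 |     "            time.sleep(wait)\n",
398 |     "    return None, None"
399 |    ]
400 |   },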
374 | {
375 | "cell_type": "code",
376 | "execution_count": 12,
377 | "metadata": {},
378 | "outputs": [
379 | {
380 | "name": "stdout",
381 | "output_type": "stream",
382 | "text": [
383 | "Missing expected keys in the response\n",
384 | "Missing expected keys in the response\n"
385 | ]
386 | }
387 | ],
388 | "source": [
389 | "# Apply the analysis to each post\n",
390 | "df[['is_stock_related', 'sentiment']] = df['Content'].apply(\n",
391 | " lambda x: pd.Series(analyze_post(x))\n",
392 | ")"
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "execution_count": 13,
398 | "metadata": {},
399 | "outputs": [
400 | {
401 | "data": {
510 | "text/plain": [
511 | " Date \\\n",
512 | "0 2024-12-31 19:10:07.207000+00:00 \n",
513 | "1 2024-12-31 17:39:40.015000+00:00 \n",
514 | "2 2024-12-31 17:39:40.014000+00:00 \n",
515 | "3 2024-12-31 16:03:55.965884+00:00 \n",
516 | "4 2024-12-30 16:01:16.570000+00:00 \n",
517 | ".. ... \n",
518 | "93 2024-12-12 17:19:22.999000+00:00 \n",
519 | "94 2024-12-12 16:53:12.278000+00:00 \n",
520 | "95 2024-12-12 16:20:44.597227+00:00 \n",
521 | "96 2024-12-12 15:49:51.063000+00:00 \n",
522 | "97 2024-12-12 15:38:39.435000+00:00 \n",
523 | "\n",
524 | " Content is_stock_related \\\n",
525 | "0 \\n#MarjorieTaylorGreene Went Christmas Shoppin... True \n",
526 | "1 Over the past year #AJB and #ADBE swapped from... True \n",
527 | "2 The major changes to the port. over the year w... True \n",
528 | "3 📊 ADBE Market Analysis - Dec 31, 2024\\n\\nCurre... True \n",
529 | "4 Adobe knows that #DEI is good for people, good... True \n",
530 | ".. ... ... \n",
531 | "93 Hello, Investors! 👋\\nStocks were down modestly... True \n",
532 | "94 Adobe posts record-breaking revenue 📈 but inve... True \n",
533 | "95 $ADBE Technical Analysis | Dec 12\\nPrice: $549... True \n",
534 | "96 $ADBE: Adobe shares dropped 13% as its 2025 ou... True \n",
535 | "97 $ADBE FY-2024: Strong Q3 Perf: Rev Surpasses E... True \n",
536 | "\n",
537 | " sentiment \n",
538 | "0 neutral \n",
539 | "1 neutral \n",
540 | "2 negative \n",
541 | "3 negative \n",
542 | "4 positive \n",
543 | ".. ... \n",
544 | "93 negative \n",
545 | "94 negative \n",
546 | "95 positive \n",
547 | "96 negative \n",
548 | "97 positive \n",
549 | "\n",
550 | "[98 rows x 4 columns]"
551 | ]
552 | },
553 | "execution_count": 13,
554 | "metadata": {},
555 | "output_type": "execute_result"
556 | }
557 | ],
558 | "source": [
559 |     "df.drop(columns='Author', inplace=True)\n",
560 | "df"
561 | ]
562 | },
563 | {
564 | "cell_type": "code",
565 | "execution_count": 15,
566 | "metadata": {},
567 | "outputs": [
583 | "data": {
584 | "application/vnd.plotly.v1+json": {
585 | "config": {
586 | "plotlyServerURL": "https://plot.ly"
587 | },
588 | "data": [
589 | {
590 |           "hovertemplate": "Date=%{x}<br>Positive Sentiment Score=%{y}<extra></extra>",
591 | "legendgroup": "",
592 | "line": {
593 | "color": "#636efa",
594 | "dash": "solid"
595 | },
596 | "marker": {
597 | "symbol": "circle"
598 | },
599 | "mode": "lines+markers",
600 | "name": "",
601 | "orientation": "v",
602 | "showlegend": false,
603 | "type": "scatter",
604 | "x": [
605 | "2024-12-12",
606 | "2024-12-13",
607 | "2024-12-14",
608 | "2024-12-15",
609 | "2024-12-16",
610 | "2024-12-17",
611 | "2024-12-18",
612 | "2024-12-19",
613 | "2024-12-20",
614 | "2024-12-22",
615 | "2024-12-23",
616 | "2024-12-24",
617 | "2024-12-26",
618 | "2024-12-27",
619 | "2024-12-30",
620 | "2024-12-31"
621 | ],
622 | "xaxis": "x",
623 | "y": [
624 | 0.18181818181818182,
625 | 0.18181818181818182,
626 | 0,
627 | 0.5,
628 | 0.25,
629 | 0,
630 | 0,
631 | 0.3333333333333333,
632 | 0.75,
633 | 0.6666666666666666,
634 | 1,
635 | 0.5,
636 | 0.5,
637 | 1,
638 | 1,
639 | 0
640 | ],
641 | "yaxis": "y"
642 | }
643 | ],
644 | "layout": {
645 | "legend": {
646 | "tracegroupgap": 0
647 | },
1464 | "title": {
1465 | "text": "Daily Positive Sentiment Score"
1466 | },
1467 | "xaxis": {
1468 | "anchor": "y",
1469 | "domain": [
1470 | 0,
1471 | 1
1472 | ],
1473 | "dtick": "D",
1474 | "tickformat": "%Y-%m-%d",
1475 | "title": {
1476 | "text": "Date"
1477 | }
1478 | },
1479 | "yaxis": {
1480 | "anchor": "x",
1481 | "domain": [
1482 | 0,
1483 | 1
1484 | ],
1485 | "title": {
1486 | "text": "Positive Sentiment Score"
1487 | }
1488 | }
1489 | }
1490 | }
1491 | },
1492 | "metadata": {},
1493 | "output_type": "display_data"
1494 | }
1495 | ],
1496 | "source": [
1497 | "# Filter out neutral sentiment\n",
1498 |     "filtered_df = df[df['sentiment'] != 'neutral'].copy()  # .copy() avoids SettingWithCopyWarning\n",
1499 | "\n",
1500 | "# Extract the date (day only) and calculate daily positive sentiment score\n",
1501 | "filtered_df['Day'] = filtered_df['Date'].dt.date\n",
1502 | "daily_sentiment = (\n",
1503 | " filtered_df.groupby('Day')['sentiment']\n",
1504 | " .apply(lambda x: (x == 'positive').sum() / len(x))\n",
1505 | " .reset_index(name='positive_sentiment_score')\n",
1506 | ")\n",
1507 | "\n",
1508 | "# Plot the daily sentiment score\n",
1509 | "fig = px.line(\n",
1510 | " daily_sentiment,\n",
1511 | " x='Day',\n",
1512 | " y='positive_sentiment_score',\n",
1513 | " title='Daily Positive Sentiment Score',\n",
1514 | " labels={'positive_sentiment_score': 'Positive Sentiment Score', 'Day': 'Date'},\n",
1515 | " markers=True,\n",
1516 | ")\n",
1517 | "\n",
1518 | "fig.update_xaxes(dtick=\"D\", tickformat=\"%Y-%m-%d\")\n",
1519 | "\n",
1520 | "\n",
1521 | "fig"
1522 | ]
1523 | }
1524 | ],
1525 | "metadata": {
1526 | "kernelspec": {
1527 | "display_name": "general_env",
1528 | "language": "python",
1529 | "name": "python3"
1530 | },
1531 | "language_info": {
1532 | "codemirror_mode": {
1533 | "name": "ipython",
1534 | "version": 3
1535 | },
1536 | "file_extension": ".py",
1537 | "mimetype": "text/x-python",
1538 | "name": "python",
1539 | "nbconvert_exporter": "python",
1540 | "pygments_lexer": "ipython3",
1541 | "version": "3.12.3"
1542 | }
1543 | },
1544 | "nbformat": 4,
1545 | "nbformat_minor": 2
1546 | }
1547 |
--------------------------------------------------------------------------------
/ai_stocks_prediction.py:
--------------------------------------------------------------------------------
1 | ##############
2 | #### Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts
3 | #### Subscribe for more AI/Machine Learning/Quant Finance Tutorials
4 | ##############
5 |
6 |
7 | ##############
8 | ### PART 1 ###
9 | # LIBRARIES ##
10 | # & OLLAMA ##
11 | ##############
12 |
13 | # Data Importing Libraries
14 | import yfinance as yf
15 | from finvizfinance.quote import finvizfinance
16 |
17 | # Data Modeling Library
18 | from statsmodels.tsa.statespace.sarimax import SARIMAX
19 |
20 | # Charts
21 | import plotly.graph_objects as go
22 |
23 | # Data Manipulation
24 | import pandas as pd
25 | import numpy as np
26 |
27 | # Avoid Forecasting on Holidays
28 | import holidays
29 |
30 | # Create Local LLM Server Connection
31 | from langchain_community.llms import Ollama
32 |
33 | # Interactive Web App UI
34 | import streamlit as st
35 |
36 |
37 | # Connect to local Ollama server
38 | llm = Ollama(model='llama3')
39 |
40 |
41 | ##############
42 | ### PART 2 ###
43 | # FUNCTIONS ##
44 | ##############
45 |
46 | # Function to classify sentiment
47 | def classify_sentiment(title):
48 | output = llm.invoke(f"Classify the sentiment as 'POSITIVE' or 'NEGATIVE' or 'NEUTRAL' with just that one word only, no additional words or reasoning: {title}")
49 | return output.strip() # Ensure the response is clean and without extra spaces
50 |
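51 | # Editor's sketch (not in the original): LLMs occasionally answer off-script
52 | # ("Sentiment: POSITIVE."), so normalizing the reply to one of the three known
53 | # labels before filtering is safer than trusting the raw string.
54 | def normalize_sentiment(raw: str) -> str:
55 |     label = raw.strip().upper()
56 |     for known in ('POSITIVE', 'NEGATIVE', 'NEUTRAL'):
57 |         if known in label:
58 |             return known
59 |     return 'NEUTRAL'  # Unparseable replies fall back to neutral and get filtered out
60 | 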
51 | # Function to get and process news data
52 | def get_news_data(ticker):
53 |
54 | # Data Pull
55 | stock = finvizfinance(ticker)
56 | news_df = stock.ticker_news()
57 |
58 | # Preprocess before putting into LLM
59 | news_df['Title'] = news_df['Title'].str.lower()
60 |
61 | # Classify Sentiment function applied to each row of news_df
62 | news_df['sentiment'] = news_df['Title'].apply(classify_sentiment)
63 |
64 | # Postprocess after putting into LLM
65 | news_df['sentiment'] = news_df['sentiment'].str.upper()
66 | news_df = news_df[news_df['sentiment'] != 'NEUTRAL']
67 | news_df['Date'] = pd.to_datetime(news_df['Date'])
68 | news_df['DateOnly'] = news_df['Date'].dt.date
69 |
70 | return news_df
71 |
72 | # Function to group and process sentiment data
73 | def process_sentiment_data(news_df):
74 |
75 | # Reshape data to have df with columns: Date, # of positive Articles, # of negative Articles
76 | grouped = news_df.groupby(['DateOnly', 'sentiment']).size().unstack(fill_value=0)
77 | grouped = grouped.reindex(columns=['POSITIVE', 'NEGATIVE'], fill_value=0)
78 |
79 |     # Create rolling sums that count the number of positive and negative sentiment articles within the past 7 days
80 |     grouped['7day_sum_positive'] = grouped['POSITIVE'].rolling(window=7, min_periods=1).sum()
81 |     grouped['7day_sum_negative'] = grouped['NEGATIVE'].rolling(window=7, min_periods=1).sum()
82 | 
83 |     # "Percent Positive": share of positive articles within the rolling 7-day window
84 |     grouped['7day_pct_positive'] = grouped['7day_sum_positive'] / (grouped['7day_sum_positive'] + grouped['7day_sum_negative'])
85 | result_df = grouped.reset_index()
86 |
87 | return result_df
88 |
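89 | # Editor's note (sketch, not in the original): rolling(window=7) spans the last
90 | # 7 *rows*, which equals 7 calendar days only if every day has articles. To make
91 | # the window calendar-based, reindex to a continuous daily range first, e.g.:
92 | #     full_days = pd.date_range(grouped.index.min(), grouped.index.max()).date
93 | #     grouped = grouped.reindex(full_days, fill_value=0)
94 | 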
89 | # Function to fetch and process stock data
90 | def get_stock_data(ticker, start_date, end_date):
91 | stock_data = yf.download(ticker, start=start_date, end=end_date) # Pull ticker data
92 | stock_data['Pct_Change'] = stock_data['Close'].pct_change() * 100 # Transform closing value to percent change in closing value since previous day
93 | return stock_data
94 |
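95 | # Editor's note (version-dependent sketch): recent yfinance releases return
96 | # MultiIndex columns even for a single ticker; if that happens, flatten inside
97 | # get_stock_data before computing Pct_Change, e.g.:
98 | #     if isinstance(stock_data.columns, pd.MultiIndex):
99 | #         stock_data.columns = stock_data.columns.get_level_values(0)
100 | 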
95 | # Function to combine sentiment and stock data
96 | def combine_data(result_df, stock_data):
97 | combined_df = result_df.set_index('DateOnly').join(stock_data[['Pct_Change']], how='inner')
98 | combined_df['lagged_7day_pct_positive'] = combined_df['7day_pct_positive'].shift(1) # Lag sentiment feature by 1 day for temporal alignment
99 | return combined_df
100 |
101 | # Function to calculate Pearson correlation
102 | def calculate_correlation(combined_df):
103 | correlation_pct_change = combined_df[['lagged_7day_pct_positive', 'Pct_Change']].corr().iloc[0, 1]
104 | return correlation_pct_change
105 |
106 | # Function to get future dates excluding weekends and holidays
107 | def get_future_dates(start_date, num_days):
108 | us_holidays = holidays.US()
109 | future_dates = []
110 |     current_date = start_date + pd.Timedelta(days=1)  # Start the day after the last observed date
111 | while len(future_dates) < num_days:
112 | if current_date.weekday() < 5 and current_date not in us_holidays:
113 | future_dates.append(current_date)
114 | current_date += pd.Timedelta(days=1)
115 | return future_dates
116 |
117 | # Function to fit ARIMAX model and forecast
118 | def fit_and_forecast(combined_df, forecast_steps=3):
119 |     endog = combined_df['Pct_Change'].dropna() # Dependent variable
120 |     exog = combined_df['lagged_7day_pct_positive'].dropna() # Predictor variable
121 |     endog, exog = endog.align(exog, join='inner') # Align on shared dates (a one-sided .loc can raise KeyError)
122 | model = SARIMAX(endog, exog=exog, order=(1, 1, 1)) # ARIMAX model
123 | fit = model.fit(disp=False) # Fit model
124 |
125 | future_dates = get_future_dates(combined_df.index[-1], forecast_steps) # Future dates
126 |     future_exog = combined_df['lagged_7day_pct_positive'][-forecast_steps:].values.reshape(-1, 1) # Carry the last observed sentiment values forward as a naive stand-in for future exog
127 |
128 | forecast = fit.get_forecast(steps=forecast_steps, exog=future_exog) # Get forecast
129 | forecast_mean = forecast.predicted_mean # Predicted mean
130 | forecast_ci = forecast.conf_int() # Confidence intervals
131 |
132 | return forecast_mean, forecast_ci, future_dates # Return results
133 |
134 |
135 | # Function to create and display plot
136 | def create_plot(combined_df, forecast_mean, forecast_ci, forecast_index):
137 | # Standardize the sentiment proportion
138 | sentiment_std = (combined_df['7day_pct_positive'] - combined_df['7day_pct_positive'].mean()) / combined_df['7day_pct_positive'].std()
139 |
140 | fig = go.Figure()
141 |
142 | # Add standardized sentiment proportion
143 | fig.add_trace(go.Scatter(
144 | x=combined_df.index,
145 | y=sentiment_std,
146 | name='Standardized Sentiment Proportion',
147 | line=dict(color='blue'),
148 | mode='lines'
149 | ))
150 |
151 | # Add stock percentage change
152 | fig.add_trace(go.Scatter(
153 | x=combined_df.index,
154 | y=combined_df['Pct_Change'],
155 | name='Stock Pct Change',
156 | line=dict(color='green'),
157 | yaxis='y2',
158 | mode='lines'
159 | ))
160 |
161 | # Add forecasted stock percentage change
162 | fig.add_trace(go.Scatter(
163 | x=forecast_index,
164 | y=forecast_mean,
165 | name='Forecasted Pct Change',
166 | line=dict(color='red'),
167 | mode='lines'
168 | ))
169 |
170 | # Add confidence intervals for the forecast
171 | fig.add_trace(go.Scatter(
172 | x=np.concatenate([forecast_index, forecast_index[::-1]]),
173 | y=np.concatenate([forecast_ci.iloc[:, 0], forecast_ci.iloc[:, 1][::-1]]),
174 | fill='toself',
175 | fillcolor='rgba(255,0,0,0.2)',
176 | line=dict(color='rgba(255,255,255,0)'),
177 | hoverinfo="skip",
178 | showlegend=False
179 | ))
180 |
181 | # Update layout with appropriate y-axis ranges
182 | fig.update_layout(
183 | title='Sentiment Proportion and Stock Percentage Change with Forecast',
184 | xaxis_title='Date',
185 |         yaxis=dict(
186 |             # 'titlefont' was removed in newer Plotly releases;
187 |             # the font now nests under 'title' instead
188 |             title=dict(text='Standardized Sentiment Proportion', font=dict(color='blue'))
189 |         ),
190 |         yaxis2=dict(
191 |             title=dict(text='Stock Pct Change', font=dict(color='green')),
192 |             overlaying='y',
193 |             side='right'
194 |         ),
195 | template='plotly_dark'
196 | )
197 | st.plotly_chart(fig)
198 |
199 |
200 | ##############
201 | ### PART 3 ###
202 | # STREAMLIT ##
203 | ##############
204 |
205 | # Streamlit app
206 | st.sidebar.title("Predicting Stock Prices by News Sentiment")
207 | ticker = st.sidebar.text_input("Enter stock ticker (e.g., SBUX):", value='SBUX')
208 | run_button = st.sidebar.button("Run Analysis")
209 |
210 | if run_button:
211 | news_df = get_news_data(ticker)
212 | result_df = process_sentiment_data(news_df)
213 | start_date = result_df['DateOnly'].min().strftime('%Y-%m-%d')
214 | end_date = result_df['DateOnly'].max().strftime('%Y-%m-%d')
215 | stock_data = get_stock_data(ticker, start_date, end_date)
216 | combined_df = combine_data(result_df, stock_data)
217 | correlation_pct_change = calculate_correlation(combined_df)
218 | st.write(f'Pearson correlation between lagged sentiment score and stock percentage change: {correlation_pct_change}')
219 | forecast_mean, forecast_ci, forecast_index = fit_and_forecast(combined_df)
220 | create_plot(combined_df, forecast_mean, forecast_ci, forecast_index)
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
--------------------------------------------------------------------------------
/app_streamlit_app_builder_ai.py:
--------------------------------------------------------------------------------
1 | # Source: Deep Charts YouTube Channel (https://www.youtube.com/@DeepCharts)
2 |
3 | # Build Streamlit Apps Within a Streamlit App
4 | # Vibe coded with Gemini 2.5 Pro Experimental
5 |
6 | # Project Setup:
7 |
8 | # Create a project folder: e.g., streamlit_ide_prototype
9 | # Create a Python file: app.py inside the folder.
10 | # Create a sub-folder: workspace inside the project folder. This is where the AI will create/edit files.
11 | # Install libraries:
12 | # pip install streamlit google-generativeai python-dotenv streamlit-option-menu streamlit-ace streamlit-antd-components
13 | # API Key:
14 | # Get your Gemini API key (from Google AI Studio).
15 | # Create a file named .env in your project folder.
16 | # Add your API key to the .env file:
17 | # GOOGLE_API_KEY="YOUR_API_KEY_HERE"
18 | # Alternatively, for deployment, use Streamlit Secrets Management. For local testing, .env is often easier.
19 |
20 | ######
21 |
22 | import streamlit as st
23 | import google.generativeai as genai
24 | import os
25 | from pathlib import Path
26 | import json
27 | import time
28 | from dotenv import load_dotenv
29 | import subprocess # Needed to run other Streamlit apps (the preview)
30 | import socket # Needed to find an open network port for the preview
31 | import sys # Needed to get the path to the current Python executable
32 |
33 | # --- UI Components ---
34 | # These libraries provide pre-built UI elements like menus and the code editor.
35 | from streamlit_option_menu import option_menu
36 | from streamlit_ace import st_ace
37 | import streamlit_antd_components as sac # Using for specific buttons (Save/Delete group)
38 |
39 | # --- Configuration ---
40 | st.set_page_config(
41 | layout="wide",
42 | page_title="AI App Gen" # Shorter title
43 | )
44 | load_dotenv() # Load API keys from a file named .env in the same directory
45 |
46 | # --- Constants ---
47 | # Where generated Python app files will be saved
48 | WORKSPACE_DIR = Path("workspace_st_apps")
49 | WORKSPACE_DIR.mkdir(exist_ok=True) # Create the directory if it doesn't exist
50 |
51 | # Code editor appearance settings
52 | ACE_DEFAULT_THEME = "monokai"
53 | ACE_DEFAULT_KEYBINDING = "vscode"
54 |
55 | # Which Google AI model to use for generating code
56 | GEMINI_MODEL_NAME = "gemini-2.5-pro-exp-03-25"
57 |
58 | # Instructions for the Google AI model
59 | # This tells the AI how to format its responses (as JSON commands)
60 | GEMINI_SYSTEM_PROMPT = f"""
61 | You are an AI assistant helping create Streamlit applications.
62 | Your goal is to manage Python files in a workspace based on user requests.
63 | Respond *only* with a valid JSON array containing commands. Do not add any explanations before or after the JSON array.
64 |
65 | Available commands:
66 | 1. `{{"action": "create_update", "filename": "app_name.py", "content": "FULL_PYTHON_CODE_HERE"}}`
67 | - Use this to create a new Python file or completely overwrite an existing one.
68 | - Provide the *entire* file content. Escape backslashes (`\\\\`) and double quotes (`\\"`). Ensure newlines are `\\n`.
69 | - Do *not* include ```python markdown blocks or shebangs (`#!/usr/bin/env python`) in the "content".
70 | 2. `{{"action": "delete", "filename": "old_app.py"}}`
71 | - Use this to delete a Python file from the workspace.
72 | 3. `{{"action": "chat", "content": "Your message here."}}`
73 | - Use this *only* if you need to ask for clarification, report an issue you can't fix with file actions, or confirm understanding.
74 |
75 | Current Python files in workspace: [FILE_LIST]
76 |
77 | Example Interaction:
78 | User: Create a simple hello world app called hello.py
79 | AI: `[{{"action": "create_update", "filename": "hello.py", "content": "import streamlit as st\\n\\nst.title('Hello World!')\\nst.write('This is a simple app.')"}}]`
80 |
81 | Ensure your entire response is *only* the JSON array `[...]`.
82 | """
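83 | # Editor's note (sketch): prompt instructions alone don't guarantee JSON. Gemini's
84 | # JSON mode can enforce the format at the API level by passing
85 | # generation_config=genai.GenerationConfig(response_mime_type="application/json")
86 | # to model.generate_content(...), as done in ai_sentiment_analysis_gemini.ipynb.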
83 |
84 | # --- API Client Setup ---
85 | try:
86 | google_api_key = os.getenv("GOOGLE_API_KEY")
87 | if not google_api_key:
88 | # Stop the app if the API key is missing
89 | st.error("🔴 Google API Key not found. Please set `GOOGLE_API_KEY` in a `.env` file.")
90 | st.stop() # Halt execution
91 | # Configure the Gemini library with the key
92 | genai.configure(api_key=google_api_key)
93 | # Create the AI model object
94 | model = genai.GenerativeModel(GEMINI_MODEL_NAME)
95 | except Exception as e:
96 | st.error(f"🔴 Failed to set up Google AI: {e}")
97 | st.stop()
98 |
99 | # --- Session State ---
100 | # Streamlit reruns the script on interaction. Session state stores data
101 | # between reruns, like chat history or which file is selected.
102 | def initialize_session_state():
103 | """Sets up default values in Streamlit's session state dictionary."""
104 | state_defaults = {
105 | "messages": [], # List to store chat messages (user and AI)
106 | "selected_file": None, # Name of the file currently shown in the editor
107 | "file_content_on_load": "", # Content of the selected file when loaded (read-only)
108 | "preview_process": None, # Stores the running preview process object
109 | "preview_port": None, # Port number used by the preview
110 | "preview_url": None, # URL to access the preview
111 | "preview_file": None, # Name of the file being previewed
112 | "editor_unsaved_content": "", # Current text typed into the editor
113 | "last_saved_content": "", # Content that was last successfully saved to disk
114 | }
115 | for key, default_value in state_defaults.items():
116 | if key not in st.session_state:
117 | st.session_state[key] = default_value
118 |
119 | initialize_session_state() # Run the initialization
120 |
121 | # --- File System Functions ---
122 | def get_workspace_python_files():
123 | """Gets a list of all '.py' filenames in the workspace directory."""
124 | if not WORKSPACE_DIR.is_dir():
125 | return [] # Return empty list if directory doesn't exist
126 | try:
127 | # List files, filter for .py, sort alphabetically
128 | python_files = sorted([
129 | f.name for f in WORKSPACE_DIR.iterdir() if f.is_file() and f.suffix == '.py'
130 | ])
131 | return python_files
132 | except Exception as e:
133 | st.error(f"Error reading workspace directory: {e}")
134 | return []
135 |
136 | def read_file(filename):
137 | """Reads the text content of a file from the workspace."""
138 | if not filename: # Check if filename is provided
139 | return None
140 | # Prevent accessing files outside the workspace (basic security)
141 | if ".." in filename or filename.startswith(("/", "\\")):
142 | st.error(f"Invalid file path: {filename}")
143 | return None
144 |
145 | filepath = WORKSPACE_DIR / filename # Combine directory and filename
146 | try:
147 | with open(filepath, "r", encoding="utf-8") as f:
148 | return f.read() # Return the file's text content
149 | except FileNotFoundError:
150 | st.warning(f"File not found: {filename}")
151 | return None # Indicate file doesn't exist
152 | except Exception as e:
153 | st.error(f"Error reading file '{filename}': {e}")
154 | return None
155 |
156 | def save_file(filename, content):
157 | """Writes text content to a file in the workspace."""
158 | if not filename:
159 | return False # Cannot save without a filename
160 | if ".." in filename or filename.startswith(("/", "\\")):
161 | st.error(f"Invalid file path: {filename}")
162 | return False
163 |
164 | filepath = WORKSPACE_DIR / filename
165 | try:
166 | # Write the content to the file (overwrites if it exists)
167 | with open(filepath, "w", encoding="utf-8") as f:
168 | f.write(content)
169 | return True # Indicate success
170 | except Exception as e:
171 | st.error(f"Error saving file '{filename}': {e}")
172 | return False # Indicate failure
173 |
174 | def delete_file(filename):
175 | """Deletes a file from the workspace and updates app state."""
176 | if not filename:
177 | return False
178 | if ".." in filename or filename.startswith(("/", "\\")):
179 | st.error(f"Invalid file path: {filename}")
180 | return False
181 |
182 | filepath = WORKSPACE_DIR / filename
183 | try:
184 | if filepath.is_file():
185 | os.remove(filepath) # Delete the actual file
186 | st.toast(f"Deleted: {filename}", icon="🗑️")
187 |
188 | # If the deleted file was being previewed, stop the preview
189 | if st.session_state.preview_file == filename:
190 | stop_preview() # Call the function to stop the process
191 |
192 | # If the deleted file was selected in the editor, clear the selection
193 | if st.session_state.selected_file == filename:
194 | st.session_state.selected_file = None
195 | st.session_state.file_content_on_load = ""
196 | st.session_state.editor_unsaved_content = ""
197 | st.session_state.last_saved_content = ""
198 | return True # Indicate success
199 | else:
200 | st.warning(f"Could not delete: File '{filename}' not found.")
201 | return False
202 | except Exception as e:
203 | st.error(f"Error deleting file '{filename}': {e}")
204 | return False
205 |
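206 | # Editor's sketch (not wired into the original flow): the substring checks above
207 | # miss cases such as absolute Windows drive paths. Resolving the candidate path
208 | # against the workspace is a stricter guard:
209 | def _resolve_in_workspace(filename: str):
210 |     """Return the resolved path if it stays inside WORKSPACE_DIR, else None."""
211 |     candidate = (WORKSPACE_DIR / filename).resolve()
212 |     try:
213 |         candidate.relative_to(WORKSPACE_DIR.resolve())
214 |         return candidate
215 |     except ValueError:  # Path escapes the workspace
216 |         return None
217 | 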
206 | # --- AI Interaction Functions ---
207 |
208 | def _clean_ai_response_text(ai_response_text):
209 | """Removes potential code fences (```json ... ```) from AI response."""
210 | text = ai_response_text.strip()
211 |     if text.startswith("```json"):
212 |         text = text.removeprefix("```json").removesuffix("```").strip()  # Safe even if the closing fence is missing
213 |     elif text.startswith("```"):
214 |         text = text.removeprefix("```").removesuffix("```").strip()
215 | return text
216 |
217 | def parse_and_execute_ai_commands(ai_response_text):
218 | """
219 | Parses the AI's JSON response and performs the requested file actions.
220 | Returns the list of commands (for chat history display).
221 | """
222 | cleaned_text = _clean_ai_response_text(ai_response_text)
223 | executed_commands_list = [] # To store commands for chat display
224 |
225 | try:
226 | # Attempt to convert the cleaned text into a Python list of dictionaries
227 | commands = json.loads(cleaned_text)
228 |
229 | # Check if the result is actually a list
230 | if not isinstance(commands, list):
231 | st.error("AI response was valid JSON, but not a list of commands.")
232 | # Return a chat message indicating the error for display
233 | return [{"action": "chat", "content": f"AI Error: Response was not a list. Response: {cleaned_text}"}]
234 |
235 | # Process each command dictionary in the list
236 | for command_data in commands:
237 | # Ensure the command is a dictionary before processing
238 | if not isinstance(command_data, dict):
239 | st.warning(f"AI sent an invalid command format (not a dict): {command_data}")
240 | executed_commands_list.append({"action": "chat", "content": f"AI Error: Invalid command format: {command_data}"})
241 | continue # Skip to the next command
242 |
243 | # Add the command to the list we return (used for displaying AI actions)
244 | executed_commands_list.append(command_data)
245 |
246 | # Get action details from the dictionary
247 | action = command_data.get("action")
248 | filename = command_data.get("filename")
249 | content = command_data.get("content")
250 |
251 | # --- Execute the action ---
252 | if action == "create_update":
253 | if filename and content is not None:
254 | success = save_file(filename, content)
255 | if success:
256 | st.toast(f"AI saved: {filename}", icon="💾")
257 | # If this file is currently open in the editor, update the editor's content
258 | if st.session_state.selected_file == filename:
259 | st.session_state.file_content_on_load = content
260 | st.session_state.last_saved_content = content
261 | st.session_state.editor_unsaved_content = content
262 | else:
263 | st.error(f"AI command failed: Could not save '{filename}'.")
264 | # Add error details to chat display list
265 | executed_commands_list.append({"action": "chat", "content": f"Error: Failed saving {filename}"})
266 | else:
267 | st.warning("AI 'create_update' command missing filename or content.")
268 | executed_commands_list.append({"action": "chat", "content": "AI Warning: Invalid create_update"})
269 |
270 | elif action == "delete":
271 | if filename:
272 | success = delete_file(filename)
273 | if not success:
274 | st.error(f"AI command failed: Could not delete '{filename}'.")
275 | executed_commands_list.append({"action": "chat", "content": f"Error: Failed deleting {filename}"})
276 | else:
277 | st.warning("AI 'delete' command missing filename.")
278 | executed_commands_list.append({"action": "chat", "content": "AI Warning: Invalid delete"})
279 |
280 | elif action == "chat":
281 | # No action needed here, the chat message is already in executed_commands_list
282 | # and will be displayed in the chat history.
283 | pass
284 |
285 | else:
286 | # Handle unrecognized actions from the AI
287 | st.warning(f"AI sent unknown action: '{action}'.")
288 | executed_commands_list.append({"action": "chat", "content": f"AI Warning: Unknown action '{action}'"})
289 |
290 | return executed_commands_list # Return the list for chat display
291 |
292 | except json.JSONDecodeError:
293 | st.error(f"AI response was not valid JSON.\nRaw response:\n```\n{cleaned_text}\n```")
294 | # Return a chat message indicating the JSON error for display
295 | return [{"action": "chat", "content": f"AI Error: Invalid JSON received. Response: {ai_response_text}"}]
296 | except Exception as e:
297 | st.error(f"Error processing AI commands: {e}")
298 | return [{"action": "chat", "content": f"Error processing commands: {e}"}]
299 |
300 | def _prepare_gemini_history(chat_history, system_prompt):
301 | """Formats chat history for the Gemini API call."""
302 | gemini_history = []
303 | # Start with the system prompt (instructions for the AI)
304 | gemini_history.append({"role": "user", "parts": [{"text": system_prompt}]})
305 | # Gemini requires a model response to start the turn properly after a system prompt
306 | gemini_history.append({"role": "model", "parts": [{"text": json.dumps([{"action": "chat", "content": "Understood. I will respond only with JSON commands."}])}]})
307 |
308 | # Add the actual user/assistant messages from session state
309 | for msg in chat_history:
310 | role = msg["role"] # "user" or "assistant"
311 | content = msg["content"]
312 | api_role = "model" if role == "assistant" else "user" # Map to API roles
313 |
314 | # Convert assistant messages (which are lists of commands) back to JSON strings
315 | if role == "assistant" and isinstance(content, list):
316 | try:
317 | content_str = json.dumps(content)
318 | except Exception:
319 | content_str = str(content) # Fallback if conversion fails
320 | else:
321 | content_str = str(content) # User messages are already strings
322 |
323 | if content_str: # Avoid sending empty messages
324 | gemini_history.append({"role": api_role, "parts": [{"text": content_str}]})
325 |
326 | return gemini_history
327 |
328 | def ask_gemini_ai(chat_history):
329 | """Sends the conversation history to the Gemini AI and returns its response."""
330 |
331 |     # Get current list of files to include in the prompt context
332 |     current_files = get_workspace_python_files()
333 |     file_list_info = ', '.join(current_files) if current_files else 'None'
334 |     # Substitute the [FILE_LIST] placeholder (the old replace target never matched)
335 |     updated_system_prompt = GEMINI_SYSTEM_PROMPT.replace(
336 |         "[FILE_LIST]",  # Placeholder defined in GEMINI_SYSTEM_PROMPT
337 |         file_list_info
338 |     )
339 |
340 | # Prepare the history in the format the API expects
341 | gemini_api_history = _prepare_gemini_history(chat_history, updated_system_prompt)
342 |
343 |     response = None  # Defined up front so the error handler below can inspect it safely
344 |     try:
344 | # Make the API call to Google
345 | # print(f"DEBUG: Sending history:\n{json.dumps(gemini_api_history, indent=2)}") # Uncomment for debugging API calls
346 | response = model.generate_content(gemini_api_history)
347 | # print(f"DEBUG: Received response:\n{response.text}") # Uncomment for debugging API calls
348 | return response.text # Return the AI's raw text response
349 |
350 | except Exception as e:
351 | # Handle potential errors during the API call
352 | error_message = f"Gemini API call failed: {type(e).__name__}"
353 | st.error(f"🔴 {error_message}: {e}")
354 |
355 | # Try to give a more user-friendly error message for common issues
356 | error_content = f"AI Error: {str(e)[:150]}..." # Default message
357 | if "API key not valid" in str(e):
358 | error_content = "AI Error: Invalid Google API Key."
359 | elif "429" in str(e) or "quota" in str(e).lower() or "resource has been exhausted" in str(e).lower():
360 | error_content = "AI Error: API Quota or Rate Limit Exceeded."
361 | # Handle cases where the AI's response might be blocked for safety
362 | try:
363 | if response and response.prompt_feedback and response.prompt_feedback.block_reason:
364 | error_content = f"AI Error: Input blocked by safety filters ({response.prompt_feedback.block_reason})."
365 | elif response and response.candidates and response.candidates[0].finish_reason != 'STOP':
366 | error_content = f"AI Error: Response stopped ({response.candidates[0].finish_reason}). May be due to safety filters or length limits."
367 | except Exception:
368 | pass # Ignore errors during safety check parsing
369 |
370 | # Return the error as a JSON chat command so it appears in the chat history
371 | return json.dumps([{"action": "chat", "content": error_content}])
372 |
373 | # --- Live Preview Process Management ---
374 | def _find_available_port():
375 | """Finds an unused network port."""
376 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
377 | s.bind(('', 0)) # Bind to port 0 to let the OS choose a free port
378 | return s.getsockname()[1] # Return the chosen port number
379 |
380 | def stop_preview():
381 | """Stops the currently running Streamlit preview process."""
382 | process_to_stop = st.session_state.get("preview_process")
383 | pid = getattr(process_to_stop, 'pid', None) # Get process ID if available
384 |
385 | if process_to_stop and pid:
386 | st.info(f"Stopping preview process (PID: {pid})...")
387 | try:
388 | # Check if the process is still running
389 | if process_to_stop.poll() is None:
390 | # Ask the process to terminate gracefully
391 | process_to_stop.terminate()
392 | try:
393 | # Wait up to 3 seconds for it to close
394 | process_to_stop.wait(timeout=3)
395 | st.toast(f"Preview process {pid} stopped.", icon="⏹️")
396 | except subprocess.TimeoutExpired:
397 | # If it didn't stop, force kill it
398 | st.warning(f"Preview process {pid} did not stop gracefully, killing...")
399 | if process_to_stop.poll() is None: # Check again before kill
400 | process_to_stop.kill()
401 | process_to_stop.wait(timeout=1) # Brief wait for kill
402 | st.toast(f"Preview process {pid} killed.", icon="💀")
403 | else:
404 | # Process was already finished
405 | st.warning(f"Preview process {pid} had already stopped.")
406 | except ProcessLookupError:
407 | st.warning(f"Preview process {pid} not found (already gone?).")
408 | except Exception as e:
409 | st.error(f"Error trying to stop preview process {pid}: {e}")
410 |
411 | # Always clear the preview state variables after attempting to stop
412 | st.session_state.preview_process = None
413 | st.session_state.preview_port = None
414 | st.session_state.preview_url = None
415 | st.session_state.preview_file = None
416 | st.rerun() # Update the UI immediately
417 |
418 | def start_preview(python_filename):
419 | """Starts a Streamlit app preview in a separate process."""
420 | filepath = WORKSPACE_DIR / python_filename
421 | # Basic check: ensure the file exists and is a Python file
422 | if not filepath.is_file() or filepath.suffix != '.py':
423 | st.error(f"Cannot preview: '{python_filename}' is not a valid Python file.")
424 | return False
425 |
426 | # Stop any currently running preview first
427 | if st.session_state.get("preview_process"):
428 | st.warning("Stopping existing preview first...")
429 | stop_preview() # This function will rerun, so we might need to adjust flow
430 | # Let's add a small delay here AFTER stop_preview (which reruns) handles its part.
431 | # This might mean the button needs to be clicked twice sometimes, but simplifies state.
432 | # A more complex approach would involve flags in session state.
433 | time.sleep(0.5) # Brief pause
434 |
435 | with st.spinner(f"Starting preview for `{python_filename}`..."):
436 | try:
437 | port = _find_available_port()
438 | # Command to run: python -m streamlit run --port [options]
439 | command = [
440 | sys.executable, # Use the same Python interpreter running this script
441 | "-m", "streamlit", "run",
442 | str(filepath.resolve()), # Use the full path to the file
443 | "--server.port", str(port),
444 | "--server.headless", "true", # Don't open a browser automatically
445 | "--server.runOnSave", "false", # Don't automatically rerun on save
446 | "--server.fileWatcherType", "none" # Don't watch for file changes
447 | ]
448 |
449 | # Start the command as a new process
450 | preview_proc = subprocess.Popen(
451 | command,
452 | stdout=subprocess.PIPE, # Capture output (optional)
453 | stderr=subprocess.PIPE, # Capture errors
454 | text=True, encoding='utf-8'
455 | )
456 |
457 | # Give Streamlit a moment to start up or fail
458 | time.sleep(2.5) # Wait a bit
459 |
460 | # Check if the process started successfully (is still running)
461 | if preview_proc.poll() is None:
462 | # Success! Store process info in session state
463 | st.session_state.preview_process = preview_proc
464 | st.session_state.preview_port = port
465 | st.session_state.preview_url = f"http://localhost:{port}"
466 | st.session_state.preview_file = python_filename
467 | st.success(f"Preview started: {st.session_state.preview_url}")
468 | st.toast(f"Preview running for {python_filename}", icon="🚀")
469 | return True
470 | else:
471 | # Failure: Process ended quickly, likely an error
472 | st.error(f"Preview failed to start for `{python_filename}`.")
473 | # Try to show error output from the failed process
474 | try:
475 | stderr_output = preview_proc.stderr.read()
476 | if stderr_output:
477 | st.error("Preview Error Output:")
478 | st.code(stderr_output, language=None)
479 | else: # If no stderr, maybe there was stdout?
480 | stdout_output = preview_proc.stdout.read()
481 | if stdout_output:
482 | st.error("Preview Output (may contain errors):")
483 | st.code(stdout_output, language=None)
484 | except Exception as read_e:
485 | st.error(f"Could not read output from failed preview process: {read_e}")
486 | # Clear any partial state
487 | st.session_state.preview_process = None
488 | return False
489 | except Exception as e:
490 | st.error(f"Error trying to start preview process: {e}")
491 | st.session_state.preview_process = None # Ensure clean state
492 | return False
493 |
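# Illustrative sketch (not called anywhere in the app): the launch-and-verify
# pattern used in start_preview(). poll() returns None while the child is still
# alive and its exit code once it has died, so a short grace period followed by
# poll() separates "started OK" from "crashed during startup".
def _demo_launch_and_verify(command):
    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, text=True, encoding='utf-8')
    time.sleep(2.5)                   # grace period for the server to come up
    if proc.poll() is None:
        return proc                   # still running: assume startup succeeded
    raise RuntimeError(f"Process exited early:\n{proc.stderr.read()}")
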
494 | # --- Streamlit App UI ---
495 |
496 | st.title("🤖 AI Streamlit App Generator")
497 |
498 | # --- Sidebar ---
499 | with st.sidebar:
500 | st.header("💬 Chat & Controls")
501 | st.divider()
502 |
503 | # --- Chat History Display ---
504 | chat_container = st.container(height=400)
505 | with chat_container:
506 | if not st.session_state.messages:
507 | st.info("Chat history is empty. Type your instructions below.")
508 | else:
509 | # Loop through messages stored in session state
510 | for message in st.session_state.messages:
511 | role = message["role"] # "user" or "assistant"
512 | content = message["content"]
513 |                 avatar = "🧑‍💻" if role == "user" else "🤖"
514 |
515 | # Display message using Streamlit's chat message element
516 | with st.chat_message(role, avatar=avatar):
517 | if role == "assistant" and isinstance(content, list):
518 | # Assistant message contains commands - format them nicely
519 | file_actions_summary = ""
520 | chat_responses = []
521 | code_snippets = []
522 |
523 | for command in content:
524 | if not isinstance(command, dict): continue # Skip malformed
525 |
526 | action = command.get("action")
527 | filename = command.get("filename")
528 | cmd_content = command.get("content")
529 |
530 | if action == "create_update":
531 | file_actions_summary += f"📝 **Saved:** `{filename}`\n"
532 | if cmd_content:
533 | code_snippets.append({"filename": filename, "content": cmd_content})
534 | elif action == "delete":
535 | file_actions_summary += f"🗑️ **Deleted:** `{filename}`\n"
536 | elif action == "chat":
537 | chat_responses.append(str(cmd_content or "..."))
538 | else:
539 | file_actions_summary += f"⚠️ **Unknown Action:** `{action}`\n"
540 |
541 | # Display the formatted summary and chat responses
542 | full_display_text = (file_actions_summary + "\n".join(chat_responses)).strip()
543 | if full_display_text:
544 | st.markdown(full_display_text)
545 | else: # Handle cases where AI might return empty actions
546 | st.markdown("(AI performed no displayable actions)")
547 |
548 | # Show code snippets in collapsible sections
549 | for snippet in code_snippets:
550 | with st.expander(f"View Code for `{snippet['filename']}`", expanded=False):
551 | st.code(snippet['content'], language="python")
552 |
553 | elif isinstance(content, str):
554 | # Simple text message (from user or AI chat action)
555 | st.write(content)
556 | else:
557 | # Fallback for unexpected content type
558 | st.write(f"Unexpected message format: {content}")
559 |
560 | # --- Chat Input Box ---
561 | user_prompt = st.chat_input("Tell the AI what to do (e.g., 'Create hello.py')")
562 | if user_prompt:
563 | # 1. Add user's message to the chat history (in session state)
564 | st.session_state.messages.append({"role": "user", "content": user_prompt})
565 |
566 | # 2. Show a spinner while waiting for the AI
567 | with st.spinner("🧠 AI Thinking..."):
568 | # 3. Send the *entire* chat history to the AI
569 | ai_response_text = ask_gemini_ai(st.session_state.messages)
570 | # 4. Parse the AI's response and execute file commands
571 | ai_commands_executed = parse_and_execute_ai_commands(ai_response_text)
572 |
573 | # 5. Add the AI's response (the list of executed commands) to chat history
574 | st.session_state.messages.append({"role": "assistant", "content": ai_commands_executed})
575 |
576 | # 6. Rerun the script immediately to show the new messages and update file list/editor
577 | st.rerun()
578 |
579 | st.divider()
580 |
581 | # --- Status Info ---
582 | st.subheader("Status & Info")
583 | st.success(f"Using AI model: {GEMINI_MODEL_NAME}", icon="✅")
584 | st.warning(
585 | "**Notes:** Review AI code before running previews. `create_update` overwrites files.",
586 | )
587 |
588 |
589 | # --- Main Area Tabs ---
590 | selected_tab = option_menu(
591 | menu_title=None,
592 | options=["Workspace", "Live Preview"],
593 | icons=["folder-fill", "play-btn-fill"],
594 | orientation="horizontal",
595 | key="main_tab_menu"
596 | # Removed custom styles for simplicity
597 | )
598 |
599 | # --- Workspace Tab ---
600 | if selected_tab == "Workspace":
601 | st.header("📂 Workspace & Editor")
602 | st.divider()
603 |
604 | # Create two columns: one for file list, one for editor
605 | file_list_col, editor_col = st.columns([0.3, 0.7]) # 30% width for files, 70% for editor
606 |
607 | with file_list_col:
608 | st.subheader("Files")
609 | python_files = get_workspace_python_files()
610 |
611 | # Prepare options for the dropdown menu
612 | select_options = ["--- Select a file ---"] + python_files
613 | current_selection_in_state = st.session_state.get("selected_file")
614 |
615 | # Find the index of the currently selected file to set the dropdown default
616 | try:
617 | current_index = select_options.index(current_selection_in_state) if current_selection_in_state else 0
618 | except ValueError:
619 | current_index = 0 # If file in state doesn't exist, default to "Select"
620 |
621 | # The dropdown widget
622 | selected_option = st.selectbox(
623 | "Edit file:",
624 | options=select_options,
625 | index=current_index,
626 | key="file_selector_dropdown",
627 | label_visibility="collapsed" # Hide the label "Edit file:"
628 | )
629 |
630 | # --- Handle File Selection Change ---
631 | # If the dropdown selection is different from what's stored in session state...
632 | newly_selected_filename = selected_option if selected_option != "--- Select a file ---" else None
633 | if newly_selected_filename != current_selection_in_state:
634 | st.session_state.selected_file = newly_selected_filename # Update state
635 | # Read the content of the newly selected file
636 | file_content = read_file(newly_selected_filename) if newly_selected_filename else ""
637 | # Handle case where file read failed (e.g., it was deleted)
638 | if file_content is None and newly_selected_filename:
639 | file_content = f"# ERROR: Could not read file '{newly_selected_filename}'"
640 |
641 | # Update session state with the file's content for the editor
642 | st.session_state.file_content_on_load = file_content
643 | st.session_state.editor_unsaved_content = file_content # Start editor with file content
644 | st.session_state.last_saved_content = file_content # Mark as saved initially
645 | st.rerun() # Rerun script to load the new file into the editor
646 |
647 | with editor_col:
648 | st.subheader("Code Editor")
649 | selected_filename = st.session_state.selected_file
650 |
651 | if selected_filename:
652 | st.caption(f"Editing: `{selected_filename}`")
653 |
654 | # Display the Ace code editor widget
655 | editor_current_text = st_ace(
656 | value=st.session_state.get('editor_unsaved_content', ''), # Show unsaved content
657 | language="python",
658 | theme=ACE_DEFAULT_THEME,
659 | keybinding=ACE_DEFAULT_KEYBINDING,
660 | font_size=14, tab_size=4, wrap=True,
661 | auto_update=False, # Don't trigger reruns on every keystroke
662 | key=f"ace_editor_{selected_filename}" # Unique key helps reset state on file change
663 | )
664 |
665 | # Check if the editor's current text is different from the last saved text
666 | has_unsaved_changes = (editor_current_text != st.session_state.last_saved_content)
667 |
668 | # If the text in the editor box changes, update our 'unsaved' state variable
669 | if editor_current_text != st.session_state.editor_unsaved_content:
670 | st.session_state.editor_unsaved_content = editor_current_text
671 | st.rerun() # Rerun to update the 'Save Changes' button state
672 |
673 | # --- Editor Action Buttons ---
674 | # Using sac.buttons here for the nice grouped layout with icons.
675 | editor_buttons = [
676 | sac.ButtonsItem(label="💾 Save Changes", icon="save", disabled=not has_unsaved_changes),
677 | sac.ButtonsItem(label="🗑️ Delete File", icon="trash", color="red"),
678 | ]
679 | clicked_editor_button = sac.buttons(
680 | items=editor_buttons, index=None, format_func='title',
681 | align='end', size='small', return_index=False,
682 | key="editor_action_buttons"
683 | )
684 |
685 | # --- Handle Button Clicks ---
686 | if clicked_editor_button == "💾 Save Changes":
687 | if save_file(selected_filename, editor_current_text):
688 | # Update state to reflect the save
689 | st.session_state.file_content_on_load = editor_current_text
690 | st.session_state.last_saved_content = editor_current_text
691 | st.toast(f"Saved: `{selected_filename}`", icon="💾")
692 | time.sleep(0.5) # Let toast message show
693 | st.rerun() # Rerun to disable the save button
694 | else:
695 | st.error(f"Error: Could not save '{selected_filename}'.")
696 |
697 | elif clicked_editor_button == "🗑️ Delete File":
698 | # Use sac.confirm_button for a confirmation pop-up
699 | needs_confirmation = True # Flag to show confirmation
700 | if needs_confirmation:
701 | confirmed = sac.confirm_button(
702 | f"Delete `{selected_filename}`?", # Confirmation message
703 | color="error", key="confirm_delete_button"
704 | )
705 | if confirmed:
706 | if delete_file(selected_filename):
707 | # Deletion successful, file list and editor will update on rerun
708 | st.rerun()
709 | # No 'else' needed, delete_file shows errors
710 |
711 | # Show a warning if there are unsaved changes
712 | if has_unsaved_changes:
713 | st.warning("You have unsaved changes.")
714 |
715 | else:
716 | # Show a placeholder message if no file is selected
717 | st.info("Select a Python file from the list on the left to view or edit.")
718 | st_ace(value="# Select a file...", language="python", readonly=True, key="ace_placeholder")
719 |
720 | # --- Live Preview Tab ---
721 | elif selected_tab == "Live Preview":
722 | st.header("▶️ Live Preview")
723 | st.divider()
724 | st.warning("⚠️ Running AI-generated code can have unintended consequences. Review code first!")
725 |
726 | # Get preview status from session state
727 | is_preview_running = st.session_state.get("preview_process") is not None
728 | file_being_previewed = st.session_state.get("preview_file")
729 | preview_url = st.session_state.get("preview_url")
730 | selected_file_for_preview = st.session_state.get("selected_file") # File selected in Workspace
731 |
732 | # --- Preview Controls ---
733 | st.subheader("Controls")
734 | if not selected_file_for_preview:
735 | st.info("Select a file in the 'Workspace' tab to enable preview controls.")
736 | # Allow stopping a preview even if no file is selected
737 | if is_preview_running:
738 | st.warning(f"Preview is running for: `{file_being_previewed}`")
739 | if st.button(f"⏹️ Stop Preview ({file_being_previewed})", key="stop_other_preview"):
740 | stop_preview() # Will stop and rerun
741 | else:
742 | # Controls for the file selected in the Workspace
743 | st.write(f"File selected for preview: `{selected_file_for_preview}`")
744 | is_python = selected_file_for_preview.endswith(".py")
745 |
746 | if not is_python:
747 | st.error("Cannot preview: Selected file is not a Python (.py) file.")
748 | else:
749 | # Layout Run and Stop buttons side-by-side
750 | run_col, stop_col = st.columns(2)
751 | with run_col:
752 | # Disable Run button if a preview is already running
753 | run_disabled = is_preview_running
754 | if st.button("🚀 Run Preview", disabled=run_disabled, type="primary", use_container_width=True):
755 | if start_preview(selected_file_for_preview):
756 | st.rerun() # Rerun to show the preview iframe
757 | with stop_col:
758 | # Disable Stop button if no preview is running OR if the running preview
759 | # is for a DIFFERENT file than the one currently selected in the workspace.
760 | stop_disabled = not is_preview_running or (file_being_previewed != selected_file_for_preview)
761 | if st.button("⏹️ Stop Preview", disabled=stop_disabled, use_container_width=True):
762 | stop_preview() # Will stop and rerun
763 |
764 | st.divider()
765 |
766 | # --- Preview Display ---
767 | st.subheader("Preview Window")
768 | if is_preview_running:
769 | # Check if the running preview matches the file selected in the workspace
770 | if file_being_previewed == selected_file_for_preview:
771 | st.info(f"Showing preview for `{file_being_previewed}`")
772 | st.caption(f"URL: {preview_url}")
773 | # Check if the process is still alive before showing iframe
774 | live_process = st.session_state.preview_process
775 | if live_process and live_process.poll() is None:
776 | # Display the running Streamlit app in an iframe
777 | st.components.v1.iframe(preview_url, height=600, scrolling=True)
778 | else:
779 | # The process died unexpectedly
780 | st.warning(f"Preview for `{file_being_previewed}` stopped unexpectedly.")
781 | # Attempt to show error output if available
782 | if live_process:
783 | try:
784 | stderr = live_process.stderr.read()
785 | if stderr:
786 | with st.expander("Show error output from stopped process"): st.code(stderr)
787 | except Exception: pass # Ignore errors reading output
788 | # Clear the dead process state (stop_preview handles this and reruns)
789 | if live_process: # Check again in case state changed
790 | stop_preview()
791 | else:
792 | # A preview is running, but not for the file selected in the workspace
793 | st.warning(f"Preview is running for `{file_being_previewed}`. Select that file in the Workspace to see it here, or stop it using the controls above.")
794 | else:
795 | # No preview is currently running
796 | st.info("Click 'Run Preview' on a selected Python file to see it here.")
--------------------------------------------------------------------------------
/fast.py:
--------------------------------------------------------------------------------
1 | ## Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts
2 | ## Subscribe for more AI/Machine Learning/Data Science Tutorials
3 |
4 | ##################################
5 | ## 1. Data Import
6 | ##################################
7 |
8 | import os
9 | import markdown
10 | import pandas as pd
11 | from fasthtml.common import *
12 | from fastcore.basics import NotStr
13 | import plotly.express as px
14 | import nfl_data_py as nfl
15 |
16 |
17 |
18 | ##################################
19 | ## 2. Initialize FastHTML app
20 | ##################################
21 |
22 | app, rt = fast_app()
23 |
24 |
25 |
26 | ##################################
27 | ## 3. Input and Process Markdown Blog Files
28 | ##################################
29 |
30 | # Directory containing Markdown files
31 | POSTS_DIR = 'posts'
32 |
33 | # Load and convert Markdown files to HTML
34 | def load_posts():
35 | posts = []
36 | # List all Markdown files with their full paths
37 | md_files = [os.path.join(POSTS_DIR, f) for f in os.listdir(POSTS_DIR) if f.endswith('.md')]
38 | # Sort files by last modified time in descending order
39 | md_files.sort(key=os.path.getmtime, reverse=True)
40 | for filepath in md_files:
41 | with open(filepath, 'r', encoding='utf-8') as file:
42 | html_content = markdown.markdown(file.read())
43 | title = os.path.basename(filepath).replace('_', ' ').replace('.md', '').title()
44 | posts.append({"title": title, "content": html_content})
45 | return posts
46 |
47 |
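# Tiny illustration (not used by the app) of the Markdown -> HTML step that
# load_posts() performs per file, on a literal string; expected output inline.
def _demo_markdown_to_html() -> str:
    return markdown.markdown("# Title\n\nSome *text*.")
    # -> '<h1>Title</h1>\n<p>Some <em>text</em>.</p>'
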
48 |
49 | ##################################
50 | ## 4. Function to import, wrangle, and graph data
51 | ##################################
52 |
53 | # Generate NFL Cumulative Offensive Yards Chart
54 | def generate_offensive_yards_chart():
55 | # Fetch play-by-play data for the 2024 season
56 | df = nfl.import_pbp_data([2024])
57 |
58 | # Filter for rushing and passing plays
59 | rushing_plays = df[df['play_type'] == 'run']
60 | passing_plays = df[df['play_type'] == 'pass']
61 |
62 | # Group by offensive team and week, then sum yards gained
63 | weekly_rushing_yards = rushing_plays.groupby(['posteam', 'week'])['yards_gained'].sum().reset_index()
64 | weekly_passing_yards = passing_plays.groupby(['posteam', 'week'])['yards_gained'].sum().reset_index()
65 |
66 | # Add a 'play_type' column
67 | weekly_rushing_yards['play_type'] = 'Rushing'
68 | weekly_passing_yards['play_type'] = 'Passing'
69 |
70 | # Combine the dataframes
71 | combined_df = pd.concat([weekly_rushing_yards, weekly_passing_yards])
72 |
73 | # Pivot the table to have teams as columns and weeks as rows
74 | pivot_df = combined_df.pivot_table(index='week', columns=['posteam', 'play_type'], values='yards_gained', fill_value=0)
75 |
76 | # Calculate cumulative yards
77 | cumulative_yards = pivot_df.cumsum()
78 |
79 | # Reset index for plotting
80 | cumulative_yards = cumulative_yards.reset_index()
81 | cumulative_yards.columns = ['week'] + [f'{team}_{ptype}' for team, ptype in cumulative_yards.columns[1:]]
82 |
83 | # Melt the dataframe for Plotly Express
84 | melted_df = cumulative_yards.melt(id_vars=['week'], var_name='team_playtype', value_name='cumulative_yards')
85 | melted_df[['team', 'play_type']] = melted_df['team_playtype'].str.split('_', expand=True)
86 |
87 | # Create Plotly Express figure
88 | fig = px.line(melted_df, x='week', y='cumulative_yards', color='team', facet_col='play_type',
89 | title='Cumulative Offensive Yards by Week (2024 Season)',
90 | labels={'week': 'Week', 'cumulative_yards': 'Cumulative Yards'},
91 | category_orders={'play_type': ['Rushing', 'Passing']})
92 |
93 | fig.update_layout(legend_title_text='Team')
94 | fig.update_xaxes(type='category')
95 |
96 | return fig.to_html(full_html=False, include_plotlyjs='cdn')
97 |
98 |
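# A toy, self-contained version of the reshape pipeline above (pivot to wide,
# cumulative-sum down the weeks, then melt back to long for Plotly), using a
# made-up two-week sample for one team so the intermediate shapes are obvious.
# Not called by the app; the expected result is shown in the trailing comment.
def _demo_pivot_cumsum_melt() -> pd.DataFrame:
    toy = pd.DataFrame({
        'posteam':      ['KC', 'KC', 'KC', 'KC'],
        'week':         [1, 1, 2, 2],
        'play_type':    ['Rushing', 'Passing', 'Rushing', 'Passing'],
        'yards_gained': [120, 250, 90, 310],
    })
    wide = toy.pivot_table(index='week', columns=['posteam', 'play_type'],
                           values='yards_gained', fill_value=0)
    cumulative = wide.cumsum().reset_index()          # running totals per column
    cumulative.columns = ['week'] + [f'{team}_{ptype}'
                                     for team, ptype in cumulative.columns[1:]]
    return cumulative.melt(id_vars=['week'], var_name='team_playtype',
                           value_name='cumulative_yards')
    # week  team_playtype  cumulative_yards
    #    1     KC_Passing               250
    #    2     KC_Passing               560
    #    1     KC_Rushing               120
    #    2     KC_Rushing               210
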
99 |
100 | ##################################
101 | ## 5. Homepage Route for Content Layout
102 | ##################################
103 |
104 | @rt('/')
105 | def home():
106 | posts = load_posts()
107 | chart_html = generate_offensive_yards_chart()
108 |
109 | # Create a list of article components for each post
110 | article_posts = [
111 | Article(
112 | H1(post['title'], cls='post-title'),
113 | Div(NotStr(post['content']))
114 | )
115 | for post in posts
116 | ]
117 | return Html(
118 | Head(
119 | Title('Deep Charts: NFL Yards Tracker'),
120 | Link(rel='stylesheet', href='https://cdn.jsdelivr.net/npm/@picocss/pico@latest/css/pico.min.css'),
121 | Style("""
122 | .header {
123 | text-align: center;
124 | padding: 1em;
125 | background-color: #f8f9fa;
126 | position: fixed;
127 | top: 0;
128 | width: 100%;
129 | z-index: 10;
130 | }
131 | .container {
132 | display: flex;
133 | max-width: 100%;
134 | margin-top: 80px; /* Space for the fixed header */
135 | }
136 | .posts {
137 | flex: 2;
138 | overflow-y: auto;
139 | height: calc(100vh - 80px); /* Adjust for header */
140 | padding: 1em;
141 | margin-right: 40%;
142 | box-sizing: border-box;
143 | }
144 | .chart {
145 | flex: 1;
146 | position: fixed;
147 | right: 0;
148 | top: 80px; /* Space for the fixed header */
149 | width: 40%;
150 | height: calc(100vh - 80px); /* Adjust for header */
151 | padding: 1em;
152 | box-sizing: border-box;
153 | }
154 | h1.post-title {
155 | font-size: 1.5em;
156 | font-weight: bold;
157 | }
158 | article {
159 | margin-bottom: 2em;
160 | }
161 | """)
162 | ),
163 | Body(
164 | Div(
165 | H1('Deep Charts: NFL Yards Tracker', cls='header'),
166 | Div(
167 | Div(*article_posts, cls="posts"),
168 | Div(NotStr(chart_html), cls="chart"),
169 | cls="container"
170 | )
171 | )
172 | )
173 | )
174 |
175 |
176 |
177 | ##################################
178 | ## 6. Serve the App
179 | ##################################
180 |
181 | serve()
182 |
--------------------------------------------------------------------------------
/scikit-ollama-tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Source: Deep Charts Youtube Channel: https://www.youtube.com/@DeepCharts"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "# AI Sentiment Analysis with Ollama and Scikit-Ollama"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Import Libraries"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "import pandas as pd\n",
31 | "from finvizfinance.quote import finvizfinance\n",
32 | "from skollama.models.ollama.classification.zero_shot import ZeroShotOllamaClassifier\n",
33 | "from skollama.models.ollama.classification.few_shot import FewShotOllamaClassifier"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "## Pull Stock News Headline Data"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "# Initialize the finvizfinance object for INTC\n",
50 | "stock = finvizfinance('INTC')\n",
51 | "\n",
52 | "# Fetch the latest news articles\n",
53 | "news_df = stock.ticker_news()\n",
54 | "\n",
55 | "news_df.head()"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "Data Wrangling (reorder the dataframe and drop headlines that do not mention the company)"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": null,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "# Reorder Columns\n",
72 | "news_df = news_df[['Date','Link','Title']]\n",
73 | "\n",
74 | "# Define the keywords to filter by\n",
75 | "keywords = ['INTC', 'Intel']\n",
76 | "\n",
77 | "# Create a regex pattern by joining keywords with '|'\n",
78 | "pattern = '|'.join(keywords)\n",
79 | "\n",
80 | "# Filter the DataFrame using str.contains\n",
81 | "filtered_news_df = news_df[news_df['Title'].str.contains(pattern, case=False, na=False)]\n",
82 | "\n",
83 | "filtered_news_df.head()"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "## Run Zero Shot Classifier"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": null,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": [
99 | "# Initialize the ZeroShotOllamaClassifier\n",
100 | "clf = ZeroShotOllamaClassifier(model='llama3')\n",
101 | "\n",
102 | "# Define the candidate labels\n",
103 | "candidate_labels = ['positive', 'negative', 'neutral']\n",
104 | "\n",
105 | "# Fit the classifier (no training data needed for zero-shot)\n",
106 | "clf.fit(None, candidate_labels)\n",
107 | "\n",
108 | "# Predict the sentiment of each news title as a new column in our DataFrame\n",
109 | "filtered_news_df['Sentiment_zero'] = clf.predict(filtered_news_df['Title'])\n"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": null,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "filtered_news_df[['Title','Sentiment_zero']]"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "## Train and Run Few Shot Classifier"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "1. Start by randomly selecting a few training examples from the original dataset"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": null,
138 | "metadata": {},
139 | "outputs": [],
140 | "source": [
141 | "# Randomly select 7 headlines for few-shot training and add a training indicator\n",
142 | "few_shot_df = filtered_news_df.sample(n=7, random_state=1)\n",
143 | "filtered_news_df['Few Shot Training Example'] = filtered_news_df.index.isin(few_shot_df.index)\n",
144 | "\n",
145 | "# View training examples\n",
146 | "list(few_shot_df['Title'])"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "2. Manually review the training examples and assign a human-guided label to each"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 7,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "# Manually assigned labels corresponding to the selected headlines\n",
163 | "# Fill in below based on above headlines\n",
164 | "user_labels = [\n",
165 | " 'neutral',\n",
166 | " 'negative',\n",
167 | " 'neutral',\n",
168 | " 'positive',\n",
169 | " 'positive',\n",
170 | " 'neutral',\n",
171 | " 'positive'\n",
172 | "]\n",
173 | "\n",
174 | "# Add the user-provided labels to the few-shot DataFrame\n",
175 | "few_shot_df['User_Sentiment'] = user_labels"
176 | ]
177 | },
178 | {
179 | "cell_type": "markdown",
180 | "metadata": {},
181 | "source": [
182 | "3. Initialize and run the few-shot classifier on the rest of the dataset"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "# Initialize the FewShotOllamaClassifier\n",
192 | "few_shot_clf = FewShotOllamaClassifier(model='llama3')\n",
193 | "\n",
194 | "# Fit the classifier with user-provided examples directly from the DataFrame columns\n",
195 | "few_shot_clf.fit(few_shot_df['Title'], few_shot_df['User_Sentiment'])\n",
196 | "\n",
197 | "# Predict the sentiment of all news titles in the filtered DataFrame\n",
198 | "filtered_news_df['Sentiment_few'] = few_shot_clf.predict(filtered_news_df['Title'])\n"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "filtered_news_df_2 = filtered_news_df[['Title','Sentiment_zero','Sentiment_few','Few Shot Training Example']]\n",
208 | "filtered_news_df_2"
209 | ]
210 | }
211 | ],
212 | "metadata": {
213 | "kernelspec": {
214 | "display_name": "general_env",
215 | "language": "python",
216 | "name": "python3"
217 | },
218 | "language_info": {
219 | "codemirror_mode": {
220 | "name": "ipython",
221 | "version": 3
222 | },
223 | "file_extension": ".py",
224 | "mimetype": "text/x-python",
225 | "name": "python",
226 | "nbconvert_exporter": "python",
227 | "pygments_lexer": "ipython3",
228 | "version": "3.12.3"
229 | }
230 | },
231 | "nbformat": 4,
232 | "nbformat_minor": 2
233 | }
234 |
--------------------------------------------------------------------------------
/stock_sentiment_agents.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Stock Sentiment Agent Workflow"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "## Libraries\n",
24 | "from phi.agent import Agent\n",
25 | "from phi.model.openai import OpenAIChat\n",
26 | "from phi.tools.googlesearch import GoogleSearch\n",
27 | "from phi.tools.yfinance import YFinanceTools"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "## Put Open AI API key into Python environment\n",
37 | "import os\n",
38 | "os.environ[\"OPENAI_API_KEY\"] = 'sk-xxxxxxxx'"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 3,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "application/vnd.jupyter.widget-view+json": {
49 | "model_id": "ed0b90db992446c4a4d7a9625f5ed101",
50 | "version_major": 2,
51 | "version_minor": 0
52 | },
53 | "text/plain": [
54 | "Output()"
55 | ]
56 | },
57 | "metadata": {},
58 | "output_type": "display_data"
59 | },
60 | {
61 | "data": {
62 | "text/html": [
63 | "\n"
64 | ],
65 | "text/plain": []
66 | },
67 | "metadata": {},
68 | "output_type": "display_data"
69 | }
70 | ],
71 | "source": [
72 | "## Create Agents\n",
73 | "\n",
74 | "# Sentiment Agent\n",
75 | "sentiment_agent = Agent(\n",
76 | " name=\"Sentiment Agent\",\n",
77 | " role=\"Search and interpret news articles.\",\n",
78 | " model=OpenAIChat(id=\"gpt-4o\"),\n",
79 | " tools=[GoogleSearch()],\n",
80 | " instructions=[\n",
81 | " \"Find relevant news articles for each company and analyze the sentiment.\",\n",
82 | "        \"Provide sentiment scores from 1 (negative) to 10 (positive) with reasoning and sources.\",\n",
83 | " \"Cite your sources. Be specific and provide links.\"\n",
84 | " ],\n",
85 | " show_tool_calls=True,\n",
86 | " markdown=True,\n",
87 | ")\n",
88 | "\n",
89 | "# Finance Agent\n",
90 | "finance_agent = Agent(\n",
91 | " name=\"Finance Agent\",\n",
92 | " role=\"Get financial data and interpret trends.\",\n",
93 | " model=OpenAIChat(id=\"gpt-4o\"),\n",
94 | " tools=[YFinanceTools(stock_price=True, analyst_recommendations=True, company_info=True)],\n",
95 | " instructions=[\n",
96 | " \"Retrieve stock prices, analyst recommendations, and key financial data.\",\n",
97 | " \"Focus on trends and present the data in tables with key insights.\"\n",
98 | " ],\n",
99 | " show_tool_calls=True,\n",
100 | " markdown=True,\n",
101 | ")\n",
102 | "\n",
103 | "# Analyst Agent\n",
104 | "analyst_agent = Agent(\n",
105 | " name=\"Analyst Agent\",\n",
106 | " role=\"Ensure thoroughness and draw conclusions.\",\n",
107 | " model=OpenAIChat(id=\"gpt-4o\"),\n",
108 | " instructions=[\n",
109 | " \"Check outputs for accuracy and completeness.\",\n",
110 | " \"Synthesize data to provide a final sentiment score (1-10) with justification.\"\n",
111 | " ],\n",
112 | " show_tool_calls=True,\n",
113 | " markdown=True,\n",
114 | ")\n",
115 | "\n",
116 | "# Team of Agents\n",
117 | "agent_team = Agent(\n",
118 | " model=OpenAIChat(id=\"gpt-4o\"),\n",
119 | " team=[sentiment_agent, finance_agent, analyst_agent],\n",
120 | " instructions=[\n",
121 | " \"Combine the expertise of all agents to provide a cohesive, well-supported response.\",\n",
122 | " \"Always include references and dates for all data points and sources.\",\n",
123 | " \"Present all data in structured tables for clarity.\",\n",
124 | " \"Explain the methodology used to arrive at the sentiment scores.\"\n",
125 | " ],\n",
126 | " show_tool_calls=True,\n",
127 | " markdown=True,\n",
128 | ")\n",
129 | "\n",
130 | "## Run Agent Team\n",
131 | "\n",
132 | "# Final Prompt\n",
133 | "agent_team.print_response(\n",
134 | " \"Analyze the sentiment for the following companies during the week of December 2nd-6th, 2024: NVDA, MSFT. \\n\\n\"\n",
135 | "    \"1. **Sentiment Analysis**: Search for relevant news articles and interpret the sentiment for each company. Provide sentiment scores on a scale of 1 to 10, explain your reasoning, and cite your sources.\\n\\n\"\n",
136 | " \"2. **Financial Data**: Analyze stock price movements, analyst recommendations, and any notable financial data. Highlight key trends or events, and present the data in tables.\\n\\n\"\n",
137 | " \"3. **Consolidated Analysis**: Combine the insights from sentiment analysis and financial data to assign a final sentiment score (1-10) for each company. Justify the scores and provide a summary of the most important findings.\\n\\n\"\n",
138 | " \"Ensure your response is accurate, comprehensive, and includes references to sources with publication dates.\",\n",
139 | " stream=True\n",
140 | ")"
141 | ]
142 | }
143 | ],
144 | "metadata": {
145 | "kernelspec": {
146 | "display_name": "general_env",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.12.3"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 2
165 | }
166 |
--------------------------------------------------------------------------------
/stocks_dashboard.py:
--------------------------------------------------------------------------------
1 | # Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)
2 |
3 | import streamlit as st
4 | import plotly.express as px
5 | import plotly.graph_objects as go
6 | import pandas as pd
7 | import yfinance as yf
8 | from datetime import datetime, timedelta
9 | import pytz
10 | import ta
11 |
12 | ##########################################################################################
13 | ## PART 1: Define Functions for Pulling, Processing, and Creating Technical Indicators  ##
14 | ##########################################################################################
15 |
16 | # Fetch stock data based on the ticker, period, and interval
17 | def fetch_stock_data(ticker, period, interval):
18 | end_date = datetime.now()
19 |     if period == '1wk':  # '1wk' is a yfinance interval, not a valid period, so build the week window explicitly
20 | start_date = end_date - timedelta(days=7)
21 | data = yf.download(ticker, start=start_date, end=end_date, interval=interval)
22 | else:
23 | data = yf.download(ticker, period=period, interval=interval)
24 | return data
25 |
26 | # Process data to ensure it is timezone-aware and has the correct format
27 | def process_data(data):
28 | if data.index.tzinfo is None:
29 | data.index = data.index.tz_localize('UTC')
30 | data.index = data.index.tz_convert('US/Eastern')
31 | data.reset_index(inplace=True)
32 | data.rename(columns={'Date': 'Datetime'}, inplace=True)
33 | return data
34 |
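# A toy, self-contained illustration (not used by the dashboard) of the
# timezone step in process_data(): naive timestamps are first declared to be
# UTC with tz_localize, then converted for display with tz_convert
# (e.g. 14:30 UTC renders as 09:30 US/Eastern in January).
def _demo_timezone_handling() -> pd.DataFrame:
    idx = pd.date_range('2024-01-02 14:30', periods=3, freq='h')  # tz-naive
    demo = pd.DataFrame({'Close': [1.0, 2.0, 3.0]}, index=idx)
    demo.index = demo.index.tz_localize('UTC')        # declare the zone they are in
    demo.index = demo.index.tz_convert('US/Eastern')  # then convert for display
    return demo
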
35 | # Calculate basic metrics from the stock data
36 | def calculate_metrics(data):
37 | last_close = data['Close'].iloc[-1]
38 | prev_close = data['Close'].iloc[0]
39 | change = last_close - prev_close
40 | pct_change = (change / prev_close) * 100
41 | high = data['High'].max()
42 | low = data['Low'].min()
43 | volume = data['Volume'].sum()
44 | return last_close, change, pct_change, high, low, volume
45 |
46 | # Add simple moving average (SMA) and exponential moving average (EMA) indicators
47 | def add_technical_indicators(data):
48 | data['SMA_20'] = ta.trend.sma_indicator(data['Close'], window=20)
49 | data['EMA_20'] = ta.trend.ema_indicator(data['Close'], window=20)
50 | return data
51 |
52 | ###############################################
53 | ## PART 2: Creating the Dashboard App layout ##
54 | ###############################################
55 |
56 |
57 | # Set up Streamlit page layout
58 | st.set_page_config(layout="wide")
59 | st.title('Real-Time Stock Dashboard')
60 |
61 |
62 | # 2A: SIDEBAR PARAMETERS ############
63 |
64 | # Sidebar for user input parameters
65 | st.sidebar.header('Chart Parameters')
66 | ticker = st.sidebar.text_input('Ticker', 'ADBE')
67 | time_period = st.sidebar.selectbox('Time Period', ['1d', '1wk', '1mo', '1y', 'max'])
68 | chart_type = st.sidebar.selectbox('Chart Type', ['Candlestick', 'Line'])
69 | indicators = st.sidebar.multiselect('Technical Indicators', ['SMA 20', 'EMA 20'])
70 |
71 | # Mapping of time periods to data intervals
72 | interval_mapping = {
73 | '1d': '1m',
74 | '1wk': '30m',
75 | '1mo': '1d',
76 | '1y': '1wk',
77 | 'max': '1wk'
78 | }
79 |
80 |
81 | # 2B: MAIN CONTENT AREA ############
82 |
83 | # Update the dashboard based on user input
84 | if st.sidebar.button('Update'):
85 | data = fetch_stock_data(ticker, time_period, interval_mapping[time_period])
86 | data = process_data(data)
87 | data = add_technical_indicators(data)
88 |
89 | last_close, change, pct_change, high, low, volume = calculate_metrics(data)
90 |
91 | # Display main metrics
92 | st.metric(label=f"{ticker} Last Price", value=f"{last_close:.2f} USD", delta=f"{change:.2f} ({pct_change:.2f}%)")
93 |
94 | col1, col2, col3 = st.columns(3)
95 | col1.metric("High", f"{high:.2f} USD")
96 | col2.metric("Low", f"{low:.2f} USD")
97 | col3.metric("Volume", f"{volume:,}")
98 |
99 | # Plot the stock price chart
100 | fig = go.Figure()
101 | if chart_type == 'Candlestick':
102 | fig.add_trace(go.Candlestick(x=data['Datetime'],
103 | open=data['Open'],
104 | high=data['High'],
105 | low=data['Low'],
106 | close=data['Close']))
107 | else:
108 | fig = px.line(data, x='Datetime', y='Close')
109 |
110 | # Add selected technical indicators to the chart
111 | for indicator in indicators:
112 | if indicator == 'SMA 20':
113 | fig.add_trace(go.Scatter(x=data['Datetime'], y=data['SMA_20'], name='SMA 20'))
114 | elif indicator == 'EMA 20':
115 | fig.add_trace(go.Scatter(x=data['Datetime'], y=data['EMA_20'], name='EMA 20'))
116 |
117 | # Format graph
118 | fig.update_layout(title=f'{ticker} {time_period.upper()} Chart',
119 | xaxis_title='Time',
120 | yaxis_title='Price (USD)',
121 | height=600)
122 | st.plotly_chart(fig, use_container_width=True)
123 |
124 | # Display historical data and technical indicators
125 | st.subheader('Historical Data')
126 | st.dataframe(data[['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume']])
127 |
128 | st.subheader('Technical Indicators')
129 | st.dataframe(data[['Datetime', 'SMA_20', 'EMA_20']])
130 |
131 |
132 | # 2C: SIDEBAR PRICES ############
133 |
134 | # Sidebar section for real-time stock prices of selected symbols
135 | st.sidebar.header('Real-Time Stock Prices')
136 | stock_symbols = ['AAPL', 'GOOGL', 'AMZN', 'MSFT']
137 | for symbol in stock_symbols:
138 | real_time_data = fetch_stock_data(symbol, '1d', '1m')
139 | if not real_time_data.empty:
140 | real_time_data = process_data(real_time_data)
141 | last_price = real_time_data['Close'].iloc[-1]
142 | change = last_price - real_time_data['Open'].iloc[0]
143 | pct_change = (change / real_time_data['Open'].iloc[0]) * 100
144 | st.sidebar.metric(f"{symbol}", f"{last_price:.2f} USD", f"{change:.2f} ({pct_change:.2f}%)")
145 |
146 | # Sidebar information section
147 | st.sidebar.subheader('About')
148 | st.sidebar.info('This dashboard provides stock data and technical indicators for various time periods. Use the sidebar to customize your view.')
149 |
150 |
151 |
--------------------------------------------------------------------------------
/structured_outputs.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Structured Outputs: From Text to Tabular Data"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Source: @DeepCharts Youtube Channel (https://www.youtube.com/@DeepCharts)"
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "### Import Libraries"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 2,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "from ollama import chat\n",
31 | "from pydantic import BaseModel\n",
32 | "import pandas as pd\n",
33 | "from gnews import GNews"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "### Pull News Headline Data"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "\n",
50 | "# Fetch news articles\n",
51 | "google_news = GNews()\n",
52 | "news = google_news.get_news(\"NVDA\")\n",
53 | "\n",
54 | "# Extract top 6 news titles\n",
55 | "news_titles = [article['title'] for article in news[:6]]\n",
56 | "news_titles"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "### LLM Model and Structured Outputs"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "# Define BaseModel for news analysis\n",
73 | "class NewsAnalysis(BaseModel):\n",
74 | " sentiment: str \n",
75 | " future_looking: bool \n",
76 | "\n",
77 | "# Initialize an empty list to store results\n",
78 | "results = []\n",
79 | "\n",
80 | "# Loop through the news titles and analyze each\n",
81 | "for title in news_titles:\n",
82 | " response = chat(\n",
83 | " messages=[\n",
84 | " {\n",
85 | " 'role': 'user',\n",
86 | " 'content': f\"\"\"Analyze the following title for sentiment (positive, negative, or neutral) \n",
87 | " and whether it provides future-looking financial insight, predictions, or \n",
88 | " guidance on whether to buy/hold/sell the stock (True or False): {title}\n",
89 | " \"\"\",\n",
90 | " }\n",
91 | " ],\n",
92 | " model='llama3.2',\n",
93 | " format=NewsAnalysis.model_json_schema(),\n",
94 | " )\n",
95 | "\n",
96 | " # Parse the response into the NewsAnalysis model\n",
97 | " sentiment_analysis = NewsAnalysis.model_validate_json(response['message']['content'])\n",
98 | "\n",
99 | " # Append the results to the list\n",
100 | " results.append({\n",
101 | " 'title': title,\n",
102 | " 'sentiment': sentiment_analysis.sentiment,\n",
103 | " 'future_looking': sentiment_analysis.future_looking\n",
104 | " })\n",
105 | "\n",
106 | "# Convert the results to a DataFrame\n",
107 | "df = pd.DataFrame(results)\n",
108 | "df\n"
109 | ]
110 | }
111 | ],
112 | "metadata": {
113 | "kernelspec": {
114 | "display_name": "structured_output",
115 | "language": "python",
116 | "name": "python3"
117 | },
118 | "language_info": {
119 | "codemirror_mode": {
120 | "name": "ipython",
121 | "version": 3
122 | },
123 | "file_extension": ".py",
124 | "mimetype": "text/x-python",
125 | "name": "python",
126 | "nbconvert_exporter": "python",
127 | "pygments_lexer": "ipython3",
128 | "version": "3.9.21"
129 | }
130 | },
131 | "nbformat": 4,
132 | "nbformat_minor": 2
133 | }
134 |
--------------------------------------------------------------------------------