├── py
│   ├── ai
│   │   ├── fininsightgpt
│   │   │   ├── src
│   │   │   │   ├── __init__.py
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── master_file_generator.py
│   │   │   │   ├── main.py
│   │   │   │   └── document_processor.py
│   │   │   ├── company_data
│   │   │   │   └── Description.txt
│   │   │   ├── README.md
│   │   │   └── prompt_master
│   │   │       └── Equity_Research_Report_Template.md
│   │   ├── market_analyzer
│   │   │   ├── .env
│   │   │   ├── output
│   │   │   │   ├── Description.txt
│   │   │   │   └── Avanti feeds_chat_log_20250323_211534.pdf
│   │   │   ├── stock_chat.py
│   │   │   ├── requirements.txt
│   │   │   └── analysis_utils.py
│   │   ├── turnaround
│   │   │   ├── output
│   │   │   │   └── description.txt
│   │   │   ├── my_tools
│   │   │   │   ├── __init__.py
│   │   │   │   ├── web_fetcher.py
│   │   │   │   ├── markdown_report.py
│   │   │   │   ├── fs_reader.py
│   │   │   │   └── cmd_executor.py
│   │   │   ├── requirements.txt
│   │   │   ├── data
│   │   │   │   └── financial_data.csv
│   │   │   ├── main.py
│   │   │   └── README.md
│   │   ├── nse_announcements
│   │   │   ├── requirements.txt
│   │   │   └── weekly_nse_announcements_analysis.py
│   │   └── newsarranger
│   │       ├── requirements.txt
│   │       └── get_news_arrange.py
│   ├── eodhd
│   │   ├── price_data
│   │   │   ├── RELIANCE_M.csv
│   │   │   ├── RELIANCE_W.csv
│   │   │   └── RELIANCE_D.csv
│   │   ├── pricereader.py
│   │   ├── ath_scan.py
│   │   ├── my_rsi.py
│   │   ├── how_many_weeks_high.py
│   │   ├── saucer_crs.py
│   │   ├── gareebman_entry_exit.py
│   │   ├── mip12_scanner.py
│   │   └── stocks.csv
│   ├── beta
│   │   ├── chatgpt
│   │   │   ├── model.py
│   │   │   └── generate_report_for_company.py
│   │   └── concall_transcript_summarize.py
│   └── yf
│       ├── daily_rs_55_bo.py
│       ├── glb_scan.py
│       ├── ars_srs_scan.py
│       ├── weeklyRSIVolStopBO.py
│       ├── newHighMonthly.py
│       ├── multimonthBO.py
│       ├── box_scan.py
│       ├── green_dot.py
│       ├── trendreversal_ha.py
│       ├── supply_exhaustion_6m_scan.py
│       ├── ss_result_parser.py
│       ├── limevolume.py
│       └── stock_sector_strength.py
├── .gitignore
└── README.md
/py/ai/fininsightgpt/src/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/py/ai/market_analyzer/.env:
--------------------------------------------------------------------------------
1 | GOOGLE_API_KEY='YOUR API KEY'
2 |
--------------------------------------------------------------------------------
/py/ai/turnaround/output/description.txt:
--------------------------------------------------------------------------------
1 | Output reports will be here
2 |
--------------------------------------------------------------------------------
/py/ai/market_analyzer/output/Description.txt:
--------------------------------------------------------------------------------
1 | Your outputs are saved here
2 |
--------------------------------------------------------------------------------
/py/ai/fininsightgpt/company_data/Description.txt:
--------------------------------------------------------------------------------
1 | Create a folder for each business (company) you wish to analyse and place all its documents here.
--------------------------------------------------------------------------------
/py/ai/market_analyzer/output/Avanti feeds_chat_log_20250323_211534.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QueryType/bharattrader/HEAD/py/ai/market_analyzer/output/Avanti feeds_chat_log_20250323_211534.pdf
--------------------------------------------------------------------------------
/py/ai/fininsightgpt/src/requirements.txt:
--------------------------------------------------------------------------------
1 | pymupdf>=1.22.5
2 | python-docx>=0.8.11
3 | python-pptx>=0.6.21
4 | pandas>=2.0.0
5 | openpyxl>=3.1.2
6 | Pillow>=10.0.0
7 | pytesseract>=0.3.10
8 | openai>=1.3.0
9 | tiktoken>=0.5.0
10 | python-dotenv>=1.0.0
--------------------------------------------------------------------------------
/py/ai/turnaround/my_tools/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Tools submodule for turnaround.
3 |
4 | Contains all the individual tool implementations.
5 | """
6 |
7 | from .fs_reader import fs_reader
8 | from .cmd_executor import cmd_executor
9 | from .web_fetcher import search_web
10 | from .markdown_report import save_report
11 |
12 | __all__ = [
13 | "fs_reader",
14 | "cmd_executor",
15 | "search_web",
16 | "save_report"
17 | ]
18 |
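19 | 
20 | # Example (illustrative sketch, not taken from this package): these tools are meant to be
21 | # registered with a smolagents agent; the model id mirrors web_fetcher.py and is an assumption.
22 | #
23 | #   from smolagents import CodeAgent, LiteLLMModel
24 | #   from my_tools import fs_reader, cmd_executor, search_web, save_report
25 | #   agent = CodeAgent(tools=[fs_reader, cmd_executor, search_web, save_report],
26 | #                     model=LiteLLMModel(model_id="gpt-4.1-mini"))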
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Environment variables
2 | py/ai/fininsightgpt/.env
3 |
4 | # Python
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 | *.so
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 |
26 | # Virtual Environment
27 | venv/
28 | ENV/
29 | env/
30 |
31 | # IDE specific files
32 | .idea/
33 | .vscode/
34 | *.swp
35 | *.swo
36 |
37 | # OS specific files
38 | .DS_Store
39 | .DS_Store?
40 | ._*
41 | .Spotlight-V100
42 | .Trashes
43 | ehthumbs.db
44 | Thumbs.db
45 |
46 | # Jupyter Notebook
47 | .ipynb_checkpoints
--------------------------------------------------------------------------------
/py/eodhd/price_data/RELIANCE_M.csv:
--------------------------------------------------------------------------------
1 | Date,Open,High,Low,Close,Volume,Adj Close
2 | 1994-11-03,375.0,400.0,356.75,380.25,879250,380.25
3 | 1994-12-01,378.5,382.5,329.0,341.2,851600,341.2
4 | 1995-01-02,341.0,343.0,235.15,272.45,4167200,272.45
5 | 1995-02-01,275.0,287.0,237.25,271.75,6171750,271.75
6 | 1995-03-01,275.0,297.0,250.0,265.45,12290250,265.45
7 | 1995-04-03,267.0,288.05,246.5,249.9,6403250,249.9
8 | 1995-05-02,249.5,280.0,225.55,271.85,21273350,271.85
9 | 1995-06-01,274.95,284.0,260.4,264.0,27161750,264.0
10 | 1995-07-03,262.25,303.0,226.35,267.4,26989650,267.4
11 | 1995-08-01,266.0,280.0,256.5,261.85,32501950,261.85
12 | 1995-09-01,261.0,278.0,257.0,275.0,19358500,275.0
13 | 1995-10-04,278.0,290.0,234.25,244.0,44468050,244.0
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # bharattrader
2 | Utilities for trading, scanning and other things.
3 | 
4 | I am not going to pretend that I am a coding whiz. I am just a normal software guy who has written this code for my own personal use. I know there are intelligent and smart people out there who can not only write better code but also develop better algos. If you find something valuable here, feel free to pick it up and honour the license under which the code is released.
5 | 
6 | This repo is strictly as-is. Please do not come to me for changes and modifications. The source is licensed under the GNU GPL, so please go ahead.
7 | Also, I am not responsible for any trading/investing/financial loss that you incur after using this code. Please do your own analysis.
8 |
--------------------------------------------------------------------------------
/py/ai/turnaround/requirements.txt:
--------------------------------------------------------------------------------
1 | # AI Agent Framework
2 | smolagents>=0.3.0
3 |
4 | # OpenAI API Integration
5 | openai>=1.0.0
6 |
7 | # LLM Model Support
8 | litellm>=1.0.0
9 |
10 | # Environment Variables Management
11 | python-dotenv>=1.0.0
12 |
13 | # Data Processing
14 | pandas>=2.0.0
15 | numpy>=1.24.0
16 |
17 | # HTTP Requests (for web fetching)
18 | requests>=2.31.0
19 |
20 | # JSON Processing (built-in, but some tools might need enhanced support)
21 | jsonschema>=4.17.0
22 |
23 | # File I/O and CSV Processing (built-in, but for completeness)
24 | # csv - built-in
25 | # os - built-in
26 | # datetime - built-in
27 |
28 | # Optional: For enhanced web scraping capabilities
29 | beautifulsoup4>=4.12.0
30 | selenium>=4.15.0
31 |
32 | # Optional: For better logging and debugging
33 | loguru>=0.7.0
34 |
35 | # Optional: For data validation
36 | pydantic>=2.0.0
37 |
--------------------------------------------------------------------------------
/py/ai/turnaround/data/financial_data.csv:
--------------------------------------------------------------------------------
1 | Name,BSE Code,NSE Code
2 | 63 Moons Tech.,526881,63MOONS
3 | Apex Frozen Food,540692,APEX
4 | Arman Financial,531179,ARMANFIN
5 | Ashima,514286,ASHIMASYN
6 | Bajaj Hindusthan,500032,BAJAJHIND
7 | Concord Enviro,544315,CEWATER
8 | Elpro Internatio,504000,
9 | Embassy Develop,532832,EMBDL
10 | Graphite India,509488,GRAPHITE
11 | Gujarat Alkalies,530001,GUJALKALI
12 | IFCI,500106,IFCI
13 | Meghmani Organi.,543331,MOL
14 | Munjal Auto Inds,520059,MUNJALAU
15 | Nuvoco Vistas,543334,NUVOCO
16 | PNB Gilts,532366,PNBGILTS
17 | Precision Camshf,539636,PRECAM
18 | Prince Pipes,542907,PRINCEPIPE
19 | Reliance Infra.,500390,RELINFRA
20 | RSWM Ltd,500350,RSWM
21 | S Chand & Compan,540497,SCHAND
22 | Saurashtra Cem.,502175,SAURASHCEM
23 | Snowman Logistic,538635,SNOWMAN
24 | T N Newsprint,531426,TNPL
25 | Texmaco Infrast.,505400,TEXINFRA
26 | Utkarsh Small F.,543942,UTKARSHBNK
27 | Visaka Industrie,509055,VISAKAIND
28 |
--------------------------------------------------------------------------------
/py/ai/nse_announcements/requirements.txt:
--------------------------------------------------------------------------------
1 | annotated-types==0.6.0
2 | anyio==4.2.0
3 | arxiv==2.1.0
4 | autogenstudio==0.0.25a0
5 | boto3==1.34.145
6 | botocore==1.34.145
7 | certifi==2023.11.17
8 | charset-normalizer==3.3.2
9 | click==8.1.7
10 | diskcache==5.6.3
11 | distro==1.9.0
12 | exceptiongroup==1.2.0
13 | fastapi==0.109.0
14 | feedparser==6.0.10
15 | FLAML==2.1.1
16 | h11==0.14.0
17 | httpcore==1.0.2
18 | httpx==0.26.0
19 | idna==3.6
20 | jiter==0.5.0
21 | jmespath==1.0.1
22 | numpy==1.26.3
23 | openai==1.42.0
24 | packaging==24.1
25 | pandas==2.2.2
26 | pillow==10.4.0
27 | plotly==5.22.0
28 | pyautogen==0.2.6
29 | pydantic==2.5.3
30 | pydantic_core==2.14.6
31 | PyMuPDF==1.24.9
32 | PyMuPDFb==1.24.9
33 | python-dateutil==2.9.0.post0
34 | python-dotenv==1.0.0
35 | pytz==2024.1
36 | regex==2023.12.25
37 | requests==2.31.0
38 | s3transfer==0.10.2
39 | sgmllib3k==1.0.0
40 | six==1.16.0
41 | sniffio==1.3.0
42 | starlette==0.35.1
43 | tenacity==8.5.0
44 | termcolor==2.4.0
45 | tiktoken==0.5.2
46 | tqdm==4.66.1
47 | typer==0.9.0
48 | typing_extensions==4.12.2
49 | tzdata==2024.1
50 | urllib3==2.1.0
51 | uvicorn==0.25.0
52 |
--------------------------------------------------------------------------------
/py/ai/turnaround/my_tools/web_fetcher.py:
--------------------------------------------------------------------------------
1 | from smolagents import tool
2 | import os
3 | from openai import OpenAI
4 | import json
5 |
6 | model="gpt-4.1-mini"
7 | client = OpenAI()
8 |
9 | @tool
10 | def search_web(query: str) -> str:
11 | """
12 | This tool searches the web for the given query and returns the results.
13 | It is useful for gathering information from the web to assist in decision-making or analysis.
14 | Args:
15 | query (str): The search query to use. Be as specific as possible to get relevant results.
16 | Returns:
17 |         str: The search results as a JSON-formatted string, or an error message if the search fails.
18 | """
19 |     # check that a search query was provided
20 |     if not query:
21 |         return "No search query provided."
22 |
23 | response = client.responses.create(
24 | model=model, # or another supported model
25 | input=query,
26 | tools=[
27 | {
28 | "type": "web_search"
29 | }
30 | ]
31 | )
32 | return json.dumps(response.output, default=lambda o: o.__dict__, indent=2)
--------------------------------------------------------------------------------
/py/ai/turnaround/my_tools/markdown_report.py:
--------------------------------------------------------------------------------
1 | from smolagents import tool
2 | import datetime
3 |
4 | instructions = """You are a simple file writer tool that dumps the input text into a file."""
5 |
6 | @tool
7 | def save_report(md_report: str, business_name: str) -> None:
8 | """
9 | This tool saves a markdown formatted report to a file.
10 | Args:
11 | md_report (str): The markdown report content to save.
12 | business_name (str): The name of the business for which the report is generated.
13 | Returns:
14 | None: The function does not return anything, but saves the report to a file.
15 | """
16 |     # check that report content was provided
17 |     if not md_report:
18 |         return "No report content provided."
19 | 
20 |     output_file = f"output/{business_name}_" + datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + "_report.md"
21 |
22 | # Save the output to a file
23 | try:
24 | with open(output_file, "w", encoding="utf-8") as file:
25 | file.write(md_report)
26 | except Exception as e:
27 | return f"An error occurred while saving the report: {str(e)}"
--------------------------------------------------------------------------------
/py/ai/turnaround/my_tools/fs_reader.py:
--------------------------------------------------------------------------------
1 | from smolagents import tool
2 | import os
3 |
4 |
5 | @tool
6 | def fs_reader(task: str) -> str:
7 | """
8 | This tool reads a file from the filesystem and returns its content.
9 |     It can read plain text files, markdown files, source code files, etc.
10 | It is useful for reading files that are part of the project or for reading
11 | files that are provided as input to the agent.
12 | Args:
13 | task (str): The path to the file to read.
14 | Returns:
15 | str: The content of the file or an error message if the file cannot be read.
16 | """
17 | # check if the file exists on the filesystem
18 | if not task:
19 | return "No file path provided."
20 |
21 | # Expand user path (handle ~ symbol)
22 | expanded_path = os.path.expanduser(task)
23 |
24 | try:
25 | with open(expanded_path, "r", encoding="utf-8") as file:
26 | content = file.read()
27 | return content
28 | except FileNotFoundError:
29 | return f"File not found: {expanded_path} (original path: {task})"
30 | except Exception as e:
31 | return f"An error occurred while reading the file: {str(e)}"
--------------------------------------------------------------------------------
/py/eodhd/price_data/RELIANCE_W.csv:
--------------------------------------------------------------------------------
1 | Date,Open,High,Low,Close,Volume,Adj Close
2 | 1994-11-03,375.0,400.0,375.0,396.0,42650,396.0
3 | 1994-11-07,396.0,399.0,369.5,372.75,297500,372.75
4 | 1994-11-14,373.0,390.0,356.75,387.5,259000,387.5
5 | 1994-11-21,385.0,387.0,367.0,379.5,174750,379.5
6 | 1994-11-28,377.5,387.0,372.5,374.25,158850,374.25
7 | 1994-12-05,373.5,382.5,358.5,363.0,223050,363.0
8 | 1994-12-12,355.0,355.5,329.0,344.0,367400,344.0
9 | 1994-12-19,345.0,345.5,337.0,344.75,137600,344.75
10 | 1994-12-26,339.0,344.75,336.35,341.2,70050,341.2
11 | 1995-01-02,341.0,343.0,315.0,316.75,235400,316.75
12 | 1995-01-09,319.25,319.25,277.5,286.3,1330100,286.3
13 | 1995-01-16,288.0,294.0,271.0,272.5,724000,272.5
14 | 1995-01-23,274.5,274.5,235.15,256.2,1151100,256.2
15 | 1995-01-30,250.0,282.55,248.0,279.05,1962650,279.05
16 | 1995-02-06,280.0,281.0,262.0,269.85,1348800,269.85
17 | 1995-02-13,270.0,274.7,250.0,255.45,1114200,255.45
18 | 1995-02-20,255.0,282.0,237.25,266.7,1911100,266.7
19 | 1995-02-28,263.5,291.5,258.0,285.5,2423250,285.5
20 | 1995-03-06,286.0,297.0,271.0,286.45,2130300,286.45
21 | 1995-03-13,282.0,290.0,250.0,266.95,4470750,266.95
22 | 1995-03-20,255.0,267.25,250.0,259.75,2033150,259.75
23 | 1995-03-27,264.75,274.45,258.0,265.45,1794400,265.45
24 | 1995-04-03,267.0,288.05,262.0,282.75,1939300,282.75
25 | 1995-04-10,282.0,282.25,275.0,280.7,841750,280.7
26 | 1995-04-17,284.0,286.0,274.25,279.55,1512550,279.55
27 | 1995-04-24,280.0,280.2,246.5,249.9,2109650,249.9
28 |
--------------------------------------------------------------------------------
/py/eodhd/pricereader.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | base_path = 'price_data'
3 |
4 | def get_price_data(stockname, period):
5 | """
6 | Fetches stock price data from CSV files for the given stock name and period.
7 | Sets the 'Date' column as a DatetimeIndex.
8 |
9 | :param stockname: Name of the stock (str)
10 |     :param period: Period for which to fetch data: 'd', 'w' or 'm' (str)
11 |     :return: DataFrame of price data for that period (empty if the file is not found)
12 | """
13 |
14 | df = pd.DataFrame()
15 |
16 | # Mapping of period to file suffix
17 | period_suffix = {'d': '_D.csv', 'w': '_W.csv', 'm': '_M.csv'}
18 |
19 |
20 | # Construct file path based on stock name and period
21 | file_path = f"{base_path}/{stockname}{period_suffix[period]}"
22 | try:
23 | # Read the data from the file and set the 'Date' column as the index
24 | df = pd.read_csv(file_path, parse_dates=['Date'])
25 | df.set_index('Date', inplace=True)
26 | except FileNotFoundError:
27 | print(f"No data available for {stockname} for period: {period}")
28 |
29 | return df
30 |
31 | '''
32 | This requires passing a DataFrame (df) after the timeframe has been selected.
33 | '''
34 | def get_price_daterange(df, start_date, end_date):
35 | # Ensure the dates are in the correct format
36 | start_date = pd.to_datetime(start_date)
37 | end_date = pd.to_datetime(end_date)
38 |
39 | # Filter the dataframe
40 | filtered_df = df[(df.index >= start_date) & (df.index <= end_date)]
41 |
42 | return filtered_df
43 |
--------------------------------------------------------------------------------
/py/ai/newsarranger/requirements.txt:
--------------------------------------------------------------------------------
1 | annotated-types==0.7.0
2 | anyio==4.8.0
3 | beautifulsoup4==4.12.3
4 | Brotli==1.1.0
5 | cachetools==5.5.0
6 | certifi==2024.12.14
7 | cffi==1.17.1
8 | charset-normalizer==3.4.1
9 | cobble==0.1.4
10 | cryptography==44.0.0
11 | cssselect2==0.7.0
12 | defusedxml==0.7.1
13 | distro==1.9.0
14 | docopt==0.6.2
15 | et_xmlfile==2.0.0
16 | fonttools==4.55.3
17 | google-auth==2.37.0
18 | google-genai==0.4.0
19 | h11==0.14.0
20 | httpcore==1.0.7
21 | httpx==0.28.1
22 | idna==3.10
23 | jiter==0.8.2
24 | lxml==5.3.0
25 | mammoth==1.9.0
26 | markdown2==2.5.2
27 | Markdown2PDF==0.1.4
28 | markdownify==0.14.1
29 | -e git+https://github.com/microsoft/markitdown.git@f58a864951da6c720d3e10987371133c67db296a#egg=markitdown
30 | md2pdf==1.0.1
31 | numpy==2.2.1
32 | olefile==0.47
33 | openai==1.59.6
34 | openpyxl==3.1.5
35 | pandas==2.2.3
36 | pathvalidate==3.2.3
37 | pdfkit==1.0.0
38 | pdfminer.six==20240706
39 | pillow==11.1.0
40 | puremagic==1.28
41 | pyasn1==0.6.1
42 | pyasn1_modules==0.4.1
43 | pycparser==2.22
44 | pydantic==2.10.5
45 | pydantic_core==2.27.2
46 | pydub==0.25.1
47 | pydyf==0.11.0
48 | pyphen==0.17.0
49 | python-dateutil==2.9.0.post0
50 | python-dotenv==1.0.1
51 | python-pptx==1.0.2
52 | pytz==2024.2
53 | requests==2.32.3
54 | rsa==4.9
55 | setuptools==75.1.0
56 | six==1.17.0
57 | sniffio==1.3.1
58 | soupsieve==2.6
59 | SpeechRecognition==3.13.0
60 | tinycss2==1.4.0
61 | tinyhtml5==2.0.0
62 | tqdm==4.67.1
63 | typing_extensions==4.12.2
64 | tzdata==2024.2
65 | urllib3==2.3.0
66 | weasyprint==63.1
67 | webencodings==0.5.1
68 | websockets==14.1
69 | wheel==0.44.0
70 | xlrd==2.0.1
71 | XlsxWriter==3.2.0
72 | youtube-transcript-api==0.6.3
73 | zopfli==0.2.3.post1
74 |
--------------------------------------------------------------------------------
/py/ai/market_analyzer/stock_chat.py:
--------------------------------------------------------------------------------
1 | from analysis_utils import initialize_client, show_parts, log_message, log_message_r, start_log_file, end_log_file
2 | from datetime import datetime
3 | import os
4 |
5 | chat_output_folder = "output"
6 | my_model = 'gemini-2.0-flash'
7 | client = initialize_client('GOOGLE_API_KEY')
8 |
9 | def main():
10 | search_tool = {'google_search': {}}
11 | stock_chat = client.chats.create(model=my_model, config={'tools': [search_tool]})
12 |
13 | while True:
14 | stock = input('Enter stock or company to chat on (or type bye to leave): ')
15 | if stock == 'bye':
16 | break
17 |
18 | timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
19 | log_file = f"{chat_output_folder}/{stock}_chat_log_{timestamp}.html"
20 | start_log_file(log_file)
21 | log_message(log_file, f"User selected stock/company: {stock}", "info")
22 |
23 | date_now = datetime.now().strftime('%Y-%m-%d')
24 | stock_prompt_prefix = f'Date today is: {date_now}. Answer following in context of the company/stock_code {stock}, listed in India.\n'
25 |
26 | while True:
27 | input_txt = input('Ask >> : ')
28 | if input_txt == 'exit':
29 | break
30 | log_message(log_file, f"User input: {input_txt}", "user")
31 | print("-" * 80)
32 | response = stock_chat.send_message(f"{stock_prompt_prefix}{input_txt}")
33 | show_parts(response)
34 | log_message_r(log_file, response, "model")
35 | print(f'Working on: {stock}\n Type exit to work on new stock/company.')
36 |
37 | end_log_file(log_file)
38 |
39 | if __name__ == "__main__":
40 | main()
41 |
--------------------------------------------------------------------------------
/py/beta/chatgpt/model.py:
--------------------------------------------------------------------------------
1 | import openai
2 | import os
3 | from dotenv import load_dotenv, find_dotenv
4 |
5 | model_name = 'gpt-4' #gpt-3.5-turbo
6 |
7 | def get_completion(prompt, model=model_name):
8 | messages = [{"role": "user", "content": prompt}]
9 | response = openai.ChatCompletion.create(
10 | model=model,
11 | messages=messages,
12 | temperature=0, # this is the degree of randomness of the model's output
13 | )
14 | return response.choices[0].message["content"]
15 |
16 | def get_completion_large(messages,
17 |                          model=model_name,
18 |                          temperature=0,
19 |                          max_tokens=1000):
20 |     # Accumulate the full reply across multiple calls: when the model stops early
21 |     # because it hit max_tokens (finish_reason == 'length'), ask it to continue.
22 |     full_content = ""
23 | 
24 |     while True:
25 |         response = openai.ChatCompletion.create(
26 |             model=model,
27 |             messages=messages,
28 |             temperature=temperature,
29 |             max_tokens=max_tokens,
30 |         )
31 |         chunk = response.choices[0].message["content"]
32 |         full_content += chunk
33 |         # Feed the partial answer back so the next call continues from it
34 |         messages.append({'role': 'assistant', 'content': chunk})
35 | 
36 |         if response.choices[0].finish_reason == 'stop':
37 |             break
38 | 
39 |     return full_content
40 |
41 |
42 | def get_completion_from_messages(messages,
43 | model=model_name,
44 | temperature=0,
45 | max_tokens=500):
46 | response = openai.ChatCompletion.create(
47 | model=model,
48 | messages=messages,
49 | temperature=temperature,
50 | max_tokens=max_tokens,
51 | )
52 | return response.choices[0].message["content"]
53 |
54 | def set_api():
55 | _ = load_dotenv(find_dotenv()) # read local .env file
56 | openai.api_key = os.environ['OPENAI_API_KEY']
57 |
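58 | 
59 | # Example usage (illustrative sketch): set_api() must run first so the OpenAI key
60 | # is loaded from the local .env before any completion call is made.
61 | #
62 | #   set_api()
63 | #   print(get_completion("Summarise the key points of this concall transcript: ..."))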
--------------------------------------------------------------------------------
/py/eodhd/ath_scan.py:
--------------------------------------------------------------------------------
1 | import pricereader as pr
2 | import pandas as pd
3 | import time
4 |
5 | # Read the list of stocks from the CSV file
6 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
7 |
8 | # Set the bar time frame
9 | data_interval = 'm'
10 |
11 | # Initialize a list to store the results
12 | results = []
13 |
14 | # Iterate through the list of stocks
15 | for stock in stocks["Ticker"]:
16 | try:
17 | # Get the stock data
18 | data = pr.get_price_data(stock, data_interval)
19 | # Drop those with NaN
20 | data = data.dropna()
21 |         # If the last row is in the same month as the 2nd-to-last, fold it into the 2nd-to-last row and drop it
22 | if data.index[-1].month == data.index[-2].month:
23 | # Replace the values in the second-to-last row with the values in the last row
24 | data.loc[data.index[-2]] = data.loc[data.index[-1]]
25 | # Delete the last row
26 | data = data.drop(data.index[-1])
27 |
28 | # print(data)
29 | # data = data.iloc[:-1 , :] // If previous month ATH stocks are desired
30 |
31 | # Initialize the ATH to the first close price and the ATH date to the first date
32 | ath = data.at[data.index[0], 'High']
33 | ath_date = data.index[0]
34 |
35 | data_iter = data.iloc[:-1]
36 |
37 | # Loop through each row of the dataframe
38 | for index, row in data_iter.iterrows():
39 | # Update the ATH and ATH date if the current close price is higher
40 | if row['High'] > ath:
41 | ath = row['High']
42 | ath_date = index
43 |
44 | # print(stock + " green line: " + str(green_line) + " green line date: " + str(green_line_date))
45 | last_close = data.at[data.index[-1], 'Close']
46 |
47 | if last_close > ath:
48 | # print(stock +" close: " + str(last_close) + " ath: " + str(ath) + " ath date: " + str(ath_date))
49 | results.append(stock)
50 |
51 | except Exception as e:
52 | print("Error for ticker: " + stock)
53 | print(e)
54 |
55 | # Print the results
56 | print(results)
57 | print("Done")
--------------------------------------------------------------------------------
/py/yf/daily_rs_55_bo.py:
--------------------------------------------------------------------------------
1 | '''
2 | Detect breakout of CRS from 55 day average
3 | Daily timeframe
4 | '''
5 |
6 | import yfinance as yf
7 | import pandas as pd
8 |
9 | # Set the bar time frame
10 | data_interval = '1d'
11 |
12 | # Set the look-back period
13 | time_frame = '1y'
14 |
15 | # Set CRS average length
16 | average_length = 55
17 |
18 | # Specify the benchmark symbol
19 | benchmark = "^NSEI"
20 |
21 | # Read the list of stocks from the CSV file
22 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
23 |
24 | def main():
25 | print('Started')
26 |
27 | # Use yfinance to retrieve the benchmark data
28 | benchmark_ticker = yf.Ticker(benchmark)
29 | benchmark_data = benchmark_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
30 | benchmark_data = benchmark_data.dropna()
31 |
32 | # Iterate through the list of stocks
33 | for stock in stocks["Ticker"]:
34 | try:
35 | ticker = yf.Ticker(stock+".NS")
36 | stock_history = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
37 | stock_history = stock_history.dropna()
38 |
39 | # Create a new column in the stock dataframe for relative strength
40 | rs_column = 'Relative_Strength'
41 | stock_history[rs_column] = stock_history['Close'] / benchmark_data['Close']
42 |
43 | # Calculate the average_length-day moving average of the 'Relative_Strength' column
44 | crs_average_column = f'{average_length}_RS_MA'
45 | stock_history[crs_average_column] = stock_history[rs_column].rolling(window=average_length).mean()
46 |
47 | # Check if there is a cross over of crs
48 | isCrossOver = stock_history.iloc[-2][rs_column] <= stock_history.iloc[-2][crs_average_column] and \
49 | stock_history.iloc[-1][rs_column] > stock_history.iloc[-1][crs_average_column]
50 | if (isCrossOver):
51 | print(stock)
52 |
53 | except Exception as e:
54 | print(f"Error: {stock} ==> {e}")
55 |
56 | if __name__ == "__main__":
57 | main()
58 |
--------------------------------------------------------------------------------
/py/beta/concall_transcript_summarize.py:
--------------------------------------------------------------------------------
1 | import os
2 | from PyPDF2 import PdfReader
3 | from transformers import PegasusTokenizer, PegasusForConditionalGeneration
4 |
5 | #path of the folder where your pdfs are located
6 | folder_path = "concallpdfs"
7 |
8 | # Max token size
9 | max_seq_length = 512
10 |
11 | # Max token for pegasus financial summarization
12 | max_length_pegasus_fin_summ = 32
13 |
14 | # Pick model
15 | # model_name = "google/pegasus-xsum" -- used for testing
16 | model_name = "human-centered-summarization/financial-summarization-pegasus"
17 |
18 | # Load pretrained tokenizer
19 | pegasus_tokenizer = PegasusTokenizer.from_pretrained(model_name)
20 |
21 | # Make model from pre-trained model
22 | model = PegasusForConditionalGeneration.from_pretrained(model_name)
23 |
24 | for filename in os.listdir(folder_path):
25 | if filename.endswith(".pdf"):
26 | pdf_path = os.path.join(folder_path, filename)
27 | with open(pdf_path, "rb") as file:
28 | print(f'Summarizing {filename}')
29 | reader = PdfReader(file)
30 | page_summaries = []
31 | count = 0
32 | for page in reader.pages: # summarize page by page
33 | page_text = page.extract_text()
34 | # Generate input tokens
35 | input_ids = pegasus_tokenizer(page_text, max_length=max_seq_length, truncation=True, return_tensors="pt").input_ids
36 | # Generate Summary
37 | summary_ids = model.generate(input_ids, max_length=max_length_pegasus_fin_summ, num_beams=5, early_stopping=True)
38 | tgt_texts = pegasus_tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
39 | page_summaries.append(tgt_texts[0])
40 | count = count + 1
41 | # print(f'{count} page(s) done')
42 | # Merge all page summaries
43 | merged_summary = "\n".join(page_summaries)
44 | # Write the merged summary to a file
45 | with open(f'{folder_path}/{filename}_summary.txt', 'w') as f:
46 | f.write(merged_summary)
47 | print(f'{filename} done')
48 |
--------------------------------------------------------------------------------
/py/ai/market_analyzer/requirements.txt:
--------------------------------------------------------------------------------
1 | aiofiles==23.2.1
2 | annotated-types==0.6.0
3 | anthropic==0.34.2
4 | anyio==4.3.0
5 | attrs==24.2.0
6 | black==24.10.0
7 | cachetools==5.3.2
8 | cattrs==23.2.3
9 | certifi==2023.11.17
10 | charset-normalizer==3.3.2
11 | click==8.1.7
12 | colorama==0.4.6
13 | dill==0.3.9
14 | distro==1.9.0
15 | dnspython==2.7.0
16 | ell-ai==0.0.14
17 | email_validator==2.2.0
18 | exceptiongroup==1.2.0
19 | fastapi==0.115.6
20 | fastapi-cli==0.0.5
21 | ffmpy==0.5.0
22 | filelock==3.16.1
23 | fsspec==2024.10.0
24 | google-ai-generativelanguage==0.6.10
25 | google-api-core==2.15.0
26 | google-api-python-client==2.125.0
27 | google-auth==2.25.2
28 | google-auth-httplib2==0.2.0
29 | google-genai==0.1.0
30 | google-generativeai==0.8.3
31 | googleapis-common-protos==1.62.0
32 | gradio==5.9.1
33 | gradio_client==1.5.2
34 | groq==0.11.0
35 | grpcio==1.60.0
36 | grpcio-status==1.60.0
37 | h11==0.14.0
38 | httpcore==1.0.5
39 | httplib2==0.22.0
40 | httptools==0.6.4
41 | httpx==0.27.0
42 | huggingface-hub==0.26.2
43 | idna==3.6
44 | Jinja2==3.1.4
45 | jiter==0.7.0
46 | markdown-it-py==3.0.0
47 | markdown2==2.5.2
48 | MarkupSafe==2.1.5
49 | mdurl==0.1.2
50 | mypy-extensions==1.0.0
51 | numpy==2.1.3
52 | openai==1.54.3
53 | orjson==3.10.12
54 | packaging==24.2
55 | pandas==2.2.3
56 | pathspec==0.12.1
57 | pillow==10.4.0
58 | platformdirs==4.3.6
59 | proto-plus==1.23.0
60 | protobuf==4.25.1
61 | psutil==5.9.8
62 | pyasn1==0.5.1
63 | pyasn1-modules==0.3.0
64 | pydantic==2.7.0
65 | pydantic_core==2.18.1
66 | pydub==0.25.1
67 | Pygments==2.18.0
68 | pyparsing==3.1.2
69 | python-dateutil==2.9.0.post0
70 | python-dotenv==1.0.0
71 | python-multipart==0.0.20
72 | pytz==2024.2
73 | PyYAML==6.0.2
74 | requests==2.32.3
75 | rich==13.9.4
76 | rsa==4.9
77 | ruff==0.8.4
78 | safehttpx==0.1.6
79 | semantic-version==2.10.0
80 | shellingham==1.5.4
81 | six==1.17.0
82 | sniffio==1.3.1
83 | SQLAlchemy==2.0.36
84 | sqlmodel==0.0.21
85 | starlette==0.41.3
86 | tokenizers==0.20.3
87 | tomli==2.0.2
88 | tomlkit==0.13.2
89 | tqdm==4.66.1
90 | typer==0.13.0
91 | typing_extensions==4.12.2
92 | tzdata==2024.2
93 | uritemplate==4.1.1
94 | urllib3==2.1.0
95 | uvicorn==0.30.6
96 | uvloop==0.21.0
97 | watchfiles==0.24.0
98 | websockets==14.0
99 |
--------------------------------------------------------------------------------
/py/ai/market_analyzer/analysis_utils.py:
--------------------------------------------------------------------------------
1 | # analysis_utils.py
2 |
3 | import os
4 | import json
5 | from datetime import datetime
6 | from rich.console import Console
7 | from rich.markdown import Markdown
8 | import markdown2
9 | from dotenv import load_dotenv, find_dotenv
10 | from google import genai
11 |
12 | console = Console()
13 |
14 | def initialize_client(api_key_env_var):
15 | load_dotenv(find_dotenv())
16 | api_key = os.getenv(api_key_env_var)
17 | if not api_key:
18 | raise ValueError(f"API key not found in environment variable {api_key_env_var}")
19 | return genai.Client(api_key=api_key)
20 |
21 | def show_json(obj):
22 | print(json.dumps(obj.model_dump(exclude_none=True), indent=2))
23 |
24 | def show_parts(response):
25 | parts = response.candidates[0].content.parts
26 | if parts is None:
27 | print(f'finish_reason={response.candidates[0].finish_reason}')
28 | return
29 | for part in parts:
30 | if part.text:
31 | console.print(Markdown(part.text, hyperlinks=True))
32 | grounding_metadata = response.candidates[0].grounding_metadata
33 | if grounding_metadata and grounding_metadata.search_entry_point:
34 | console.print(grounding_metadata.search_entry_point.rendered_content)
35 |
36 | def log_message(log_file, message, message_type="info"):
37 | timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
38 |     log_entry = f"<div class='{message_type}'>{timestamp} - {message}</div>\n"  # wrap the entry so it renders in the HTML log
39 | with open(log_file, 'a', encoding='utf-8') as file:
40 | file.write(log_entry)
41 |
42 | def log_message_r(log_file, response, message_type="model"):
43 | parts = response.candidates[0].content.parts
44 | log_message_content = "Response: "
45 | if parts is None:
46 | log_message_content += f"\n{response.candidates[0].finish_reason}"
47 | else:
48 | log_message_content += "".join(part.text for part in parts if part.text)
49 | log_message_content = markdown2.markdown(log_message_content)
50 | grounding_metadata = response.candidates[0].grounding_metadata
51 | if grounding_metadata and grounding_metadata.search_entry_point:
52 | log_message_content += grounding_metadata.search_entry_point.rendered_content
53 | log_message(log_file, log_message_content, message_type)
54 |
55 | def start_log_file(log_file):
56 | with open(log_file, 'w') as file:
57 |         file.write("<html><body>\n")
58 |
59 | def end_log_file(log_file):
60 | with open(log_file, 'a') as file:
61 |         file.write("</body></html>\n")
62 |
--------------------------------------------------------------------------------
/py/ai/newsarranger/get_news_arrange.py:
--------------------------------------------------------------------------------
1 | import os
2 | import requests
3 | from markitdown import MarkItDown
4 | from dotenv import load_dotenv, find_dotenv
5 | from google import genai
6 | import datetime
7 |
8 | """
9 | Retrieve the news content from a location https://example.xxxstockxxxnews.com
10 | Then convert it to markdown format using markitdown library.
11 | Then pass on the markdown content to Google Gemini API to arrange and group the news feed provided based on the order of importance for an investor in the markets.
12 | """
13 |
14 | news_url = 'https://example.xxxstockxxxnews.com'
15 |
16 | # Initialize the client, using Google Gemini API key
17 | def initialize_client(api_key_env_var):
18 | load_dotenv(find_dotenv())
19 | api_key = os.getenv(api_key_env_var)
20 | if not api_key:
21 | raise ValueError(f"API key not found in environment variable {api_key_env_var}")
22 | return genai.Client(api_key=api_key)
23 |
24 | my_model = 'gemini-2.0-flash'
25 | client = initialize_client('GOOGLE_API_KEY')
26 |
27 | # Main function
28 | if __name__ == '__main__':
29 |
30 | # Current time is, dd-mm-YYYY HH:MM:SS
31 | timenow = datetime.datetime.now().strftime("%d-%m-%Y %H:%M:%S")
32 |
33 | # get the news file
34 | print(f"Start getting the news file at {timenow}...")
35 | response = requests.get(news_url)
36 | html_content = response.text
37 | # Save it to a file
38 | with open('output.html', 'w') as file:
39 | file.write(html_content)
40 |
41 | print(f"Start converting the news file to markdown format at {timenow}...")
42 | md = MarkItDown()
43 | result = md.convert("output.html")
44 | #print(result.text_content)
45 | # Save the markdown content to a file
46 | with open('output.md', 'w') as file:
47 | file.write(result.text_content)
48 |
49 | print(f"Start arranging the news file at {timenow}...")
50 | analyzer = client.chats.create(model=my_model)
51 | response = analyzer.send_message(f"Arrange and group the news feed provided based on the order of importance for an investor in the markets. Include whatever data related to the news is available in the input, such as short summaries, hyperlinks etc. If available include time of report of the news. The time now is: {timenow}. The input is in markdown. Input: {result.text_content}")
52 | output = ""
53 | parts = response.candidates[0].content.parts
54 |     if parts is None:
55 |         print(f'finish_reason={response.candidates[0].finish_reason}')
56 |     else:
57 |         for part in parts:
58 |             if part.text:
59 |                 # join the text parts
60 |                 output += part.text
61 |
62 | # Save the output to a file
63 | with open('output_arranged.md', 'w') as file:
64 | file.write(output)
65 |
--------------------------------------------------------------------------------
/py/yf/glb_scan.py:
--------------------------------------------------------------------------------
1 | import yfinance as yf
2 | import pandas as pd
3 | import time
4 |
5 | # Read the list of stocks from the CSV file
6 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
7 | # Exchange, ".BO, .NS"
8 | exchange = ".NS"
9 |
10 | # Set the time frame to max
11 | time_frame = 'max'
12 |
13 | # Set the bar time frame
14 | data_interval = '1mo'
15 |
16 | # Set the green line to the all-time high of the stock
17 | green_line = 0.0
18 |
19 | # Set the minimum number of months since the ath/green line was breached
20 | min_months = 2
21 |
22 | # Initialize a list to store the results
23 | results = []
24 |
25 | # Iterate through the list of stocks
26 | for stock in stocks["Ticker"]:
27 | try:
28 | # Get the stock data from yfinance, dont adjust OHLC
29 | ticker = yf.Ticker(f'{stock}{exchange}')
30 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
31 | # Drop those with NaN
32 | data = data.dropna()
33 |         # If the last row is in the same month as the 2nd-to-last, fold it into the 2nd-to-last row and drop it
34 | if data.index[-1].month == data.index[-2].month:
35 | # Replace the values in the second-to-last row with the values in the last row
36 | data.loc[data.index[-2]] = data.loc[data.index[-1]]
37 | # Delete the last row
38 | data = data.drop(data.index[-1])
39 |
40 | # print(data)
41 |
42 | # Initialize the ATH to the first close price and the ATH date to the first date
43 | ath = data.at[data.index[0], 'High']
44 | ath_date = data.index[0]
45 | green_line = ath
46 | green_line_date = ath_date
47 |
48 | # Loop through each row of the dataframe
49 | for index, row in data.iterrows():
50 | # Update the ATH and ATH date if the current close price is higher
51 | if row['High'] > ath:
52 | ath = row['High']
53 | ath_date = index
54 | # Update Greenline if condition of minimum months is met
55 | if data.index.get_loc(index) - data.index.get_loc(ath_date) >= min_months:
56 | green_line = ath
57 | green_line_date = ath_date
58 |
59 | # print(stock + " green line: " + str(green_line) + " green line date: " + str(green_line_date))
60 | last_close = data.at[data.index[-1], 'Close']
61 | second_last_close = data.at[data.index[-2], 'Close']
62 | if second_last_close < green_line and last_close > green_line:
63 | # print(stock +" close: " + str(last_close) + " second last close: " + str(second_last_close) + " green line: " + str(green_line) + " green line date: " + str(green_line_date))
64 | results.append(stock)
65 |
66 | except Exception as e:
67 | print("Error for ticker: " + stock)
68 | print(e)
69 |
70 | # Print the results
71 | print(results)
72 | ex = 'NSE' if exchange == '.NS' else 'BSE'
73 | for stk in results:
74 | print(f'{ex}:{stk},')
75 | print("Done")
76 |
--------------------------------------------------------------------------------
/py/eodhd/price_data/RELIANCE_D.csv:
--------------------------------------------------------------------------------
1 | Date,Open,High,Low,Close,Volume,Adj Close
2 | 1994-11-03,375.0,400.0,375.0,396.0,42650,396.0
3 | 1994-11-07,396.0,398.25,393.0,395.5,58700,395.5
4 | 1994-11-08,398.0,398.75,393.0,396.0,49050,396.0
5 | 1994-11-09,399.0,399.0,385.75,387.0,57500,387.0
6 | 1994-11-10,387.0,390.0,380.0,380.5,67250,380.5
7 | 1994-11-11,371.0,378.5,369.5,372.75,65000,372.75
8 | 1994-11-14,373.0,373.0,358.0,361.5,30450,361.5
9 | 1994-11-15,360.0,364.0,356.75,361.25,66900,361.25
10 | 1994-11-16,364.25,377.0,363.0,373.75,72800,373.75
11 | 1994-11-17,377.0,390.0,373.75,387.5,88850,387.5
12 | 1994-11-21,385.0,387.0,377.5,378.25,30350,378.25
13 | 1994-11-22,381.0,381.25,370.0,371.0,27700,371.0
14 | 1994-11-23,375.0,375.0,367.0,367.25,25300,367.25
15 | 1994-11-24,370.0,379.0,369.0,378.0,54500,378.0
16 | 1994-11-25,381.0,384.0,378.0,379.5,36900,379.5
17 | 1994-11-28,377.5,385.0,374.0,383.0,6000,383.0
18 | 1994-11-29,385.0,387.0,380.0,380.5,77400,380.5
19 | 1994-11-30,382.0,382.0,379.0,380.25,21950,380.25
20 | 1994-12-01,378.5,381.75,378.0,379.0,42800,379.0
21 | 1994-12-02,376.75,377.0,372.5,374.25,10700,374.25
22 | 1994-12-05,373.5,382.5,369.5,380.25,55800,380.25
23 | 1994-12-06,378.0,380.0,371.0,372.5,47800,372.5
24 | 1994-12-07,373.0,373.0,364.0,366.25,40950,366.25
25 | 1994-12-08,363.5,363.5,359.0,361.0,29900,361.0
26 | 1994-12-09,360.0,365.0,358.5,363.0,48600,363.0
27 | 1994-12-12,355.0,355.5,344.0,347.0,54750,347.0
28 | 1994-12-13,345.0,346.0,329.0,332.75,55800,332.75
29 | 1994-12-14,333.0,342.5,332.25,336.5,107100,336.5
30 | 1994-12-15,336.0,347.0,336.0,344.75,84000,344.75
31 | 1994-12-16,345.0,350.0,342.75,344.0,65750,344.0
32 | 1994-12-19,345.0,345.0,338.0,338.5,26200,338.5
33 | 1994-12-20,342.0,342.0,337.5,339.5,26450,339.5
34 | 1994-12-21,337.0,344.0,337.0,343.0,34100,343.0
35 | 1994-12-22,342.0,342.0,339.0,340.0,15700,340.0
36 | 1994-12-23,339.0,345.5,338.5,344.75,35150,344.75
37 | 1994-12-26,339.0,344.75,338.55,339.0,37050,339.0
38 | 1994-12-27,340.0,340.0,336.35,337.75,7450,337.75
39 | 1994-12-28,340.5,340.5,339.25,339.25,7650,339.25
40 | 1994-12-29,339.5,339.75,338.75,339.2,3150,339.2
41 | 1994-12-30,339.0,341.5,339.0,341.2,14750,341.2
42 | 1995-01-02,341.0,343.0,340.5,341.2,13600,341.2
43 | 1995-01-03,342.5,342.5,336.1,336.2,11450,336.2
44 | 1995-01-04,339.95,340.5,330.3,332.0,62600,332.0
45 | 1995-01-05,332.0,333.0,320.25,321.3,59200,321.3
46 | 1995-01-06,323.0,323.5,315.0,316.75,88550,316.75
47 | 1995-01-09,319.25,319.25,299.0,302.35,264800,302.35
48 | 1995-01-10,303.0,303.0,283.0,287.35,351750,287.35
49 | 1995-01-11,290.0,300.0,280.0,295.25,168000,295.25
50 | 1995-01-12,294.0,297.5,277.5,278.25,375850,278.25
51 | 1995-01-13,280.0,288.5,279.0,286.3,169700,286.3
52 | 1995-01-16,288.0,294.0,282.0,284.1,149800,284.1
53 | 1995-01-17,285.0,286.0,275.05,277.3,255950,277.3
54 | 1995-01-18,279.25,284.7,278.0,283.35,88500,283.35
55 | 1995-01-19,282.0,287.5,282.0,283.95,64350,283.95
56 | 1995-01-20,284.0,285.45,271.0,272.5,165400,272.5
57 | 1995-01-23,274.5,274.5,250.3,251.7,175050,251.7
58 | 1995-01-24,250.0,251.75,235.15,246.4,287400,246.4
59 | 1995-01-25,245.0,250.0,238.0,242.5,372400,242.5
60 | 1995-01-27,245.0,261.0,242.0,256.2,316250,256.2
61 | 1995-01-30,250.0,269.0,248.0,267.95,425600,267.95
62 | 1995-01-31,265.0,276.65,264.0,272.45,301000,272.45
63 |
--------------------------------------------------------------------------------
/py/yf/ars_srs_scan.py:
--------------------------------------------------------------------------------
1 | import yfinance as yf
2 | import pandas as pd
3 | import time
4 | import datetime
5 |
6 | def cleanUp_data(data):
7 | # Drop those with NaN
8 | data = data.dropna()
9 | return data
10 |
11 | # set the file name of stocks
12 | stock_filename = "stocks.csv"
13 |
14 | # Set the look-back period
15 | time_frame = '2y'
16 |
17 | # Set the bar time frame
18 | data_interval = '1d'
19 |
20 | # Specify the benchmark symbol
21 | benchmark = "^NSEI"
22 |
23 |
24 | # Specify the reference date
25 | reference_date = "2022-06-03"
26 |
27 | # Specify the number of rows to look back for the Static RS calculation
28 | srs_length = 123
29 |
30 | # Read the list of stocks from the CSV file
31 | stocks = pd.read_csv(stock_filename, header=0, usecols=["Ticker"])
32 |
33 | # Use yfinance to retrieve the benchmark data
34 | benchmark_ticker = yf.Ticker(benchmark)
35 | benchmark_data = benchmark_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
36 | benchmark_data = cleanUp_data(benchmark_data)
37 |
38 | # Create an empty list to store the stock data
39 | stock_data_list = []
40 |
41 | # Iterate through the list of stocks
42 | for stock in stocks["Ticker"]:
43 | try:
44 | ticker = yf.Ticker(stock+".NS")
45 |
46 | # Use yfinance to retrieve the stock data
47 | stock_data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
48 | stock_data = cleanUp_data(stock_data)
49 |
50 |         # Calculate the Adaptive Relative Strength (ARS): stock vs. benchmark performance since the reference date
51 |         stock_data["Adaptive RS"] = (stock_data["Close"] / stock_data.loc[stock_data.index == reference_date, "Close"].values[0]) / (benchmark_data["Close"] / benchmark_data.loc[benchmark_data.index == reference_date, "Close"].values[0]) - 1
52 | 
53 |         # Calculate the Static Relative Strength (SRS) over the last srs_length bars
54 |         stock_close_ref = stock_data.at[stock_data.index[-srs_length], 'Close']
55 |         benchmark_close_ref = benchmark_data.at[benchmark_data.index[-srs_length], 'Close']
56 |         stock_data["Static RS"] = (stock_data["Close"] / stock_close_ref) / (benchmark_data["Close"] / benchmark_close_ref) - 1
57 |
58 | # Get the last row of the stock data
59 | last_row = stock_data.tail(1)
60 |
61 | # Extract the ARS and SRS values from the last row
62 | ars = round(last_row["Adaptive RS"].values[0], 2)
63 | srs = round(last_row["Static RS"].values[0], 2)
64 |
65 | # Create a dictionary with the stock name, ARS, and SRS values
66 | stock_data_dict = {"Stock": stock, "Adaptive RS": ars, "Static RS": srs}
67 |
68 | # Add the dictionary to the list
69 | stock_data_list.append(stock_data_dict)
70 | except Exception as e:
71 | print("Error " + stock)
72 | print(e)
73 |
74 | # print(stock_data_list)
75 |
76 | # Get the current timestamp
77 | timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
78 |
79 | # Construct the file name using the timestamp
80 | filename = "rs_stock_data_" + timestamp + ".csv"
81 |
82 | # Convert the list of dictionaries to a dataframe
83 | stock_data_df = pd.DataFrame(stock_data_list)
84 |
85 | # Write the dataframe to the CSV file
86 | stock_data_df.to_csv(filename, index=False)
87 |
--------------------------------------------------------------------------------
/py/eodhd/my_rsi.py:
--------------------------------------------------------------------------------
1 | """
2 | This script calculates the Combined Relative Strength Index (RSI) for a list of stocks.
3 | The Combined RSI is a technical indicator used in the analysis of financial markets.
4 | It is intended to chart the current and historical strength or weakness of a stock or market based on the closing
5 | prices of a recent trading period. The Combined RSI adjusts the traditional RSI calculation using volume and volatility.
6 | """
7 |
8 | import pricereader as pr
9 | import pandas as pd
10 | import numpy as np
11 | import datetime
12 |
13 | # Set output folder path
14 | output_path = "output"
15 |
16 | # Read the list of stocks from the CSV file
17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
18 |
19 | def calculate_combined_rsi(df, period=14):
20 | """
21 | Calculate the Combined Relative Strength Index (RSI) for a given DataFrame.
22 |
23 | Parameters:
24 | - df (pandas.DataFrame): DataFrame containing the stock data.
25 | - period (int): Number of periods to consider for calculating the RSI. Default is 14.
26 |
27 | Returns:
28 | - combined_rsi (pandas.Series): Series containing the Combined RSI values.
29 | """
30 | # Calculate daily price change
31 | df['Price Change'] = df['Close'].diff()
32 |
33 | # Calculate volume ratio and volatility
34 | avg_volume = df['Volume'].rolling(window=period).mean()
35 | df['Volume Ratio'] = df['Volume'] / avg_volume
36 | volatility = df['Price Change'].rolling(window=period).std()
37 |
38 | # Combine volume and volatility adjustments
39 | df['Combined Gain'] = np.where(df['Price Change'] > 0, (df['Price Change'] * df['Volume Ratio']) / volatility, 0)
40 | df['Combined Loss'] = np.where(df['Price Change'] < 0, -(df['Price Change'] * df['Volume Ratio']) / volatility, 0)
41 |
42 | # Compute average combined gain and loss
43 | avg_combined_gain = df['Combined Gain'].rolling(window=period).mean()
44 | avg_combined_loss = df['Combined Loss'].rolling(window=period).mean()
45 |
46 | # Calculate Combined RS and RSI
47 | combined_rs = avg_combined_gain / avg_combined_loss
48 | combined_rsi = 100 - (100 / (1 + combined_rs))
49 |
50 | return combined_rsi
51 |
52 |
53 | def main():
54 | """
55 | Main function that calculates the Combined RSI for a list of stocks and saves the results to a CSV file.
56 | """
57 | print("Started...")
58 | # Create the DataFrame
59 | result_df = pd.DataFrame(columns=['stock', 'my_rsi'])
60 | # Iterate through the list of stocks
61 | for stock in stocks["Ticker"]:
62 | try:
63 | # Get the daily stock data
64 | stock_data = pr.get_price_data(stock, 'd')
65 | # Drop those with NaN
66 | stock_data = stock_data.dropna()
67 |
68 | # Calculate combined RSI
69 | stock_data['Combined_RSI'] = calculate_combined_rsi(stock_data)
70 | # print(stock_data.tail())
71 | last_row_idx = stock_data.index[-1]
72 | row = {'stock': stock, 'my_rsi': str(round(stock_data.loc[last_row_idx, 'Combined_RSI'], 2))}
73 | # Append the new row to the DataFrame
74 | result_df.loc[len(result_df)] = row
75 |
76 | except Exception as e:
77 | print("Error: " + stock)
78 | print(e)
79 |
80 | # Append current timestamp to the file name
81 | now = datetime.datetime.now()
82 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
83 | file_name = 'my_rsi_' + timestamp + '.csv'
84 | # Export the DataFrame to CSV
85 | result_df.to_csv(output_path + "/" + file_name, index=False)
86 | print(f'Saved file {file_name}')
87 |
88 |
89 | if __name__ == "__main__":
90 | main()
91 |
--------------------------------------------------------------------------------
/py/ai/turnaround/my_tools/cmd_executor.py:
--------------------------------------------------------------------------------
1 | from smolagents import tool
2 | import subprocess
3 | import shlex
4 |
5 | @tool
6 | def cmd_executor(command: str, confirmed: bool = False) -> str:
7 | """
8 | This tool executes readonly shell commands in a Linux/macOS environment.
9 | It is restricted to safe, readonly commands that do not modify the filesystem
10 | or system state. Useful for exploring directory structures, searching files,
11 | and gathering information about the system.
12 |
13 | LIMITATION: Only readonly commands are allowed for security. Commands that
14 | modify files, install software, or change system state are blocked.
15 |
16 | Allowed commands include:
17 | - ls, find, locate, which, whereis
18 | - grep, egrep, fgrep, zgrep
19 | - cat, head, tail, less, more
20 | - wc, sort, uniq, cut, awk, sed (readonly operations)
21 | - ps, top, htop, df, du, free
22 | - pwd, whoami, id, uname, date
23 | - file, stat, lsof
24 |
25 | Args:
26 | command (str): The shell command to execute (must be readonly).
27 | confirmed (bool): Must be set to True to confirm command execution.
28 | Defaults to False for safety.
29 | Returns:
30 | str: The output of the command or an error message.
31 | """
32 | # check if operation is confirmed
33 | if not confirmed:
34 | return "Error: Command execution not confirmed. Set confirmed=True to proceed with running the command."
35 |
36 | if not command.strip():
37 | return "No command provided."
38 |
39 | # List of allowed readonly commands
40 | allowed_commands = {
41 | 'ls', 'find', 'locate', 'which', 'whereis',
42 | 'grep', 'egrep', 'fgrep', 'zgrep', 'rg', 'ag',
43 | 'cat', 'head', 'tail', 'less', 'more',
44 | 'wc', 'sort', 'uniq', 'cut', 'awk', 'sed',
45 | 'ps', 'top', 'htop', 'df', 'du', 'free',
46 | 'pwd', 'whoami', 'id', 'uname', 'date',
47 | 'file', 'stat', 'lsof', 'tree'
48 | }
49 |
50 | # Parse the command to get the base command
51 | try:
52 | parsed_command = shlex.split(command)
53 | base_command = parsed_command[0] if parsed_command else ""
54 | except ValueError:
55 | return "Error: Invalid command syntax."
56 |
57 | # Check if the base command is allowed
58 | if base_command not in allowed_commands:
59 | return f"Error: Command '{base_command}' is not allowed. Only readonly commands are permitted."
60 |
61 | # Additional safety checks for potentially dangerous flags
62 | dangerous_patterns = ['rm', 'mv', 'cp', 'chmod', 'chown', 'sudo', '>', '>>', '|', '&&', '||', ';']
63 | for pattern in dangerous_patterns:
64 | if pattern in command:
65 | return f"Error: Command contains potentially dangerous pattern '{pattern}'. Only readonly operations are allowed."
66 |
67 | try:
68 | # Execute the command with timeout for safety
69 | result = subprocess.run(
70 | command,
71 | shell=True,
72 | capture_output=True,
73 | text=True,
74 | timeout=30, # 30 second timeout
75 | cwd=None # Use current working directory
76 | )
77 |
78 | if result.returncode == 0:
79 | return result.stdout if result.stdout else "Command executed successfully (no output)."
80 | else:
81 | return f"Command failed with return code {result.returncode}:\n{result.stderr}"
82 |
83 | except subprocess.TimeoutExpired:
84 | return "Error: Command timed out after 30 seconds."
85 | except Exception as e:
86 | return f"An error occurred while executing the command: {str(e)}"
--------------------------------------------------------------------------------
/py/eodhd/how_many_weeks_high.py:
--------------------------------------------------------------------------------
1 | """
2 | This script will fetch the latest weekly high price of a stock and calculate how many weeks it
3 | has been since the stock was at that price.
4 | """
5 | import pricereader as pr
6 | import pandas as pd
7 | import time
8 | import datetime
9 |
10 | # Read the list of stocks from the CSV file
11 | stocks = pd.read_csv("stocks5.csv", header=0, usecols=["Ticker"])
12 |
13 | # Set output folder path
14 | output_path = "output"
15 |
16 | # Function to find the most recent earlier week whose high reached the latest week's high
17 | # stock_data: DataFrame containing the stock data
18 | # Date,Open,High,Low,Close,Volume,Adj Close
19 | # 2002-07-01,283.25,331.0,283.25,317.8,11803,317.8
20 | # 2002-07-08,303.6,327.0,300.0,300.45,10390,300.45
21 | # 2002-07-15,296.2,305.0,290.3,300.0,4744,300.0
22 | # 2002-07-22,286.0,315.0,280.0,304.4,21643,304.4
23 | def get_previous_index_prce_for_last_high(stock_data):
24 | """
25 |     This function first fetches the high price of the latest date (latest week).
26 |     Then, scanning backwards through the earlier rows, it checks whether this high price was reached or crossed.
27 |     If it was, it returns the index of that row and its high price.
28 |     If no earlier week reached it, it returns the latest index and high, indicating the stock is at an all-time high (ATH).
29 |     stock_data: DataFrame containing the stock data, in ascending order of date
30 | """
31 | # Get the high price of the latest date
32 | latest_high = stock_data['High'].iloc[-1]
33 |
34 | # Iterate through the rows in reverse order
35 | for index in reversed(stock_data.index[:-1]):
36 | # Check if the high price was reached or crossed
37 | if stock_data.loc[index, 'High'] >= latest_high:
38 | # Return the index of the row where this price was reached
39 | return index, stock_data.loc[index, 'High']
40 |
41 | # Return last index if the high price was not reached or crossed
42 | return stock_data.index[-1], latest_high
43 |
44 |
45 | def main():
46 | print("Started...")
47 | # Create the DataFrame
48 | result_df = pd.DataFrame(columns=['stock', 'High of latest week', 'Last such week high', \
49 | 'Days passed', 'High of that week', 'diff%'])
50 | # Iterate through the list of stocks
51 | for stock in stocks["Ticker"]:
52 | try:
53 | # Get the daily stock data
54 | stock_data = pr.get_price_data(stock, 'w')
55 | # Drop those with NaN
56 | stock_data = stock_data.dropna()
57 |
58 | # Get the index and high price of the week when the stock was at its high
59 | index, high = get_previous_index_prce_for_last_high(stock_data)
60 |
61 | # Get the high price of the latest date
62 | latest_high = stock_data['High'].iloc[-1]
63 | # Current / last date
64 | latest_date = stock_data.index[-1]
65 | diff = round((latest_high - high) / high * 100,2)
66 | days_diff = (latest_date - index).days
67 | latest_high = round(latest_high,2)
68 | high = round(high,2)
69 | # Append the result to the DataFrame
70 | row = {'stock': stock, 'High of latest week': latest_high, 'Last such week high':index, \
71 | 'Days passed': f'{days_diff}', 'High of that week': high, 'diff%': f'{diff}%'}
72 | result_df.loc[len(result_df)] = row
73 | print(f"Processed: {stock}")
74 |
75 | except Exception as e:
76 | print("Error: " + stock)
77 | print(e)
78 |
79 | # Append current timestamp to the file name
80 | now = datetime.datetime.now()
81 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
82 | file_name = 'weeks_to_high_' + timestamp + '.csv'
83 | # Export the DataFrame to CSV
84 | result_df.to_csv(output_path + "/" + file_name, index=False)
85 | print(f'Saved file {file_name}')
86 |
87 | if __name__ == "__main__":
88 | main()
89 |
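90 | # Example (illustrative): given weekly highs of [100, 120, 110, 115, 118], the latest
91 | # week's high is 118; scanning backwards, the week with a high of 120 is the first one
92 | # that reached or crossed 118, so the function returns that week's index and 120, and
93 | # main() reports the days elapsed since that week and a diff% of -1.67%.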
--------------------------------------------------------------------------------
/py/ai/fininsightgpt/README.md:
--------------------------------------------------------------------------------
1 | # FinInsightGPT: AI-Powered Investment Analysis
2 |
3 | FinInsightGPT is an application that helps with investment decisions and business analysis by processing company data files, converting them to structured markdown, and generating comprehensive equity research reports using AI.
4 |
5 | ## Features
6 |
7 | - **Document Processing**: Converts various file formats (PDF, DOCX, PPTX, TXT, XLSX, images) to markdown
8 | - **Intelligent Image Analysis**: Uses OCR and AI vision to extract text and analyze charts/graphs
9 | - **Master File Generation**: Consolidates all company documents into a comprehensive master file
10 | - **AI Report Generation**: Creates detailed equity research reports using large language models (LLMs)
11 | - **Command-line Interface**: Easy-to-use CLI for all operations
12 |
13 | ## Installation
14 |
15 | 1. Clone this repository
16 | 2. Install the required dependencies:
17 |
18 | ```bash
19 | pip install -r requirements.txt
20 | ```
21 |
22 | 3. Install Tesseract OCR (for image processing):
23 | - macOS: `brew install tesseract`
24 | - Ubuntu/Debian: `sudo apt-get install tesseract-ocr`
25 | - Windows: Download from [GitHub](https://github.com/UB-Mannheim/tesseract/wiki)
26 |
27 | 4. Set up your environment variables:
28 | - Copy the template file: `cp .env.example .env`
29 | - Edit the `.env` file and add your OpenAI API key and model preferences:
30 |
31 | ```
32 | # OpenAI API Key
33 | OPENAI_API_KEY=your_openai_api_key_here
34 |
35 | # OpenAI Model IDs
36 | OPENAI_TEXT_MODEL=gpt-4-turbo
37 | OPENAI_VISION_MODEL=gpt-4-vision-preview
38 | ```
39 |
40 | ## Usage
41 |
42 | ### Directory Structure
43 |
44 | Place company files in folders under `company_data`:
45 |
46 | ```
47 | company_data/
48 | ├── company1/
49 | │ ├── file1.pdf
50 | │ ├── file2.txt
51 | │ └── image1.jpg
52 | └── company2/
53 | ├── presentation.pptx
54 | └── financials.xlsx
55 | ```
56 |
57 | ### Commands
58 |
59 | #### List available companies:
60 |
61 | ```bash
62 | python src/main.py list
63 | ```
64 |
65 | #### Process files for a company:
66 |
67 | ```bash
68 | python src/main.py process <company_name>
69 | ```
70 |
71 | #### Generate master file from processed files:
72 |
73 | ```bash
74 | python src/main.py master <company_name> [--output-dir <output_dir>]
75 | ```
76 |
77 | #### Generate report from master file:
78 |
79 | ```bash
80 | python src/main.py report <master_file> [--template <template_file>] [--output-dir <output_dir>] [--model <model_name>]
81 | ```
82 |
83 | #### Run the entire pipeline (process files, generate master, create report):
84 |
85 | ```bash
86 | python src/main.py all <company_name> [--template <template_file>] [--model <model_name>]
87 | ```
88 |
89 | ### Examples
90 |
91 | Process files for CDSL:
92 |
93 | ```bash
94 | python src/main.py process cdsl
95 | ```
96 |
97 | Generate a report for JyothyLabs using previously created master file:
98 |
99 | ```bash
100 | python src/main.py report jyothylabs_master_20250504_123456.md --model gpt-4-vision-preview
101 | ```
102 |
103 | Run the entire pipeline for a new company:
104 |
105 | ```bash
106 | python src/main.py all mynewcompany --model gpt-4-turbo
107 | ```
108 |
109 | ## Report Templates
110 |
111 | The system uses the template file in `prompt_master/Equity_Research_Report_Template.md` by default. This template contains:
112 |
113 | 1. A system prompt to instruct the AI model
114 | 2. A user prompt that defines the report structure and analysis requirements
115 |
116 | You can modify this template or create custom templates for different analysis styles.
117 |
118 | ## Dependencies
119 |
120 | - pymupdf: PDF processing
121 | - python-docx: DOCX processing
122 | - python-pptx: PowerPoint processing
123 | - pandas & openpyxl: Excel processing
124 | - Pillow & pytesseract: Image processing
125 | - openai: AI model integration
126 | - tiktoken: Token counting for LLM API calls
--------------------------------------------------------------------------------
/py/ai/turnaround/main.py:
--------------------------------------------------------------------------------
1 | # load .env into environment
2 | import os
3 | from dotenv import load_dotenv
4 | load_dotenv()
5 |
6 | from smolagents import CodeAgent, MLXModel
7 | from my_tools import search_web as web_fetcher
8 | from my_tools import save_report as save_report
9 | from my_tools import fs_reader as fs_reader
10 | from my_tools import cmd_executor as cmd_executor
11 | from smolagents import CodeAgent, LiteLLMModel
12 |
13 | import datetime
14 | import csv
15 |
16 | # Initialize the tools and models
17 | #local_model=mlx_model = MLXModel("Path to local model directory")
18 | model = LiteLLMModel(model_id="openai/gpt-4.1-mini", api_key=os.getenv("OPENAI_API_KEY"))
19 |
20 | #Create the agent with the model and tools
21 | agent = CodeAgent(tools=[web_fetcher, save_report, fs_reader, cmd_executor], model=model, additional_authorized_imports=["os", "openai", "json", "csv"]) # Not adding base tools.
22 |
23 | # Define the data directory and today's date
24 | data_dir = "data/financial_data.csv"
25 | date_today = datetime.datetime.now().strftime("%Y-%m-%d")
26 |
27 | instructions = f"""
28 | You are an expert financial analyst specializing in identifying turnarounds in companies. Analyze the company mentioned below in Step 1. With the gathered financial data and your analysis, generate a comprehensive markdown report that detects a potential turnaround, if any, for the company. To achieve this, you will follow these steps in sequence:
29 | Step 1. Company/Business Name/Stock Codes: {{business_name}}.
30 | Step 2. For this business, analyse if the business is experiencing a turnaround. Give a short report of your analysis. You will gather additional latest information using the web_fetcher tool. This includes searching for the latest financial reports, news, and other relevant information about the company.
31 | Step 3. After gathering enough information, you will prepare a report that includes a verdict about the turnaround potential of each business. The verdict can be "Strong Turnaround", "Weak Turnaround", or "No Turnaround".
32 | Step 4. Finally, format the report into a well-structured markdown document and save it to a file. You will ensure that the report contains the following sections:
33 | - Business Name
34 | - Summary of Financial Data
35 | - Analysis of Financial Health
36 | - Turnaround Potential Verdict
37 | Step 5. You will use the save_report tool to persist the report on disk. The report will be saved per business. You will pass the report content and the business name to the save_report tool.
38 |
39 | General instructions:
40 | You will use the web_fetcher tool to gather additional information about these businesses and the save_report tool to generate the markdown report. You can look up the latest financial reports, news and other relevant information for the company.
41 | Today is: {date_today}.
42 | Always search for tools available to you before writing new code, esp. the cmd_executor tool, which can execute read only shell commands to gather more information if needed.
43 | """
44 |
45 | # Read the financial data file and start the analysis
46 | print("Loading financial data from:", data_dir)
47 | if not os.path.exists(data_dir):
48 | raise FileNotFoundError(f"The financial data file {data_dir} does not exist. Please check the path.")
49 | businesses = []
50 | with open(data_dir, 'r', encoding='utf-8') as file:
51 | reader = csv.DictReader(file)
52 |     # For each row in the CSV, create an entry that contains the Name and stock codes. The columns are assumed to be Name, BSE Code, NSE Code. Since BSE Code or NSE Code may be missing, the businesses entry is built as a concatenation of whichever of Name/NSE Code/BSE Code are available.
53 | for row in reader:
54 | name = row.get('Name', 'Unknown')
55 | bse_code = row.get('BSE Code', '').strip()
56 | nse_code = row.get('NSE Code', '').strip()
57 | if not nse_code and not bse_code:
58 | business_entry = name
59 | elif not nse_code and bse_code:
60 | business_entry = f"""Name: {name} / BSE: {bse_code}"""
61 | elif nse_code and not bse_code:
62 | business_entry = f"""Name: {name} / NSE: {nse_code}"""
63 | else:
64 | business_entry = f"""Name: {name} / NSE: {nse_code} / BSE: {bse_code}"""
65 | businesses.append(business_entry)
66 |
67 | total_businesses = len(businesses)
68 | count = 0
69 | for business in businesses:
70 | print(f"Starting analyzing financial data and generating a report for {business}... Please wait.")
71 | final_instructions = instructions.format(business_name=business)
72 | #print(f"Final instructions for the agent: {final_instructions}")
73 | response = agent.run(final_instructions, max_steps=20)
74 | # Print progress
75 | count += 1
76 | print(f"Completed {count}/{total_businesses} businesses. Current business: {business}")
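77 |
78 | # Example (illustrative): a CSV row with Name='ABC Ltd', NSE Code='ABC' and BSE Code='500123'
79 | # produces the entry "Name: ABC Ltd / NSE: ABC / BSE: 500123", which is then substituted into
80 | # the instructions as {business_name} for that agent run.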
--------------------------------------------------------------------------------
/py/ai/fininsightgpt/prompt_master/Equity_Research_Report_Template.md:
--------------------------------------------------------------------------------
1 | # Equity Research Report Template
2 |
3 | ## System Prompt
4 |
5 | You are a financial analyst specializing in creating concise company reports. I need a comprehensive research report on {company}, an Indian publicly listed company, with deep insights based on publicly available data, including concalls, annual reports, news, and competitive analysis. The report should be for the company called {company}. Current datetime is {timestamp}.
6 |
7 | ## User Prompt
8 |
9 | The structure of the {company} report shall be as follows.
10 |
11 | ---
12 |
13 | ### 1. 📌 Company Overview
14 |
15 | - **Business Model and Key Segments**
16 | Briefly describe the company’s core business activities and primary segments. If diversified, outline the major business segments and how revenue mix has evolved over the last 3–5 years.
17 |
18 | - **Key Milestones**
19 | Highlight key events such as IPOs, product/service launches, expansions, strategic partnerships, or diversification moves.
20 |
21 | ---
22 |
23 | ### 2. 📈 Strategic Developments & Execution Analysis
24 |
25 | #### A. Business Expansion & Innovation
26 | - Detail new product or service launches, acquisitions, or innovations.
27 | - Mention R&D pipelines and any noteworthy technologies under development.
28 | - Include subsidiaries, joint ventures, and their strategic significance.
29 |
30 | #### B. Order Book & Execution Capacity
31 | - Present current size and growth trend of the order book. Include insights on order pipeline and execution win-rates.
32 | - Clarify execution visibility over the next 12–18 months.
33 |
34 | #### C. Capacity Expansion
35 | - Describe ongoing or planned projects/plants and their impact on production/revenue capacity.
36 | - Explain the funding strategy for capex (equity, debt, internal accruals).
37 |
38 | #### D. Risk Analysis
39 | - Discuss regulatory, operational, geopolitical, and credit-related risks.
40 | - Provide the latest credit ratings and any notable outlooks.
41 |
42 | #### E. Management & Governance
43 | - Assess management’s historical performance versus guidance.
44 | - Mention promoter shareholding patterns and governance quality.
45 | - Flag any litigation, controversies, or red flags if applicable.
46 |
47 | ---
48 |
49 | ### 3. 🏗 Recent Milestones & Notable Projects
50 | - Showcase recognition from industry/government bodies and key partnerships.
51 | - Highlight delivery of high-profile projects or export milestones.
52 | - Summarize improvements in net worth, margins, interest coverage, and profitability.
53 |
54 | ---
55 |
56 | ### 4. 🧭 “What’s New vs. What’s Next” Summary Table
57 |
58 | Create a table outlining:
59 | - Recent achievements, future plans, expected timelines, and revenue/strategic impact across areas such as Order Book, Capacity, Exports, R&D, and Financials.
60 |
61 | ---
62 |
63 | ### 5. 🆚 Competitive Analysis
64 |
65 | - Identify and compare peers using valuation (P/E, EV/EBITDA), RoE, margins, etc.
66 | - Discuss relative market positioning and entry barriers.
67 | - List key strengths and weaknesses compared to competitors.
68 | - Include visual peer comparison charts or tables.
69 |
70 | ---
71 |
72 | ### 6. 📰 News and Media Perception
73 |
74 | - List significant headlines from the past 2 years.
75 | - Compare media coverage with management’s communicated vision and strategies.
76 | - Mention any frauds, disputes, or controversies.
77 | - Provide an overall sentiment assessment (positive/neutral/negative).
78 |
79 | ---
80 |
81 | ### 7. 📊 Valuation & Investment Perspective
82 |
83 | - Include valuation metrics (P/E, EV/EBITDA, etc.) and compare with historical ranges and peers.
84 | - Perform a DCF analysis with assumptions and forecasts.
85 | - Evaluate whether the current market price justifies future earnings and growth.
86 | - Add broker/analyst consensus and sentiment.
87 | - Correlate stock price performance with earnings visibility.
88 |
89 | ---
90 |
91 | ### 8. 🚀 Key Catalysts to Watch (Near-Term)
92 |
93 | Track important near-term triggers such as:
94 | - Plant/project commissionings
95 | - Export order wins
96 | - Quarterly financial results and trends
97 |
98 | ---
99 |
100 | ### 9. 🧾 Conclusion & Investment Rationale
101 |
102 | - Provide a clear investment rating (Buy/Hold/Avoid) with reasoning.
103 | - Outline key upside/downside triggers.
104 | - Call out any immediate risks (regulatory, geopolitical, macro).
105 | - Mention near-term events that could impact re-rating potential.
106 |
107 | ---
108 |
109 | ## 📁 Data Sources to Refer to:
110 |
111 | - Last 3–5 Years of Annual Reports
112 | - Investor Presentations (especially the latest)
113 | - Earnings Call Transcripts (latest quarter)
114 | - Credit Rating Reports (if public)
115 | - Exchange Filings (NSE/BSE)
116 | - Financial News Sources (ET, BloombergQuint, Moneycontrol, etc.)
117 |
--------------------------------------------------------------------------------
/py/yf/weeklyRSIVolStopBO.py:
--------------------------------------------------------------------------------
1 | '''
2 | BOs of Nifty 500 stocks that gave a weekly breakout with RSI(14) crossing above 60
3 | Also, check if they are above the volstop(10,2.5)
4 | Also, check if they are above the 20-EMA
5 | Prefer stocks whose relative ratio is in an increasing trend on a 5-6M average
6 | All calculations on weekly timeframes
7 |
8 | Generally, such stocks that take repetitive support on a bullish RSI level,
9 | with backing of sectoral tailwind or strong fundamentals give good long term moves
10 | Exits can be planned on volstop break, or 20-EMA break or both with partial booking on
11 | break of one
12 | '''
13 |
14 | import yfinance as yf
15 | import pandas as pd
16 | import ta
17 | import datetime
18 |
19 | # Set output folder path
20 | output_path = "output"
21 |
22 | # Read the list of stocks from the CSV file
23 | stocks = pd.read_csv("stocks500.csv", header=0, usecols=["Ticker"])
24 |
25 | # Set start Date
26 | start_date = '2020-02-01'
27 |
28 | # Set end Date
29 | end_date = '2023-02-26'
30 |
31 | # Specify the benchmark symbol
32 | benchmark = "^NSEI"
33 |
34 | # Interval
35 | data_interval_weekly = '1wk'
36 |
37 | import yfinance as yf
38 | import pandas as pd
39 | import numpy as np
40 |
41 | def rsi_crossover(data, rsi_level):
42 | current_rsi = data.iloc[-1]['RSI']
43 | previous_rsi = data.iloc[-2]['RSI']
44 |     return previous_rsi <= rsi_level and current_rsi > rsi_level
45 |
46 | def volatility_stop(data, period, multiplier):
47 | high = data['High']
48 | low = data['Low']
49 | close = data['Close']
50 |
51 | atr = pd.Series((high - low).abs().rolling(period).mean(), name='ATR')
52 | direction = np.where(close.diff() > 0, 1, -1)
53 | vol_stop = close - direction * atr * multiplier
54 |
55 | data['volStop'] = vol_stop
56 | return data
57 |
58 | def ratio_mean(data, benchmark_data, length):
59 | # Calculate the relative strength of the stock by dividing its weekly closing price by the weekly closing price of the Nifty 50 index
60 | relative_strength = data['Close'] / benchmark_data['Close']
61 | data[f'relativeRatio'] = relative_strength
62 | # print(relative_strength.tail(10))
63 |
64 | # Calculate the mean of the relative strength values for length
65 | data[f'ratio{length}W'] = relative_strength.rolling(window=length).mean()
66 | return data
67 |
68 |
69 | def main():
70 | print("Started...")
71 | # Create the DataFrame
72 | result_df = pd.DataFrame(columns=['stock', 'Close', 'volStop10_2.5', 'ema20', 'RS-ratio', 'ratio-21W', 'RSI(14)'])
73 |
74 | # Benchmark data
75 | # Use yfinance to retrieve the benchmark data
76 | benchmark_ticker = yf.Ticker(benchmark)
77 | benchmark_data = benchmark_ticker.history(start=start_date, end=end_date, interval=data_interval_weekly,auto_adjust=False, prepost=False)
78 | benchmark_data = benchmark_data.dropna()
79 |
80 | # Iterate through the list of stocks
81 | for stock in stocks["Ticker"]:
82 | try:
83 | # Get the stock data from yfinance, dont adjust OHLC
84 | data = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_weekly,auto_adjust=False, prepost=False)
85 | # Drop those with NaN
86 | data = data.dropna()
87 |
88 | # Calculate the RSI using a 14-day period
89 | data['RSI'] = ta.momentum.RSIIndicator(data['Close'], window=14).rsi()
90 |             # Check if a crossover from a value lower than 60 has happened; we still need to look at the RSI trend on a charting platform
91 | if (rsi_crossover(data, 60)):
92 | # Calculate volStop
93 | data = volatility_stop(data, 10, 2.5)
94 | # Calculate ema20W
95 | data['ema20'] = ta.trend.EMAIndicator(data['Close'], window=20).ema_indicator()
96 | # Calculate the relative ratio and average 21W
97 | data = ratio_mean(data, benchmark_data, 21)
98 | curr_data = data.iloc[-1]
99 | row = {'stock': stock, 'Close': curr_data['Close'], 'volStop10_2.5': str(round(curr_data['volStop'], 2)), 'ema20': str(round(curr_data['ema20'], 2)), \
100 | 'RS-ratio': str(round(curr_data['relativeRatio'], 2)), 'ratio-21W': str(round(curr_data['ratio21W'], 2)), 'RSI(14)': str(round(curr_data['RSI'], 2))}
101 | # Append the new row to the DataFrame
102 | result_df.loc[len(result_df)] = row
103 |
104 | except Exception as e:
105 | print("Error: " + stock)
106 | print(e)
107 |
108 | # Append current timestamp to the file name
109 | now = datetime.datetime.now()
110 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
111 | file_name = 'weeklyRSIVolStopBO_' + timestamp + '.csv'
112 | # Export the DataFrame to CSV
113 | result_df.to_csv(file_name, index=False)
114 | print('Done')
115 |
116 | if __name__ == "__main__":
117 | main()
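118 |
119 | # Example (illustrative): rsi_crossover() flags a stock whose previous weekly RSI(14)
120 | # was at or below 60 and whose latest weekly RSI(14) is above 60, e.g. 58.4 -> 61.2
121 | # qualifies, while 61.0 -> 63.5 does not because the RSI was already above 60.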
--------------------------------------------------------------------------------
/py/yf/newHighMonthly.py:
--------------------------------------------------------------------------------
1 |
2 | '''
3 | This code also searches for new monthly highs, but not just ATHs.
4 | It does this by bounding the lookback window and requiring a minimum number of months between the new high
5 | and the historical high it is measured against.
6 | '''
7 | import yfinance as yf
8 | import pandas as pd
9 | import time
10 | import os
11 | from datetime import datetime, timedelta
12 |
13 | # Set output folder path
14 | output_path = "output"
15 |
16 | # Read the list of stocks from the CSV file
17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
18 |
19 | # Set the time frame to max
20 | time_frame = 'max'
21 |
22 | # Set the bar time frame
23 | data_interval = '1mo'
24 |
25 | # Set the maximum number of months to lookback
26 | LOOKBACK_LIMIT = 15 * 12 # Years in months
27 |
28 | # Set minimum number of months that this BO should be after
29 | MIN_BO_LENGTH = 50 #5 * 12 # Years in months
30 |
31 | # Initialize a list to store the results
32 | results = []
33 |
34 | # Crore
35 | One_Cr = 10000000
36 |
37 | # Columns in the report
38 | report_columns = ["Stock", "mcap", "High Close", "High Close Date", "Current Close", "#MonthsBO", "Diff", "sector" , "industry"]
39 |
40 | def write_dataframe_to_file(df, name):
41 | # Get the current timestamp
42 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
43 |
44 | # Create the filename
45 | filename = f'{name}_{timestamp}.csv'
46 | # Save the DataFrame as a CSV file with specific column names as the header
47 | df.to_csv(f'{output_path}/{filename}',index=False)
48 |
49 |
50 |
51 | def main():
52 | print("Started...")
53 | # create an empty dataframe to store the results
54 | results_df = pd.DataFrame(columns=report_columns)
55 | # Iterate through the list of stocks
56 | for stock in stocks["Ticker"]:
57 | try:
58 | # Get the stock data from yfinance, dont adjust OHLC
59 | ticker = yf.Ticker(stock+".NS")
60 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
61 | # Drop those with NaN
62 | data = data.dropna()
63 | # Drop last row, if 2nd last is already of the month
64 | if data.index[-1].month == data.index[-2].month:
65 | # Replace the values in the second-to-last row with the values in the last row
66 | data.loc[data.index[-2]] = data.loc[data.index[-1]]
67 | # Delete the last row
68 | data = data.drop(data.index[-1])
69 |
70 | if (len(data) < MIN_BO_LENGTH + 1):
71 | print(f'Skipping. Not enough data for {stock}, only {len(data)} available, minimum required {MIN_BO_LENGTH+1}')
72 | continue
73 |
74 | # Reverse the data frame to start from current candle
75 | stk_df = data.iloc[::-1]
76 |             max_lookback = LOOKBACK_LIMIT
77 |             if (len(stk_df) < LOOKBACK_LIMIT): # Limit lookback if data is not available for that long
78 |                 max_lookback = len(stk_df)
79 |
80 |             stk_df_max_lookback = stk_df.head(max_lookback)
81 | current_close = stk_df_max_lookback['Close'][0]
82 | for i in range(1, len(stk_df_max_lookback)):
83 | this_close = stk_df_max_lookback['Close'][i]
84 | if this_close > current_close:
85 | if i >= MIN_BO_LENGTH:
86 | highest_close_date = stk_df_max_lookback.index[i].strftime('%Y-%m-%d')
87 | diff = round((this_close - current_close)/current_close * 100, 2)
88 | # Essential data
89 | sector = ''
90 | industry = ''
91 | marketCap = ''
92 | try:
93 | if ticker.info:
94 | marketCap = round(ticker.info['marketCap'] / One_Cr, 0)
95 | industry = ticker.info['industry']
96 | sector = ticker.info['sector']
97 | except Exception as err:
98 | pass
99 | new_row = pd.DataFrame({"Stock": stock, "mcap": marketCap, "High Close": round(this_close, 2), "High Close Date": highest_close_date, \
100 | "Current Close": round(current_close, 2), "#MonthsBO": i, "Diff": diff, "sector": sector, "industry": industry}, index=[0])
101 | results_df = pd.concat([results_df, new_row])
102 | break
103 | else:
104 | break # A newer high exist before MIN_BO_LENGTH
105 | except Exception as e:
106 | print(f'Error for ticker: {stock} ==> {e}')
107 |
108 | # print(results_df)
109 | write_dataframe_to_file(results_df, "newHighMonthly_BO_")
110 | print("Done")
111 |
112 | if __name__ == "__main__":
113 | main()
114 |
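115 | # Example (illustrative): with MIN_BO_LENGTH = 50, suppose a stock's latest monthly close
116 | # is 95, no close in the previous 69 months exceeded 95, and the close 70 months ago was
117 | # 100 (within the 180-month lookback). The scan reports High Close = 100, #MonthsBO = 70
118 | # and Diff = 5.26, i.e. the old high is still about 5.26% above the current close.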
--------------------------------------------------------------------------------
/py/yf/multimonthBO.py:
--------------------------------------------------------------------------------
1 | import yfinance as yf
2 | import pandas as pd
3 | import time
4 | import os
5 | from datetime import datetime, timedelta
6 |
7 | # Set output folder path
8 | output_path = "output"
9 |
10 | # Read the list of stocks from the CSV file
11 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
12 |
13 | # Set the time frame to max
14 | time_frame = 'max'
15 |
16 | # Set the bar time frame
17 | data_interval = '1mo'
18 |
19 | # Set the minimum number of months since the last ath was breached
20 | MIN_MONTHS = 11
21 |
22 | # Threshold to previous ATH
23 | threshold = 1.0
24 |
25 | # Initialize a list to store the results
26 | results = []
27 |
28 | # Crore
29 | One_Cr = 10000000
30 |
31 | # determine if the highest close was at least min_months ago.
32 | def highestClose(stock_data, min_months):
33 |
34 | highest_close = stock_data["Close"][0]
35 | highest_close_date = stock_data.index[0]
36 | highest_close_idx = 0
37 | for i in range(1, len(stock_data)):
38 | if stock_data["Close"][i] > highest_close:
39 | highest_close = stock_data["Close"][i]
40 | highest_close_date = stock_data.index[i]
41 | highest_close_idx = i
42 | if len(stock_data) - highest_close_idx >= min_months:
43 | return [True, highest_close, highest_close_date]
44 | else:
45 | return [False, '', '']
46 |
47 | def write_dataframe_to_file(df, name):
48 | # Get the current timestamp
49 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
50 |
51 | # Create the filename
52 | filename = f'{name}_{timestamp}.csv'
53 | # Save the DataFrame as a CSV file with specific column names as the header
54 | df.to_csv(output_path + "/" + filename, index=False, columns=["Stock", "mcap", "Highest Close", "Highest Close Date", "Current Close", "Diff", "sector", "industry"])
55 |
56 |
57 | def main():
58 | print("Started...")
59 | # create an empty dataframe to store the results
60 | results_df = pd.DataFrame(columns=["Stock", "mcap", "Highest Close", "Highest Close Date", "Current Close", "Diff", "sector" , "industry"])
61 | # Iterate through the list of stocks
62 | for stock in stocks["Ticker"]:
63 | try:
64 | # Get the stock data from yfinance, dont adjust OHLC
65 | ticker = yf.Ticker(stock+".NS")
66 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
67 | # Drop those with NaN
68 | data = data.dropna()
69 | # Drop last row, if 2nd last is already of the month
70 | if data.index[-1].month == data.index[-2].month:
71 | # Replace the values in the second-to-last row with the values in the last row
72 | data.loc[data.index[-2]] = data.loc[data.index[-1]]
73 | # Delete the last row
74 | data = data.drop(data.index[-1])
75 |
76 | # print(data)
77 | if (len(data) <= 2):
78 | print(f'Skipping {stock} since not enough data present ')
79 | continue
80 |
81 | min_months = MIN_MONTHS
82 | if (len(data) < (MIN_MONTHS + 1)):
83 | print(f'{stock} has only {len(data)} months, trimming condition')
84 | min_months = len(data)
85 |
86 | # Highest close prior to last month
87 | result_highestClose = highestClose(data.iloc[:-1], min_months) # Skip the current month
88 | highestClose_condition = result_highestClose[0]
89 | highestClose_value = result_highestClose[1]
90 | highestClose_date = result_highestClose[2]
91 |
92 | # Essential data
93 | sector = ''
94 | industry = ''
95 | marketCap = ''
96 | try:
97 | if ticker.info:
98 | marketCap = round(ticker.info['marketCap'] / One_Cr, 0)
99 | industry = ticker.info['industry']
100 | sector = ticker.info['sector']
101 | except Exception as err:
102 | pass
103 |
104 | last_close = data["Close"].tail(1).values[0]
105 | if (highestClose_condition and last_close >= highestClose_value * threshold):
106 | diff = round(((last_close - highestClose_value) / highestClose_value) * 100, 2)
107 | new_row = pd.DataFrame({"Stock": stock, "mcap": marketCap, "Highest Close": round(highestClose_value, 2), "Highest Close Date": highestClose_date, \
108 | "Current Close": round(last_close, 2), "Diff": diff, "sector": sector, "industry": industry}, index=[0])
109 | results_df = pd.concat([results_df, new_row])
110 |
111 | except Exception as e:
112 | print(f'Error for ticker: {stock} ==> {e}')
113 |
114 | # print(results_df)
115 | write_dataframe_to_file(results_df, "MultiMonth_BO_")
116 | print("Done")
117 |
118 | if __name__ == "__main__":
119 | main()
120 |
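121 | # Example (illustrative): with MIN_MONTHS = 11 and threshold = 1.0, a stock whose highest
122 | # prior monthly close of 200 was made about 14 months ago and whose latest monthly close
123 | # is 205 gets reported with Diff = 2.5, since the current close is at or above that
124 | # multi-month high.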
--------------------------------------------------------------------------------
/py/beta/chatgpt/generate_report_for_company.py:
--------------------------------------------------------------------------------
1 | import re
2 | import model as ai
3 | import os
4 |
5 | screener_xls_data = {}
6 | delimiter = "####"
7 |
8 | company_data = 'companyinfo/sjvn'
9 |
10 | screener_tabs = ['income_statement','income_statement_quarterly', 'balance_sheet', 'cashflow_statement', 'ratio_analysis']
11 | screener_data = {}
12 |
13 | def preprocess_text(text):
14 | # Lowercase the text
15 | text = text.lower()
16 |
17 | # Remove special characters
18 | text = re.sub(r'\W', ' ', text)
19 |
20 | # Replace multiple spaces with a single space
21 | text = re.sub(r'\s+', ' ', text)
22 |
23 | return text
24 |
25 | def load_screener_data():
26 |
27 | for i in range(0, len(screener_tabs)):
28 | tabname = screener_tabs[i]
29 | f = open(f'{company_data}/{tabname}.txt')
30 | data = f.read()
31 | f.close()
32 | screener_data[tabname] = data
33 |
34 |
35 | def company_info_analysis():
36 | file = f'{company_data}/company_info.txt'
37 | data = 'No company info'
38 | if os.path.isfile(file):
39 | f = open(file)
40 | data = f.read()
41 | f.close()
42 | print('Analyzing company_info data...')
43 | system_message = f'As a financial analyst for equity markets, perform an evaluation of the company based on the inputs provided. The input is enclosed within {delimiter}.\
44 |         You must do the analysis in the following steps.\
45 |         Step 8: Prepare a short description of the business of the company, its factories, plants and operations in general.\
46 |         Step 9: Prepare the shareholding trend and status, separately, if shareholding data is provided. \
47 |         Step 10: Prepare a separate detailed summary of concall data if provided. \
48 |         Step 11: If credit rating data is provided, list out positive and negative points separately. \
49 |         Give your analysis in as detailed a manner as possible, however summarize it to limit to max_tokens = 2000 '
50 | user_message = f'{delimiter}{data}{delimiter}'
51 | messages = [
52 | {'role':'system',
53 | 'content': system_message},
54 | {'role':'user',
55 | 'content': f"{delimiter}{user_message}{delimiter}"},
56 | ]
57 | response = ai.get_completion_from_messages(messages,max_tokens=2000)
58 | return response
59 |
60 | def fin_statement_analysis():
61 | print('Analyzing screener data...')
62 | system_message = f'As a financial analyst for equity markets, you need to perform an evaluation of the company based on the inputs provided. Some of these inputs will be standard financial data and some will be unstructured. \
63 |     The input data will be enclosed with {delimiter}. You must do the analysis in the following steps. \
64 |     Step 1:{delimiter} Perform a financial analysis of the company from a stock market investing perspective, from its annual income statement, quarterly income statement, \
65 |     balance sheet and cashflow statement. Each will be provided to you enclosed as {delimiter}income_statement:{delimiter} {delimiter}balance_sheet{delimiter} and so on. \
66 |     Step 2: Using the ratio_analysis statement, analyze the working capital cycle. Step 3: Perform a Du-Pont analysis using the above data. Step 4: Perform a profitability analysis of this financial data. \
67 |     Step 5: Provide trend analysis and competitive advantages of the company based on the given financial data. Step 6: Check the pricing power of this company. \
68 |     Step 7: Detect and report any red flags about the company from the data. \
69 |     Step 8: Report preparation. Take special care. As an analyst, perform these analyses and prepare a report that is very detailed but summarize it to limit to max_tokens=2000.'
70 |
71 | msg = ''
72 | for key,val in screener_data.items():
73 |         msg += f'{delimiter}{key}:{val}{delimiter}'
74 | user_message = f'{delimiter}{msg}{delimiter}'
75 | messages = [
76 | {'role':'system',
77 | 'content': system_message},
78 | {'role':'user',
79 | 'content': f"{delimiter}{user_message}{delimiter}"},
80 | ]
81 | response = ''
82 | response = ai.get_completion_from_messages(messages,max_tokens=2000)
83 | return response
84 |
85 |
86 | def main():
87 | ai.set_api()
88 | load_screener_data()
89 |
90 | #Financial statement analysis from screener data
91 | fin_screener_analysis = ''
92 | fin_screener_analysis = fin_statement_analysis()
93 | # print(fin_screener_analysis)
94 | with open(f'{company_data}/financial_analysis.txt', 'w', encoding='utf-8') as file:
95 | file.write(fin_screener_analysis)
96 |
97 | #Perform company info analysis from data from internet and elsewhere
98 | co_info_analysis = ''
99 | co_info_analysis = company_info_analysis()
100 | with open(f'{company_data}/company_info_analysis.txt', 'w', encoding='utf-8') as file:
101 | file.write(co_info_analysis)
102 |
103 | print('Done')
104 |
105 | if __name__ == "__main__":
106 | main()
107 |
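108 | # Example (illustrative): load_screener_data() expects one text file per tab under
109 | # companyinfo/sjvn (e.g. income_statement.txt), and fin_statement_analysis() wraps each
110 | # tab's content as ####income_statement: ... #### when building the user message.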
--------------------------------------------------------------------------------
/py/yf/box_scan.py:
--------------------------------------------------------------------------------
1 | '''
2 | We detect a consolidation after a rally and quantify the box formation
3 | Rally is defined as 3 consecutive higher closes, and the high of that candle defines the top left of the box
4 | The low is extended with each new lower low
5 | '''
6 | import yfinance as yf
7 | import pandas as pd
8 | import datetime
9 | import matplotlib.pyplot as plt
10 | import matplotlib.patches as patches
11 |
12 |
13 | # Set the bar time frame
14 | data_interval = '1d'
15 | # Set the time frame to 90d
16 | time_frame = '90d'
17 |
18 | # Set output folder path
19 | output_path = "boxscan/output"
20 | # Initialize an empty DataFrame to store the output CSV data
21 | output_df = pd.DataFrame(columns=['Stock Code', 'Box Duration', 'Drawdown', 'Fall Rate'])
22 |
23 | # Read the list of stocks from the CSV file
24 | stocks = pd.read_csv("stocks500.csv", header=0, usecols=["Ticker"])
25 |
26 | # Box depth threshold %
27 | box_depth_threshold = -20
28 | # Rally days
29 | min_rally_days = 3
30 | # Box days
31 | min_days_in_box = 3
32 |
33 | # Function to plot and save chart and data
34 | def scan_for_box(df, stock_code):
35 |
36 | # Calculate 50-day average volume
37 | df['50_day_avg_vol'] = df['Volume'].rolling(window=50).mean()
38 |
39 | # Set up plot
40 | fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(20, 12), sharex=True, gridspec_kw={'height_ratios': [3, 1]})
41 | ax1.set_ylabel('Price')
42 | ax1.set_title(f'{stock_code} with Negative Drawdown')
43 | ax2.set_xlabel('Time')
44 | ax2.set_ylabel('Volume')
45 |
46 | # Initialize variables for debugging and the box
47 | rally_days = 0
48 | rally_volume_high = False
49 | box_start = None
50 | box_end = None
51 | box_high = None
52 | box_low = None
53 |
54 | # Iterate through the data to identify rallies, place debug dots, and draw the box
55 | for i in range(len(df)):
56 | color = 'g' if df.iloc[i]['Close'] >= df.iloc[i]['Open'] else 'r'
57 | vol_color = color
58 | vol_color = 'g' if i > 0 and df.iloc[i]['Close'] >= df.iloc[i-1]['Close'] else 'r'
59 |
60 | ax1.plot([i, i], [df.iloc[i]['Low'], df.iloc[i]['High']], color=color)
61 | ax1.add_patch(patches.Rectangle((i - 0.3, df.iloc[i]['Open']), 0.6, df.iloc[i]['Close'] - df.iloc[i]['Open'], facecolor=color))
62 | ax2.bar(i, df.iloc[i]['Volume'], color=vol_color, width=0.6)
63 |
64 | # Detect a rally
65 | if i > 0 and df.iloc[i]['Close'] > df.iloc[i - 1]['Close']:
66 | rally_days += 1
67 | if df.iloc[i]['Volume'] > df.iloc[i]['50_day_avg_vol']:
68 | rally_volume_high = True
69 | else:
70 | rally_days = 0
71 | rally_volume_high = False
72 |
73 | if rally_days >= min_rally_days and rally_volume_high:
74 | ax1.plot(i, df.iloc[i]['High'], 'o', color='orange')
75 | box_high = df.iloc[i]['High']
76 | box_low = df.iloc[i]['Low']
77 | box_start = i
78 |
79 | if box_start is not None:
80 | new_low = df.iloc[i]['Low']
81 | if new_low < box_low:
82 | box_low = new_low
83 | box_end = i
84 | ax1.add_patch(patches.Rectangle((box_start, box_low), box_end - box_start, box_high - box_low, fill=True, color='yellow', alpha=0.3))
85 |
86 | if df.iloc[i]['Close'] > box_high:
87 | box_start = None
88 | box_end = None
89 | box_high = None
90 | box_low = None
91 |
92 | # Book keeping
93 | if box_start is not None:
94 | box_days = (box_end - box_start) + 1
95 | box_drop_percent = -((box_high - box_low) / box_high) * 100
96 | box_fall_rate = round(-box_drop_percent / box_days, 2)
97 | text_str = f"Box Duration: {box_days} days\nDrawdown: {box_drop_percent:.2f}%\nFR: {box_fall_rate:.2f}"
98 | ax1.text(0.75, 0.1, text_str, transform=ax1.transAxes, fontsize=12, verticalalignment='bottom', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
99 |
100 | if box_end == len(df) - 1 and box_drop_percent > box_depth_threshold and box_days > min_days_in_box:
101 | plt.savefig(f"{output_path}/{stock_code}.png")
102 | output_df.loc[len(output_df)] = [stock_code, box_days, box_drop_percent, box_fall_rate]
103 | plt.close()
104 |
105 |
106 | def main():
107 | print('Started')
108 | # Iterate through the list of stocks
109 | for stock in stocks["Ticker"]:
110 | try:
111 | ticker = yf.Ticker(stock+".NS")
112 | stock_history = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
113 | stock_history = stock_history.dropna()
114 | scan_for_box(stock_history, stock)
115 | except Exception as e:
116 | print(f"Error: {stock} ==> {e}")
117 |
118 | # Append current timestamp to the file name
119 | now = datetime.datetime.now()
120 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
121 | file_name = f'{output_path}/box_scan_{timestamp}.csv'
122 | # Export the DataFrame to CSV
123 | output_df.to_csv(file_name, index=False)
124 | print(f'Done, output saved in {file_name}')
125 |
126 | if __name__ == "__main__":
127 | main()
128 |
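129 | # Example (illustrative): after 3 or more consecutive higher closes, at least one of them
130 | # on volume above the 50-day average, the high and low of the latest rally bar open a "box";
131 | # each subsequent lower low extends the box floor, and a close above the box high cancels it.
132 | # A chart and a CSV row are saved only when the box is still open on the latest bar, has
133 | # lasted more than 3 days and has drawn down less than 20% from its top.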
--------------------------------------------------------------------------------
/py/yf/green_dot.py:
--------------------------------------------------------------------------------
1 |
2 | import yfinance as yf
3 | import pandas as pd
4 | import numpy as np
5 | import datetime
6 |
7 | # Set output folder path
8 | output_path = "output"
9 |
10 | # Read the list of stocks from the CSV file
11 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
12 |
13 | # Set start Date
14 | start_date = '2020-01-01'
15 |
16 | # Set end Date
17 | end_date = '2023-01-21'
18 |
19 | # Specify the benchmark symbol
20 | benchmark = "^NSEI"
21 |
22 | # Interval
23 | data_interval_daily = '1d' # '1wk' or '1d'
24 | data_interval_weekly = '1wk'
25 |
26 | # Lookback for green dot
27 | lookback = 5
28 |
29 | def calculateReversionExpansion(stock_data):
30 | # Extract the close prices from the DataFrame
31 | src = stock_data["Close"]
32 |
33 | # Perform the EMA calculations
34 | l1, l2, l3, l4 = 20, 50, 100, 200 #EMA periods
35 |
36 | # Compute the exponential moving average with a lookback length of 20
37 | ema1 = src.ewm(span=l1).mean()
38 | ema2 = src.ewm(span=l2).mean()
39 | ema3 = src.ewm(span=l3).mean()
40 | ema4 = src.ewm(span=l4).mean()
41 |
42 | # Merge the series into one DataFrame
43 | merged_df = pd.concat([ema1, ema2, ema3, ema4], axis=1, keys=['EMA 20', 'EMA 50', 'EMA 100', 'EMA 200'])
44 | merged_df.fillna(0, inplace=True)
45 |     # Find the lowest and the highest of these EMAs
46 | merged_df['lowest'] = merged_df[(merged_df > 0)].min(axis=1)
47 | # Cheeky way to replace zero with a miniscule value to get rid of div by zero error
48 | merged_df['lowest'].replace(0, 1e-10, inplace=True)
49 | merged_df['highest'] = merged_df.max(axis=1)
50 |
51 | # Now, merge the close, otherwise lowest will consider Close values also
52 | merged_df = pd.concat([merged_df, src], axis=1)
53 | # Calculate delta between lowest and highest
54 | merged_df['delta'] = (merged_df['highest'] - merged_df['lowest']) / merged_df['lowest']
55 | # Calculate emadelta
56 | merged_df['emadelta'] = merged_df['delta'].ewm(span=7).mean()
57 | # Calculate delta between close and lowest ema
58 | merged_df['pricedelta'] = ( merged_df['Close'] - merged_df['lowest']) / merged_df['lowest']
59 | # Calculate ema of this pricedelta
60 | merged_df['emapricedelta'] = merged_df['pricedelta'].ewm(span=7).mean()
61 | # Determine if a crossover has happened between delta crossing over emadelta
62 | merged_df['crossover'] = np.where((merged_df['delta'] > merged_df['emadelta']) & (merged_df['delta'].shift(1) < merged_df['emadelta'].shift(1)), 1, 0)
63 | # Determine if a crossunder has happened between delta crossing over emadelta
64 | merged_df['crossunder'] = np.where((merged_df['delta'] < merged_df['emadelta']) & (merged_df['delta'].shift(1) > merged_df['emadelta'].shift(1)), 1, 0)
65 |
66 | return merged_df
67 |
68 | def checkforGreenDot(rev_exp_data):
69 |     # Check the last lookback rows for a crossover that is not followed by a crossunder
70 | rev_exp_data_21 = rev_exp_data.tail(lookback)
71 |
72 | crossover = False
73 | idx = ''
74 | delta = 0.0
75 | for index, row in rev_exp_data_21.iterrows():
76 | if (row['crossover'] == 1 and row['Close'] > row['highest']):
77 | crossover = True
78 | idx = index
79 | delta = row['delta']
80 |
81 | if (crossover and row['crossunder'] == 1):
82 | crossover = False
83 | return [crossover, idx, delta]
84 |
85 | def main():
86 | print("Started...")
87 | # Create the DataFrame
88 | result_df = pd.DataFrame(columns=['stock', 'dailyXoverDate', 'dailyDelta', 'weeklyXoverDate', 'weeklyDelta'])
89 | # Iterate through the list of stocks
90 | for stock in stocks["Ticker"]:
91 | try:
92 | # Get the stock data
93 | # Get the stock data from yfinance, dont adjust OHLC
94 | stock_data_daily = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_daily,auto_adjust=False, prepost=False)
95 | # Drop those with NaN
96 | stock_data_daily = stock_data_daily.dropna()
97 |
98 | # Calculate the entire series of reversion and expansion -- daily
99 | rev_exp_data = calculateReversionExpansion(stock_data_daily)
100 | result_daily = checkforGreenDot(rev_exp_data)
101 |
102 | # Weekly data
103 | stock_data_weekly = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_weekly,auto_adjust=False, prepost=False)
104 | # Drop those with NaN
105 | stock_data_weekly = stock_data_weekly.dropna()
106 |
107 | # Calculate the entire series of reversion and expansion -- weekly
108 | rev_exp_data_weekly = calculateReversionExpansion(stock_data_weekly)
109 | result_weekly = checkforGreenDot(rev_exp_data_weekly)
110 |
111 | condition = result_daily[0] or result_weekly[0]
112 | if (condition):
113 | row = {'stock': stock, 'dailyXoverDate': str(result_daily[1]), 'dailyDelta': str(result_daily[2]), 'weeklyXoverDate': str(result_weekly[1]), 'weeklyDelta': str(result_weekly[2])}
114 | # Append the new row to the DataFrame
115 | result_df.loc[len(result_df)] = row
116 |
117 | except Exception as e:
118 | print("Error: " + stock)
119 | print(e)
120 |
121 | # Append current timestamp to the file name
122 | now = datetime.datetime.now()
123 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
124 | file_name = 'green_dot_' + timestamp + '.csv'
125 | # Export the DataFrame to CSV
126 | result_df.to_csv(output_path + "/" + file_name, index=False)
127 |
128 |
129 | if __name__ == "__main__":
130 | main()
131 |
132 |
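133 | # Example (illustrative): a "green dot" is flagged when, within the last 5 bars, the spread
134 | # between the highest and lowest of the 20/50/100/200 EMAs ('delta') crosses above its own
135 | # 7-period EMA while the close is above the highest of those EMAs, and no cross back under
136 | # occurs later in that window.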
--------------------------------------------------------------------------------
/py/yf/trendreversal_ha.py:
--------------------------------------------------------------------------------
1 | '''
2 | We try to analyze trend reversal in stocks with major corrections
3 | In order to reduce noise we select monthly candles and further use HA
4 | 5 consecutive red candles, followed by 2 green candles should be a clean trend reversal
5 | These reversals must be validated with price action on lower timeframes.
6 | Also, one can confirm demand by checking lime volumes.
7 | Relative strength against the benchmark and sector must be checked.
8 | '''
9 | import yfinance as yf
10 | import pandas as pd
11 | import datetime
12 |
13 | # Folder location
14 | output = 'output'
15 |
16 | # Read the list of stocks from the CSV file
17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
18 |
19 | # Set the time frame to max
20 | time_frame = 'max'
21 |
22 | # Set the bar time frame
23 | data_interval = '1mo'
24 |
25 | # Crore
26 | One_Cr = 10000000
27 |
28 | def create_HA_Candles(df):
29 |
30 | # Create a new DataFrame to store the Heikin-Ashi values
31 | heikin_ashi_data = pd.DataFrame(index=df.index)
32 |
33 | if (len(df) < 2): # We need at least 2
34 | return heikin_ashi_data
35 |
36 | # Append the 'High' and 'Low' columns from the original data
37 | heikin_ashi_data[['High', 'Low']] = df[['High', 'Low']]
38 | # Calculate the Heikin-Ashi open, close, high, and low values
39 | heikin_ashi_data['HA_Close'] = (df['Open'] + df['High'] + df['Low'] + df['Close']) / 4
40 | # Handle the first row separately
41 | first_row_open = (df['Open'][0] + df['Close'][0]) / 2
42 | heikin_ashi_data['HA_Open'] = first_row_open
43 | # Calculate HA_Open correctly for subsequent rows
44 | for i in range(1, len(heikin_ashi_data)):
45 | heikin_ashi_data['HA_Open'][i] = (heikin_ashi_data['HA_Open'][i-1] + heikin_ashi_data['HA_Close'][i-1]) / 2
46 |
47 | heikin_ashi_data['HA_High'] = heikin_ashi_data[['HA_Open', 'HA_Close', 'High']].max(axis=1)
48 | heikin_ashi_data['HA_Low'] = heikin_ashi_data[['HA_Open', 'HA_Close', 'Low']].min(axis=1)
49 |
50 | # Drop the 'High' and 'Low' columns
51 | heikin_ashi_data.drop(['High', 'Low'], axis=1, inplace=True)
52 |
53 | #print(heikin_ashi_data.tail(5))
54 | return heikin_ashi_data
55 |
56 |
57 | def check_trend_change(df):
58 | # Check for the first 5 candles as red and the last 2 candles as green
59 | last_7_candles = df.tail(7) # Select the last 7 candles
60 |
61 | red_candles_count = 0
62 | green_candles_count = 0
63 | valid_pattern = False
64 |
65 | for i in range(5):
66 | candle = last_7_candles.iloc[i]
67 | if candle['HA_Close'] < candle['HA_Open']:
68 | red_candles_count += 1
69 | else:
70 | break
71 |
72 | for i in range(5, 7):
73 | candle = last_7_candles.iloc[i]
74 | if candle['HA_Close'] > candle['HA_Open']:
75 | green_candles_count += 1
76 | else:
77 | break
78 |
79 | if red_candles_count == 5 and green_candles_count == 2:
80 | valid_pattern = True
81 |
82 | return valid_pattern
83 |
84 |
85 | def main():
86 | print("Started... ")
87 | # Create the DataFrame
88 | df = pd.DataFrame(columns=['stock', 'mcap', 'vol1', 'vol2d', 'vol3d', 'sector' , 'industry'])
89 |
90 | # Iterate through the list of stocks
91 | for stock in stocks["Ticker"]:
92 | try:
93 | # Get the stock data from yfinance, dont adjust OHLC
94 | stk_ticker = yf.Ticker(stock+".NS")
95 | data = stk_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False)
96 | # Drop those with NaN
97 | data = data.dropna()
98 | if (len(data) < 2): # cannot do much analysis with 2 month candle
99 | continue
100 | # Drop last row, if 2nd last is already of the month
101 | if data.index[-1].month == data.index[-2].month:
102 | # Replace the values in the second-to-last row with the values in the last row
103 | data.loc[data.index[-2]] = data.loc[data.index[-1]]
104 | # Delete the last row
105 | data = data.drop(data.index[-1])
106 |
107 | heikin_ashi_data = create_HA_Candles(data)
108 | if (len(heikin_ashi_data) < 7) :
109 |                 print(f'Skipped for {stock}, too little data'); continue
110 |
111 | # Merge it to data
112 | heikin_ashi_data = heikin_ashi_data.join(data)
113 |
114 | # Check if there is a trend change
115 | if check_trend_change(heikin_ashi_data):
116 | sector = ''
117 | industry = ''
118 | marketCap = ''
119 | try:
120 | if stk_ticker.info:
121 | sector = stk_ticker.info['sector']
122 | industry = stk_ticker.info['industry']
123 | marketCap = round(stk_ticker.info['marketCap'] / One_Cr, 0)
124 | except Exception as err:
125 | pass
126 |
127 | # Get volume data
128 | vols = data.tail(3)['Volume']
129 | vol1 = vols[0]
130 | vol2d = vols[1] - vol1
131 | vol3d = vols[2] - vols[1]
132 |
133 | # Append to row
134 | row = {'stock': stock, 'mcap' : marketCap, 'vol1' : vol1, 'vol2d' : vol2d,'vol3d' : vol3d, 'sector' : sector, 'industry' : industry}
135 | # Append the new row to the DataFrame
136 | df.loc[len(df)] = row
137 |
138 | except Exception as e:
139 | print(f'Error for ticker {stock} ==> {e}')
140 | # Append current timestamp to the file name
141 | now = datetime.datetime.now()
142 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
143 | file_name = f'{output}/ha_trendreversal_{timestamp}.csv'
144 | # Export the DataFrame to CSV
145 | df.to_csv(file_name, index=False)
146 | print('Done')
147 |
148 |
149 | if __name__ == "__main__":
150 | main()
151 |
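152 | # Example (illustrative): for a monthly bar with Open=100, High=110, Low=95, Close=105,
153 | # HA_Close = (100+110+95+105)/4 = 102.5; if the previous bar's HA_Open and HA_Close were
154 | # 98 and 101, this bar's HA_Open = (98+101)/2 = 99.5. The scan then looks for 5 straight
155 | # red HA candles followed by 2 green ones within the last 7 monthly bars.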
--------------------------------------------------------------------------------
/py/eodhd/saucer_crs.py:
--------------------------------------------------------------------------------
1 | '''
2 | A script to determine a trend reversal. This script uses Relative Strength (Stock Price / Benchmark ratio).
3 | The script calculates the moving average of the relative strength values for a specified length (avg_length).
4 | It determines the current trend of this average, based on the following logic:
5 | - If the value of the average is rising, meaning greater than the max of any of the last 3 (trend_length) weeks, the trend is considered an uptrend. This is denoted by the letter G.
6 | - If the value of the average is falling, meaning less than the minimum of any of the last 3 (trend_length) weeks, the trend is considered a downtrend. This is denoted by the letter R.
7 | - If the value of the average is neither rising nor falling, the trend is considered sideways. This is denoted by the letter S.
8 | Next, the script will create a string of these trends (G,R,S) for the last 26 (analysis_window) weeks, with the most recent week being the last character in the string.
9 | It will save this string in the output column 'Trend' of the output CSV file.
10 | '''
11 |
12 | import pandas as pd
13 | import pricereader as pr
14 | import datetime
15 |
16 | # Set output folder path
17 | output_path = "output"
18 |
19 | # Read the list of stocks from the CSV file
20 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
21 |
22 | # Specify the benchmark symbol
23 | benchmark = "NSEI"
24 |
25 | # Interval
26 | data_interval_weekly = 'w'
27 |
28 | # Weekly CRS Average length
29 | avg_length = 52 # Weeks
30 | ratio_col = f'ratio{avg_length}W'
31 |
32 | # Trend length
33 | trend_length = 3 # Weeks
34 |
35 | # Window of analysis
36 | analysis_window = 26 # Weeks
37 |
38 | def ratio_mean(data, benchmark_data, avg_length):
39 | # Calculate the relative strength of the stock by dividing its weekly closing price by the weekly closing price of the Nifty 50 index
40 | relative_strength = data['Close'] / benchmark_data['Close']
41 | data[f'relativeRatio'] = relative_strength
42 | # print(relative_strength.tail(10))
43 |
44 | # Calculate the mean of the relative strength values for length
45 | data[ratio_col] = relative_strength.rolling(window=avg_length).mean()
46 | return data
47 |
48 |
49 | def rising(source, length):
50 | return source > source.shift(1).rolling(window=length).max()
51 |
52 | def falling(source, length):
53 | return source < source.shift(1).rolling(window=length).min()
54 |
55 | def sideways(source, length):
56 | # Sideways is true when not rising and not falling
57 | is_rising = rising(source, length)
58 | is_falling = falling(source, length)
59 | return ~(is_rising | is_falling) # Not rising and not falling
60 |
61 | def detect_reversal(sequence, initial_count, initial_type, transition_length, final_pattern):
62 | if sequence[:initial_count].count(initial_type) >= initial_count and sequence[-len(final_pattern):] == final_pattern:
63 | return True
64 | return False
65 |
66 | def main():
67 | print("Started...")
68 | # Create the DataFrame
69 | result_df = pd.DataFrame(columns=['stock', 'Trend Sequence', 'Reversal Message'])
70 |
71 | # Benchmark data
72 | benchmark_data = pr.get_price_data(benchmark, data_interval_weekly)
73 | benchmark_data = benchmark_data.dropna()
74 |
75 | # Iterate through the list of stocks
76 | for stock in stocks["Ticker"]:
77 | try:
78 | # Get the stock data, sample as below. Latest data is at the end
79 | '''
80 | Date,Open,High,Low,Close,Volume,Adj Close
81 | 2017-11-16,400.0,400.0,361.0,361.0,29447,361.0
82 | 2017-11-20,343.0,343.0,279.45,279.45,5389,279.45
83 | 2017-11-27,265.5,265.5,194.15,206.45,613081,206.45
84 | 2017-12-04,196.0,227.55,181.0,227.55,615553,227.55
85 | 2017-12-11,238.9,290.25,238.9,290.25,87251,290.25
86 | '''
87 | data = pr.get_price_data(stock, data_interval_weekly)
88 | # Drop those with NaN
89 | data = data.dropna()
90 |
91 | # Calculate the relative ratio and average avg_lengthW
92 | data = ratio_mean(data, benchmark_data, avg_length)
93 |
94 | # Apply the rising, falling, and sideways functions
95 | data['MA_rising'] = rising(data[ratio_col], trend_length)
96 | data['MA_falling'] = falling(data[ratio_col],trend_length)
97 | data['MA_sideways'] = sideways(data[ratio_col], trend_length)
98 |
99 | # Extract the last analysis_window rows
100 | analysis_data = data[['MA_rising', 'MA_falling', 'MA_sideways']].tail(analysis_window)
101 |
102 |             # Create a sequence string from the last analysis_window rows
103 | sequence = ''.join(['G' if row['MA_rising'] else 'R' if row['MA_falling'] else 'S' for index, row in analysis_data.iterrows()])
104 |
105 |             # Detect reversals: the first 14 weeks of the window in the prior trend and the last 2 weeks in the opposite trend; in between we do not care
106 | bullish_reversal = detect_reversal(sequence, 14, 'R', 4, 'GG')
107 | bearish_reversal = detect_reversal(sequence, 14, 'G', 4, 'RR')
108 |
109 | # Determine reversal message
110 | reversal_message = ""
111 | if bullish_reversal:
112 | reversal_message = "Bullish reversal detected."
113 | elif bearish_reversal:
114 | reversal_message = "Bearish reversal detected."
115 |
116 | # Save the results to the DataFrame
117 | row = {'stock': stock, 'Trend Sequence': sequence, 'Reversal Message': reversal_message}
118 | # Append the new row to the DataFrame
119 | result_df.loc[len(result_df)] = row
120 | except Exception as e:
121 | print("Error: " + stock)
122 | print(e)
123 |
124 | # Append current timestamp to the file name
125 | now = datetime.datetime.now()
126 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
127 | file_name = 'weeklyRS_Saucer_' + timestamp + '.csv'
128 | # Export the DataFrame to CSV
129 | result_df.to_csv(output_path + "/" + file_name, index=False)
130 | print('Done')
131 |
132 | if __name__ == "__main__":
133 | main()
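134 |
135 | # Example (illustrative): detect_reversal(seq, 14, 'R', 4, 'GG') returns True when the first
136 | # 14 characters of the 26-week sequence are all 'R' and the sequence ends in 'GG'; main()
137 | # then reports "Bullish reversal detected." for that stock.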
--------------------------------------------------------------------------------
/py/yf/supply_exhaustion_6m_scan.py:
--------------------------------------------------------------------------------
1 | import yfinance as yf
2 | import pandas as pd
3 | import os
4 | from datetime import datetime, timedelta
5 |
6 | # Set output folder path
7 | output_path = "output"
8 |
9 | # Read the list of stocks from the CSV file
10 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
11 |
12 | # Set start Date
13 | start_date = '2021-01-24'
14 |
15 | # Set end Date
16 | end_date = '2023-01-25'
17 |
18 | # Interval
19 | data_interval = '1d'
20 |
21 | # lowest close lookback dataset length
22 | lowest_low_lookback = 250
23 |
24 | # minimum days since last lowest close
25 | minimum_low_length = 123
26 |
27 | # minimum days since last peak after lowest close
28 | minimum_days_since_high = 55
29 |
30 | # determine highest close in the dataset, prior to the lowest low
31 | def highestClose(stock_data):
32 | highest_close = stock_data["Close"][0]
33 | highest_close_date = stock_data.index[0]
34 | for i in range(1, len(stock_data)):
35 | if stock_data["Close"][i] >= highest_close:
36 | highest_close = stock_data["Close"][i]
37 | highest_close_date = stock_data.index[i]
38 |
39 | return [highest_close, highest_close_date]
40 |
41 |
42 | # determine if lowest close was minimum_low_length ago.
43 | def lowestLow(stock_data):
44 |
45 | lowest_close = stock_data["Close"][0]
46 | lowest_close_date = stock_data.index[0]
47 | lowest_close_idx = 0
48 | for i in range(1, len(stock_data)):
49 | if stock_data["Close"][i] <= lowest_close:
50 | lowest_close = stock_data["Close"][i]
51 | lowest_close_date = stock_data.index[i]
52 | lowest_close_idx = i
53 | if len(stock_data) - lowest_close_idx >= minimum_low_length:
54 | return [True, lowest_close, lowest_close_date]
55 | else:
56 | return [False, '', '']
57 |
58 | def write_dataframe_to_file(df, name):
59 | # Get the current timestamp
60 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
61 |
62 | # Create the filename
63 | filename = f'{name}_{timestamp}.csv'
64 | # Save the DataFrame as a CSV file with specific column names as the header
65 | df.to_csv(output_path + "/" + filename, index=False, columns=["Stock", "Lowest Close", "Low Date", "High Prior", "High Prior Date", "23_6 Retrace", \
66 | "38_2 Retrace", "50_0 Retrace", "Curr/High %"])
67 |
68 |
69 | def main():
70 | print("Started...")
71 | # create an empty dataframe to store the results
72 | results_df = pd.DataFrame(columns=["Stock", "Lowest Close", "Low Date", "High Prior", "High Prior Date", "23_6 Retrace", "38_2 Retrace", \
73 | "50_0 Retrace", "Curr/High %"])
74 | # Iterate through the list of stocks
75 | for stock in stocks["Ticker"]:
76 | try:
77 | result_lowestLow = [False, '', '']
78 | below_23_6 = False
79 | below_38_2 = False
80 | below_50 = False
81 |
82 | # Get the stock data
83 |             # Get the stock data from yfinance, don't adjust OHLC
84 | stock_data = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval,auto_adjust=False, prepost=False)
85 | # Drop those with NaN
86 | stock_data = stock_data.dropna()
87 |
88 |             # Lowest low should be at least minimum_low_length bars before the latest bar
89 | result_lowestLow = lowestLow(stock_data.tail(lowest_low_lookback))
90 | lowest_low_condition = result_lowestLow[0]
91 | lowest_low_close = result_lowestLow[1]
92 | lowest_low_date = result_lowestLow[2]
93 |
94 |             # If the lowest-low condition is met, find the highest close in the data set prior to the lowest-low date
95 | if (lowest_low_condition):
96 |                 # Get dataset up to lowest_low_date
97 | before_low_data = stock_data.loc[stock_data.index < lowest_low_date]
98 |
99 |                 # Get highest close prior to the low
100 | result_highestClosePriorr = highestClose(before_low_data)
101 | highest_Priorr_close = result_highestClosePriorr[0]
102 | highest_Priorr_date = result_highestClosePriorr[1]
103 |
104 |                 # Calculate the difference between the prior high close and the lowest close
105 | diff = (highest_Priorr_close - lowest_low_close)
106 | # 23.6%, 38.2% and 50% retracement value
107 | level_23_6 = lowest_low_close + (diff * 0.236)
108 | level_38_2 = lowest_low_close + (diff * 0.382)
109 | level_50 = lowest_low_close + (diff * 0.50)
110 |
111 | # Get dataset after lowest_low_date
112 | after_low_data = stock_data.loc[stock_data.index > lowest_low_date]
113 | # Get highest after low
114 | result_highestCloseAfter = highestClose(after_low_data)
115 | highest_after_close = result_highestCloseAfter[0]
116 | highest_after_date = result_highestCloseAfter[1]
117 |
118 |                 # Check whether the highest close after the low stays within the retracement levels
119 | if highest_after_close <= level_50:
120 | below_50 = True
121 | if highest_after_close <= level_38_2:
122 | below_38_2 = True
123 | if highest_after_close <= level_23_6:
124 | below_23_6 = True
125 |                 # Calculate the current price's distance from the highest close after the low
126 | current_close = stock_data["Close"].tail(1).values[-1]
127 | curr_diff = round(((current_close - highest_after_close) / (highest_after_close)) * 100, 2)
128 |
129 | if (below_50 or below_23_6 or below_38_2):
130 | new_row = pd.DataFrame({"Stock": stock, "Lowest Close": lowest_low_close, "Low Date": lowest_low_date, "High Prior": highest_Priorr_close, \
131 | "High Prior Date": highest_Priorr_date, "23_6 Retrace": below_23_6, "38_2 Retrace": below_38_2, "50_0 Retrace": below_50, \
132 | "Curr/High %": curr_diff}, index=[0])
133 | results_df = pd.concat([results_df, new_row])
134 |
135 | except Exception as e:
136 | print("Error: " + stock)
137 | print(e)
138 |
139 | # print(results_df)
140 | write_dataframe_to_file(results_df, "Supply_Exhaustion_6M_")
141 | print("Done")
142 |
143 | if __name__ == "__main__":
144 | main()
145 |
--------------------------------------------------------------------------------
/py/ai/fininsightgpt/src/master_file_generator.py:
--------------------------------------------------------------------------------
1 | """
2 | Master File Generator Module
3 |
4 | This module handles the creation of the consolidated master markdown file from individual markdown files.
5 | """
6 |
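# Example usage (illustrative company name and paths, not taken from this repo's data):
#   master_path = generate_master_file("acme_ltd", ["company_data/acme_ltd/processed/annual_report.md"])
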
7 | import os
8 | import re
9 | import logging
10 | from pathlib import Path
11 | from typing import List, Optional
12 | import datetime
13 |
14 | # Configure logging
15 | logging.basicConfig(
16 | level=logging.INFO,
17 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
18 | )
19 | logger = logging.getLogger(__name__)
20 |
21 |
22 | def generate_master_file(
23 | company_name: str,
24 | markdown_files: List[str],
25 | output_dir: Optional[str] = None
26 | ) -> str:
27 | """Generate a consolidated master markdown file for a company.
28 |
29 | Args:
30 | company_name: Name of the company
31 | markdown_files: List of paths to markdown files to include
32 | output_dir: Directory to save the master file (defaults to company folder)
33 |
34 | Returns:
35 | Path to the generated master file
36 | """
37 | logger.info(f"Generating master file for {company_name} from {len(markdown_files)} markdown files")
38 |
39 | # Create timestamp for the master file
40 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
41 | master_filename = f"{company_name}_master_{timestamp}.md"
42 |
43 | # Determine output directory
44 | if output_dir is None:
45 | # Try to infer from the first markdown file
46 | if markdown_files:
47 | first_file = Path(markdown_files[0])
48 | output_dir = first_file.parent.parent # Go up one level from processed/
49 | else:
50 | output_dir = os.getcwd()
51 |
52 | output_path = Path(output_dir) / master_filename
53 |
54 | # Prepare master file content
55 | master_content = [
56 | f"# {company_name.upper()} - Consolidated Analysis",
57 | f"Generated on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
58 | f"Number of source documents: {len(markdown_files)}",
59 | "\n---\n"
60 | ]
61 |
62 | # Table of Contents
63 | toc = ["## Table of Contents"]
64 |
65 | # Track sections for organizing content
66 | sections = {
67 | "Financial Data": [],
68 | "Business Overview": [],
69 | "Management": [],
70 | "Industry Analysis": [],
71 | "News & Media": [],
72 | "Miscellaneous": []
73 | }
74 |
75 | # Process each markdown file
76 | for idx, md_file in enumerate(markdown_files):
77 | try:
78 | with open(md_file, 'r', encoding='utf-8') as f:
79 | content = f.read()
80 |
81 | # Extract filename for reference
82 | filename = Path(md_file).stem
83 |
84 | # Determine section based on content keywords
85 | section = "Miscellaneous"
86 | content_lower = content.lower()
87 |
88 | if any(kw in content_lower for kw in ["profit", "revenue", "financial", "balance sheet", "income", "statement", "ratio"]):
89 | section = "Financial Data"
90 | elif any(kw in content_lower for kw in ["business", "product", "service", "segment", "overview"]):
91 | section = "Business Overview"
92 | elif any(kw in content_lower for kw in ["ceo", "director", "management", "board"]):
93 | section = "Management"
94 | elif any(kw in content_lower for kw in ["industry", "market", "competitor", "competition"]):
95 | section = "Industry Analysis"
96 | elif any(kw in content_lower for kw in ["news", "press", "announcement", "media"]):
97 | section = "News & Media"
98 |
99 | # Add to appropriate section
100 | sections[section].append((filename, content))
101 |
102 | # Add to TOC
103 | toc.append(f"- [{filename}](#{filename.lower().replace(' ', '-')})")
104 |
105 | except Exception as e:
106 | logger.error(f"Error processing markdown file {md_file}: {str(e)}")
107 | sections["Miscellaneous"].append((
108 | f"Error_{idx}",
109 | f"Error processing file {md_file}: {str(e)}"
110 | ))
111 |
112 | # Add TOC to master content
113 | master_content.extend(toc)
114 | master_content.append("\n---\n")
115 |
116 | # Add content by section
117 | for section_name, section_contents in sections.items():
118 | if section_contents:
119 | master_content.append(f"# {section_name}")
120 |
121 | for filename, content in section_contents:
122 | # Add section anchor
123 |                 master_content.append(f'<a id="{filename.lower().replace(" ", "-")}"></a>')  # HTML anchor matching the TOC link
124 |
125 | # Clean up the content by removing the first heading if it matches the filename
126 | # This avoids duplication with our added heading
127 | content_lines = content.split("\n")
128 | if len(content_lines) > 0 and content_lines[0].startswith("# ") and filename in content_lines[0]:
129 | content = "\n".join(content_lines[1:])
130 |
131 | master_content.append(f"## {filename}")
132 | master_content.append(content)
133 | master_content.append("\n---\n")
134 |
135 | # Add metadata and summary section
136 | master_content.append("# Metadata")
137 | master_content.append("## Document Sources")
138 |
139 | sources_table = ["| Source | Type | Date Included |"]
140 | sources_table.append("| --- | --- | --- |")
141 |
142 | for md_file in markdown_files:
143 | file_path = Path(md_file)
144 | file_type = file_path.suffix
145 | file_date = datetime.datetime.fromtimestamp(os.path.getmtime(md_file)).strftime('%Y-%m-%d')
146 | sources_table.append(f"| {file_path.stem} | {file_type} | {file_date} |")
147 |
148 | master_content.extend(sources_table)
149 |
150 | # Write the master file
151 | try:
152 | with open(output_path, 'w', encoding='utf-8') as f:
153 | f.write("\n\n".join(master_content))
154 | logger.info(f"Master file generated: {output_path}")
155 | except Exception as e:
156 | logger.error(f"Error writing master file: {str(e)}")
157 | return ""
158 |
159 | return str(output_path)
--------------------------------------------------------------------------------
/py/eodhd/gareebman_entry_exit.py:
--------------------------------------------------------------------------------
1 | '''
2 | We are working here on identifying my favorite point in a company's business:
3 | when there is a turnaround. This analysis tries to capture that from a price
4 | movement perspective.
5 |
6 | We rely solely on technical indicators for shortlisting in this scan. Ideally
7 | we look for long bases, and then check whether price is bottoming out and
8 | then picking up.
9 |
10 | To keep it simple, we will track only RSI and Volstop.
11 | For favourable entries into the watchlist we look for (on the weekly timeframe)
12 | rsi > threshold (45) and Volstop in an uptrend. We check against the previous
13 | weeks to see if we had a "False" and now have a "True". This means an entry.
14 | We don't expect to see too many flip-flops.
15 | We also define an exit (from the watchlist) when Volstop is in a downtrend.
16 | Again, the same logic of comparing with the previous week applies.
17 |
18 | We are also keeping a count of the current "entry" or "exit" run. So, let us say
19 | the "trend" is "entry" and the "duration" is 8; it means the entry condition was satisfied
20 | 8 bars ago and it has remained "entry" since (without the "exit" condition triggering).
21 |
22 | So, do not confuse this with the normal "entry" - "exit" terminology and method of
23 | trading. "entry" doesn't mean sell your house and take a position. It means start
24 | to track the stock.
25 | '''
26 |
27 | import pandas as pd
28 | import numpy as np
29 | import ta
30 | from ta.volatility import AverageTrueRange
31 | import datetime
32 | import pricereader as pr
33 |
34 | # Set output folder path
35 | output_path = "output"
36 |
37 | # Read the list of stocks from the CSV file
38 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
39 |
40 | # Interval
41 | data_interval_weekly = 'w'
42 |
43 | # RSI period length
44 | rsi_length = 14
45 | # RSI weekly threshold
46 | rsi_weekly_threshold = 45
47 |
48 | def rsi(data):
49 | # Calculate the RSI
50 | data['rsi'] = ta.momentum.RSIIndicator(data['Close'], window=rsi_length).rsi()
51 | return data
52 |
53 | def calculate_true_range(df):
54 | high_low = df['High'] - df['Low']
55 | high_close = np.abs(df['High'] - df['Close'].shift())
56 | low_close = np.abs(df['Low'] - df['Close'].shift())
57 | true_ranges = pd.concat([high_low, high_close, low_close], axis=1)
58 | return true_ranges.max(axis=1)
59 |
60 | def calculate_atr(df, atrlen):
61 | df['TR'] = calculate_true_range(df)
62 | return df['TR'].rolling(window=atrlen, min_periods=1).mean()
63 |
64 | def vol_stop(df, atrlen=10, atrfactor=2.0):
65 | df['ATR'] = calculate_atr(df, atrlen) * atrfactor
66 | max_val = df['Close'].iloc[0]
67 | min_val = df['Close'].iloc[0]
68 | uptrend = True
69 | stop = 0.0
70 |
71 | stops = []
72 | uptrends = []
73 |
74 | for index, row in df.iterrows():
75 | max_val = max(max_val, row['Close'])
76 | min_val = min(min_val, row['Close'])
77 | atrM = row['ATR']
78 |
79 | if uptrend:
80 | stop = max(stop, max_val - atrM)
81 | else:
82 | stop = min(stop, min_val + atrM)
83 |
84 | if row['Close'] - stop >= 0.0:
85 | uptrend = True
86 | else:
87 | uptrend = False
88 |
89 |         if (uptrend != uptrends[-1]) if uptrends else True:  # trend flipped (or first bar): reset extremes and stop
90 | max_val = row['Close']
91 | min_val = row['Close']
92 | stop = max_val - atrM if uptrend else min_val + atrM
93 |
94 | stops.append(stop)
95 | uptrends.append(uptrend)
96 |
97 | df['VolStop'] = stops
98 | df['Uptrend'] = uptrends
99 | return df
100 |
101 | def main():
102 | print("Started...")
103 | # Create the DataFrame
104 | result_df = pd.DataFrame(columns=['stock', 'Close', 'VolStop10_2.0', 'RSI(14)', 'Entry', 'Exit', 'Trend', 'Duration'])
105 | # Iterate through the list of stocks
106 | for stock in stocks["Ticker"]:
107 | try:
108 | # Get the stock data
109 | data = pr.get_price_data(stock, data_interval_weekly)
110 | # Drop those with NaN
111 | data = data.dropna()
112 |
113 | # Get RSI data
114 | data = rsi(data)
115 |
116 | # Get VolStop
117 | data = vol_stop(data)
118 |
119 | # Creating the 'entry' column
120 | data['entry'] = (data['rsi'] > rsi_weekly_threshold) & data['Uptrend']
121 |
122 | # Creating the 'exit' column
123 | data['exit'] = ~data['Uptrend']
124 |
125 | # Check entry toggle
126 | entry = data['entry'].iloc[-1] and not data['entry'].iloc[-2]
127 |
128 | # Check exit toggle
129 | exit = data['exit'].iloc[-1] and not data['exit'].iloc[-2]
130 |
131 | # Combine the 'entry' and 'exit' columns into a single column representing the current trend
132 | data['trend'] = np.where(data['entry'], 'entry', 'exit')
133 |
134 | # Identify where the trend changes
135 | trend_changes = data['trend'].ne(data['trend'].shift()).cumsum()
136 |
137 | # Group by these changes and count within each group
138 | data['trend_duration'] = data.groupby(trend_changes).cumcount() + 1
139 |
140 | row = {}
141 |
142 | if (entry or exit):
143 | row = {'stock': stock,'Close': str(round(data['Close'].iloc[-1], 2)),'VolStop10_2.0':str(round(data['VolStop'].iloc[-1])), \
144 | 'RSI(14)':str(round(data['rsi'].iloc[-1])), 'Trend': data['trend'].iloc[-1], \
145 | 'Duration': data['trend_duration'].iloc[-1], 'Entry':entry,'Exit':exit}
146 | else:
147 | row = {'stock': stock,'Close': str(round(data['Close'].iloc[-1], 2)),'VolStop10_2.0':str(round(data['VolStop'].iloc[-1])), \
148 | 'RSI(14)':str(round(data['rsi'].iloc[-1])), 'Trend': data['trend'].iloc[-1], \
149 | 'Duration': data['trend_duration'].iloc[-1], 'Entry':'-','Exit':'-'}
150 |
151 | # Append the new row to the DataFrame
152 | result_df.loc[len(result_df)] = row
153 |
154 | except Exception as e:
155 | print("Error: " + stock)
156 | print(e)
157 |
158 | # Append current timestamp to the file name
159 | now = datetime.datetime.now()
160 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
161 | file_name = f'{output_path}/gareebman_report_' + timestamp + '.csv'
162 | # Export the DataFrame to CSV
163 | result_df.to_csv(file_name, index=False)
164 | print('Done')
165 |
166 | if __name__ == "__main__":
167 | main()
168 |
--------------------------------------------------------------------------------
/py/ai/turnaround/README.md:
--------------------------------------------------------------------------------
1 | # Business Turnaround Detection System
2 |
3 | An AI-powered financial analysis tool that identifies potential business turnarounds by analyzing companies listed in a CSV file. The system uses advanced AI agents to research financial data and market conditions for each company, generating comprehensive markdown reports with turnaround potential verdicts.
4 |
5 | ## 🎯 Purpose
6 |
7 | This tool is designed to help investors and analysts identify companies that may be experiencing business turnarounds by:
8 | - Fetching latest financial reports and news
9 | - Analyzing financial health indicators
10 | - Determining turnaround potential with AI-driven insights
11 | - Generating structured markdown reports for each company
12 |
13 | ## 📁 Project Structure
14 |
15 | ```
16 | turnaround/
17 | ├── main.py # Main execution script
18 | ├── data/
19 | │ └── financial_data.csv # Input CSV with company data
20 | ├── my_tools/ # Custom tools for the AI agent
21 | │ ├── __init__.py
22 | │ ├── cmd_executor.py # Shell command execution tool
23 | │ ├── fs_reader.py # File system reader tool
24 | │ ├── markdown_report.py # Report generation tool
25 | │ └── web_fetcher.py # Web search tool
26 | ├── output/ # Generated reports directory
27 | └── README.md # This file
28 | ```
29 |
30 | ## 🔧 Prerequisites
31 |
32 | Before running this project, ensure you have:
33 |
34 | 1. **Python 3.8+** installed
35 | 2. **OpenAI API Key** - Required for the AI agent
36 | 3. **Internet connection** - For web research functionality
37 |
38 | ## 📦 Installation & Setup
39 |
40 | ### 1. Install Required Dependencies
41 |
42 | #### Option A: Using requirements.txt (Recommended)
43 | ```bash
44 | pip install -r requirements.txt
45 | ```
46 |
47 | #### Option B: Manual Installation
48 | ```bash
49 | pip install smolagents python-dotenv openai litellm pandas numpy requests
50 | ```
51 |
52 | ### 2. Environment Configuration
53 |
54 | Create a `.env` file in the project root directory:
55 |
56 | ```bash
57 | touch .env
58 | ```
59 |
60 | Add your OpenAI API key to the `.env` file:
61 |
62 | ```
63 | OPENAI_API_KEY=your_openai_api_key_here
64 | ```
65 |
66 | ### 3. Prepare Input Data
67 |
68 | Ensure your `data/financial_data.csv` file follows this format:
69 |
70 | ```csv
71 | Name,BSE Code,NSE Code
72 | 63 Moons Tech.,526881,63MOONS
73 | Apex Frozen Food,540692,APEX
74 | Arman Financial,531179,ARMANFIN
75 | ```
76 |
77 | **Required Columns:**
78 | - `Name`: Company name (required)
79 | - `BSE Code`: Bombay Stock Exchange code (optional)
80 | - `NSE Code`: National Stock Exchange code (optional)
81 |
82 | ### 4. Create Output Directory
83 |
84 | ```bash
85 | mkdir -p output
86 | ```
87 |
88 | ## 🚀 Usage
89 |
90 | ### Basic Execution
91 |
92 | Run the turnaround analysis:
93 |
94 | ```bash
95 | cd /path/to/turnaround
96 | python main.py
97 | ```
98 |
99 | ### What Happens During Execution
100 |
101 | 1. **Data Loading**: Reads companies from `data/financial_data.csv`
102 | 2. **AI Analysis**: For each company, the AI agent:
103 | - Searches web for latest financial reports
104 | - Gathers recent news and market data
105 | - Analyzes financial health indicators
106 | - Determines turnaround potential
107 | 3. **Report Generation**: Creates detailed markdown reports in the `output/` directory
108 |
109 | ### Sample Output
110 |
111 | Reports are saved as: `output/{business_name}{timestamp}_report.md`
112 |
113 | Each report includes:
114 | - **Business Name & Codes**
115 | - **Summary of Financial Data**
116 | - **Analysis of Financial Health**
117 | - **Turnaround Potential Verdict**: "Strong Turnaround", "Weak Turnaround", or "No Turnaround"
118 |
119 | ## 🔧 Configuration
120 |
121 | ### Model Configuration
122 |
123 | The system uses OpenAI's GPT-4.1-mini by default. To change the model, modify the `model` variable in `main.py`:
124 |
125 | ```python
126 | model = LiteLLMModel(model_id="openai/gpt-4-turbo", api_key=os.getenv("OPENAI_API_KEY"))
127 | ```
128 |
129 | ### Analysis Steps
130 |
131 | The AI agent follows these steps (see the sketch after this list):
132 | 1. Company identification and code mapping
133 | 2. Web research for financial data and news
134 | 3. Financial health analysis
135 | 4. Turnaround potential assessment
136 | 5. Report generation and saving
137 |
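Below is a minimal sketch of how these steps can be wired together with `smolagents`. The tool class names and import paths from `my_tools/` are illustrative here, not the exact exports; check `main.py` and `my_tools/` for the real wiring.

```python
import os
import pandas as pd
from dotenv import load_dotenv
from smolagents import CodeAgent, LiteLLMModel

# Illustrative tool imports; mirror the actual classes exposed by my_tools/
from my_tools.web_fetcher import WebFetcherTool
from my_tools.markdown_report import MarkdownReportTool

load_dotenv()  # picks up OPENAI_API_KEY from .env

model = LiteLLMModel(model_id="openai/gpt-4.1-mini", api_key=os.getenv("OPENAI_API_KEY"))
agent = CodeAgent(tools=[WebFetcherTool(), MarkdownReportTool()], model=model)

# Loop over the companies in the input CSV and run the agent for each one
companies = pd.read_csv("data/financial_data.csv")
for _, company in companies.iterrows():
    instructions = (
        f"Research {company['Name']} (NSE: {company['NSE Code']}), analyze its financial health, "
        "decide the turnaround verdict, and save a markdown report to the output/ directory."
    )
    agent.run(instructions, max_steps=20)
```
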
138 | ## 📊 Best Practices
139 |
140 | ### When to Run
141 | - **Ideal timing**: After quarterly earnings season
142 | - **Frequency**: Quarterly or semi-annually for best results
143 | - **Market conditions**: Consider running during market downturns for maximum turnaround identification
144 |
145 | ### Data Quality
146 | - Ensure company names and stock codes are accurate
147 | - Remove delisted or defunct companies from the CSV
148 | - Update the CSV with new companies of interest
149 |
150 | ## 🛠️ Troubleshooting
151 |
152 | ### Common Issues
153 |
154 | 1. **Missing API Key**
155 | ```
156 | Error: OpenAI API key not found
157 | Solution: Check your .env file and ensure OPENAI_API_KEY is set
158 | ```
159 |
160 | 2. **CSV File Not Found**
161 | ```
162 | Error: The financial data file data/financial_data.csv does not exist
163 | Solution: Ensure the CSV file exists in the data/ directory
164 | ```
165 |
166 | 3. **Network Issues**
167 | ```
168 | Error: Web search failed
169 | Solution: Check internet connection and API quotas
170 | ```
171 |
172 | 4. **Permission Errors**
173 | ```
174 | Error: Cannot write to output directory
175 | Solution: Ensure output/ directory exists and has write permissions
176 | ```
177 |
178 | ### Debugging
179 |
180 | Enable verbose logging by modifying the agent configuration:
181 |
182 | ```python
183 | response = agent.run(final_instructions, max_steps=20, verbose=True)
184 | ```
185 |
186 | ## 📈 Output Interpretation
187 |
188 | ### Turnaround Verdicts
189 |
190 | - **Strong Turnaround**: Company shows clear signs of recovery with improving fundamentals
191 | - **Weak Turnaround**: Some positive indicators but recovery uncertain
192 | - **No Turnaround**: No significant improvement indicators found
193 |
194 | ### Report Sections
195 |
196 | Each generated report contains:
197 | - Executive summary with verdict
198 | - Financial metrics analysis
199 | - Market sentiment and news analysis
200 | - Risk factors and considerations
201 | - Timeline for potential recovery
202 |
203 | ## 🤝 Contributing
204 |
205 | To enhance this tool:
206 | 1. Add new analysis tools in the `my_tools/` directory (a minimal example follows this list)
207 | 2. Extend the financial metrics analysis
208 | 3. Improve web scraping capabilities
209 | 4. Add visualization features
210 |
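For item 1, a minimal new tool using the `smolagents` `@tool` decorator might look like the sketch below. The function name and behaviour are only an example; mirror the structure of the existing tools in `my_tools/` and register the new tool in the `tools=[...]` list in `main.py`.

```python
from smolagents import tool

@tool
def count_keyword_mentions(text: str, keyword: str) -> int:
    """Count how many times a keyword appears in a block of text.

    Args:
        text: The text to scan, e.g. a fetched news article.
        keyword: The keyword to count, e.g. "turnaround".
    """
    return text.lower().count(keyword.lower())
```
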
211 | ## ⚠️ Disclaimer
212 |
213 | This tool is for informational purposes only and should not be considered as financial advice. Always conduct thorough due diligence and consult with financial professionals before making investment decisions.
214 |
215 | ## 📝 License
216 |
217 | This project is part of the BharatTrader stock analysis suite. Please refer to the main project license for usage terms.
218 |
--------------------------------------------------------------------------------
/py/yf/ss_result_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Stock Result Analysis Script for Screener Source Data
4 |
5 | This script processes stock data from ss_result_file.csv, which contains stock information
6 | from Screener. For each stock, it downloads historical price data from Yahoo Finance
7 | and calculates various performance metrics relative to a benchmark.
8 |
9 | The script:
10 | 1. Reads stock information from a CSV file with 'companyId' format as 'NSE:SYMBOL' or 'BSE:SYMBOL'
11 | 2. Downloads historical price data for each stock using yfinance
12 | 3. Calculates performance metrics (stock change %, benchmark change %, Alpha, ARS)
13 | 4. Saves the enriched data to a new CSV file
14 |
15 | Usage:
16 | python ss_result_parser.py
17 | """
18 |
19 | # Standard library and third-party imports
20 | import datetime
21 | import numpy as np
22 | import pandas as pd
23 | import yfinance as yf
24 |
25 | # Constants
26 | ARS_DATE = "2024-05-10" # ARS (Adaptive Relative Strength) reference date
27 | START_DATE = '2024-01-01' # Beginning of analysis period
28 | END_DATE = (datetime.datetime.now() + datetime.timedelta(days=1)).strftime('%Y-%m-%d') # today + 1 day
29 |
30 | RESULT_FILE = "ss_result_file.csv"
31 | OUTPUT_FILE = "final_ss_result_parser.csv"
32 |
33 |
34 | def main():
35 | """
36 | Main function to process stock data and calculate performance metrics.
37 | """
38 | print('Started... with yfinance version:', yf.__version__)
39 |
40 | # Use yfinance to retrieve the benchmark data
41 | benchmark_ticker = yf.Ticker("^NSEI") # NIFTY 50 Index
42 | benchmark_data = benchmark_ticker.history(start=START_DATE, end=END_DATE, interval='1d', auto_adjust=False, prepost=False)
43 | benchmark_data = benchmark_data.dropna()
44 |
45 | # Read the result file
46 | result = pd.read_csv(RESULT_FILE)
47 | result = result.dropna(subset=['companyId']) # Only drop rows with no companyId
48 |
49 | # Process each stock
50 | for index, row in result.iterrows():
51 | try:
52 | # Extract exchange and symbol from companyId
53 | company_id_parts = row['companyId'].split(':')
54 | exchange = company_id_parts[0]
55 | symbol = company_id_parts[1]
56 |
57 | print(f"Processing {row['Name']}...")
58 |
59 | # Set ticker format based on exchange
60 | if exchange == "NSE":
61 | stk_ticker = symbol + '.NS'
62 | elif exchange == "BSE":
63 | stk_ticker = symbol + '.BO'
64 | else:
65 | print(f"Unknown exchange for {row['companyId']}")
66 | continue
67 |
68 | stk_ticker = yf.Ticker(stk_ticker)
69 | stock_data = stk_ticker.history(start=START_DATE, end=END_DATE, interval='1d', auto_adjust=False, prepost=False)
70 |
71 | if stock_data.empty:
72 | print(f"No data available for {row['companyId']}")
73 | continue
74 |
75 | # Fetch Result Date, and then fetch the price on that date from stock_data.
76 | if pd.isna(row['Last Result Date']):
77 | print(f"No result date for {row['companyId']}")
78 | continue
79 |
80 | result_date = datetime.datetime.strptime(row['Last Result Date'], '%Y-%m-%d').strftime('%Y-%m-%d')
81 | result_price = 0.00
82 |
83 | # Get the last date in the stock data
84 | last_date = stock_data.index[-1].strftime('%Y-%m-%d')
85 | if last_date < result_date:
86 | print(f"Error: {row['companyId']} => Result Date {result_date} is greater than last date in stock data {last_date}")
87 | continue
88 |
89 | # If price not found on result date, try following dates
90 | while result_date <= last_date:
91 | try:
92 | result_price = stock_data.loc[stock_data.index == result_date, "Close"].values[0]
93 | break
94 | except:
95 | result_date = (datetime.datetime.strptime(result_date, '%Y-%m-%d') + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
96 | continue
97 |
98 | # Calculate and add stock performance metrics
99 | add_stock_metrics(result, index, stock_data, result_date, result_price)
100 |
101 | # Calculate and add benchmark performance metrics
102 | add_benchmark_metrics(result, index, benchmark_data, result_date)
103 |
104 | # Calculate alpha and ARS
105 | calculate_comparative_metrics(result, index, stock_data, benchmark_data)
106 |
107 | except Exception as e:
108 | print(f'Error processing {row.get("companyId", "unknown")}: {e}')
109 | continue
110 |
111 | # Save the result file
112 | result.to_csv(OUTPUT_FILE, index=False)
113 | print(f"Processing complete. Results saved to {OUTPUT_FILE}")
114 |
115 |
116 | def add_stock_metrics(result_df, index, stock_data, result_date, result_price):
117 | """
118 | Calculate and add stock-specific metrics to the result dataframe.
119 |
120 | Args:
121 | result_df: The dataframe containing stock information
122 | index: The row index in the dataframe
123 | stock_data: Historical stock data from yfinance
124 | result_date: The date when the result was announced
125 | result_price: The stock price on the result date
126 | """
127 | result_df.at[index, 'Result Date Price'] = round(result_price, 2)
128 | result_df.at[index, 'Last Close Date'] = stock_data.index[-1].strftime('%Y-%m-%d')
129 | result_df.at[index, 'Last Close Price'] = round(stock_data['Close'].iloc[-1], 2)
130 | result_df.at[index, '% Stock change'] = round((stock_data['Close'].iloc[-1] - result_price) / result_price * 100, 2)
131 |
132 |
133 | def add_benchmark_metrics(result_df, index, benchmark_data, result_date):
134 | """
135 | Calculate and add benchmark metrics to the result dataframe.
136 |
137 | Args:
138 | result_df: The dataframe containing stock information
139 | index: The row index in the dataframe
140 | benchmark_data: Historical benchmark data from yfinance
141 | result_date: The date when the result was announced
142 | """
143 | benchmark_result_price = benchmark_data.loc[benchmark_data.index == result_date, "Close"].values[0]
144 | result_df.at[index, 'Result Date Benchmark Price'] = round(benchmark_result_price, 2)
145 | result_df.at[index, 'Last Benchmark Date'] = benchmark_data.index[-1].strftime('%Y-%m-%d')
146 | result_df.at[index, 'Last Benchmark Price'] = round(benchmark_data['Close'].iloc[-1], 2)
147 | result_df.at[index, '% Benchmark change'] = round((benchmark_data['Close'].iloc[-1] - benchmark_result_price) / benchmark_result_price * 100, 2)
148 |
149 |
150 | def calculate_comparative_metrics(result_df, index, stock_data, benchmark_data):
151 | """
152 | Calculate comparative performance metrics like Alpha and ARS.
153 |
154 | Args:
155 | result_df: The dataframe containing stock information
156 | index: The row index in the dataframe
157 | stock_data: Historical stock data from yfinance
158 | benchmark_data: Historical benchmark data from yfinance
159 | """
160 | # Calculate alpha (stock performance relative to benchmark)
161 | result_df.at[index, 'Alpha'] = result_df.at[index, '% Stock change'] - result_df.at[index, '% Benchmark change']
162 |
163 | # Calculate ARS (Adaptive Relative Strength)
164 | try:
165 | result_df.at[index, 'ARS'] = round(
166 | (stock_data['Close'].iloc[-1] / stock_data.loc[stock_data.index == ARS_DATE, "Close"].values[0]) /
167 | (benchmark_data['Close'].iloc[-1] / benchmark_data.loc[benchmark_data.index == ARS_DATE, "Close"].values[0]) - 1, 2)
168 | except:
169 | result_df.at[index, 'ARS'] = 0.00 # Error in calculating ARS, set it to 0.00
170 |
171 |
172 | if __name__ == "__main__":
173 | main()
--------------------------------------------------------------------------------
/py/ai/nse_announcements/weekly_nse_announcements_analysis.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import requests
3 | import fitz # PyMuPDF
4 | import os
5 | from openai import OpenAI
6 | from urllib.parse import urlparse
7 | from dotenv import load_dotenv, find_dotenv
8 | from datetime import datetime
9 | import argparse
10 | import logging
11 |
12 | log_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
13 | LOCAL_MODEL = '' #'llama3.1:latest' # keep it blank if GPT is used
14 | LOCAL_URL = 'http://10.0.0.4:7862/v1' # Update with cloud URL or Local
15 | GPT_MODEL = 'gpt-4o-mini' # if LOCAL_MODEL is blank, GPT will be used
16 | CONTEXT_LEN = 1500
17 |
18 | # Logging configuration
19 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
20 | logger = logging.getLogger()
21 |
22 | # Load environment variables
23 | def set_api():
24 | load_dotenv(find_dotenv())
25 | return os.getenv('OPENAI_API_KEY')
26 |
27 | # Get LLM client (GPT or local)
28 | def get_llm_client_model():
29 | if not LOCAL_MODEL:
30 | gpt_client = OpenAI(api_key=set_api())
31 | return gpt_client, GPT_MODEL
32 | else:
33 | my_local_client = OpenAI(base_url=LOCAL_URL, api_key="local-llm")
34 | return my_local_client, LOCAL_MODEL
35 |
36 | client, model = get_llm_client_model()
37 |
38 | critical_subjects = [
39 | "Updates", "Press Release", "Financial Result Updates", "Sale or Disposal-XBRL",
40 | "Acquisition-XBRL", "Record Date", "Investor Presentation",
41 | "Change in Directors/Key Managerial Personnel/Auditor/Compliance Officer/Share Transfer Agent",
42 | "Acquisition", "Scheme of Arrangement", "Resignation", "Appointment",
43 | "Date of Payment of Dividend", "Dividend", "Increase in Authorised Capital",
44 | "Credit Rating", "Rights Issue", "Public Announcement-Open Offer"
45 | ]
46 |
47 | routine_updates_subjects = [
48 | "Shareholders meeting", "Outcome of Board Meeting", "Copy of Newspaper Publication",
49 | "Analysts/Institutional Investor Meet/Con. Call Updates", "Loss/Duplicate-Share Certificate-XBRL",
50 | "Board Meeting Intimation", "Trading Window-XBRL", "Notice Of Shareholders Meetings-XBRL",
51 | "Change in Director(s)", "ESOP/ESOS/ESPS", "Clarification - Financial Results",
52 | "Corporate Insolvency Resolution Process-XBRL", "Limited Review Report",
53 | "Disclosure under SEBI (PIT) Reg 2015"
54 | ]
55 |
56 | # Function to download and extract PDF or XML text
57 | def download_and_extract_pdf(url, local_path):
58 | # Skip download if file already exists
59 | if os.path.exists(local_path):
60 | logger.info(f"File already exists locally: {local_path}")
61 | return extract_pdf_text(local_path)
62 |
63 | try:
64 | # Make the request to download the file
65 | response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
66 | response.raise_for_status() # Ensure no bad response
67 |
68 | # Check the Content-Type header to determine file type
69 | content_type = response.headers.get('Content-Type')
70 | file_extension = None
71 |
72 | if 'application/pdf' in content_type:
73 | file_extension = 'pdf'
74 | elif 'application/xml' in content_type:
75 | file_extension = 'xml'
76 |
77 | # Ensure we append the correct file extension to local_path
78 | if file_extension:
79 | local_path += f'.{file_extension}'
80 | else:
81 | logger.warning(f"Unknown content type: {content_type}. Assuming default .pdf")
82 | file_extension = 'pdf'
83 | local_path += '.pdf'
84 |
85 | # Write the file to the local path
86 | with open(local_path, 'wb') as f:
87 | f.write(response.content)
88 |
89 | # Extract text based on file type
90 | if file_extension == 'pdf':
91 | return extract_pdf_text(local_path)
92 | elif file_extension == 'xml':
93 | return extract_xml_text(local_path)
94 | else:
95 | logger.error(f"Unsupported file type: {file_extension}")
96 | return ""
97 |
98 | except requests.RequestException as e:
99 | logger.error(f"Failed to download {url}: {e}")
100 | return ""
101 |
102 | # Function to extract text from PDF
103 | def extract_pdf_text(local_path):
104 | try:
105 | doc = fitz.open(local_path)
106 | text = "".join(page.get_text() for page in doc)
107 | return text
108 | except Exception as e:
109 | logger.error(f"Failed to extract text from PDF {local_path}: {e}")
110 | return ""
111 |
112 | # Function to extract text from XML
113 | def extract_xml_text(local_path):
114 | try:
115 | with open(local_path, 'r') as f:
116 | return f.read()
117 | except Exception as e:
118 | logger.error(f"Failed to extract text from XML file {local_path}: {e}")
119 | return ""
120 |
121 | # Truncate text to context length
122 | def truncate_words(text):
123 | words = text.split()
124 | return ' '.join(words[:CONTEXT_LEN]) if len(words) > CONTEXT_LEN else text
125 |
126 | # Get summary and sentiment using OpenAI API
127 | def get_summary_and_sentiment(text):
128 | truncated_text = truncate_words(text)
129 | try:
130 | response = client.chat.completions.create(
131 | model=model, temperature=1.0, max_tokens=500,
132 | messages=[
133 | {"role": "user", "content": "Please summarize the company announcement provided."},
134 | {"role": "user", "content": truncated_text}
135 | ]
136 | )
137 | summary = response.choices[0].message.content
138 | sentiment_response = client.chat.completions.create(
139 | model=model, temperature=1.0, max_tokens=20,
140 | messages=[
141 | {"role": "user", "content": f"Provide an investor sentiment analysis score in a scale between 0 (negative sentiment) to 1 (positive sentiment) for the following text. The answer should be a single float value, no explanation is required: {summary}"}
142 | ]
143 | )
144 | sentiment_score = float(sentiment_response.choices[0].message.content.strip())
145 | return summary, sentiment_score
146 | except Exception as e:
147 | logger.error(f"Error in generating summary/sentiment: {e}")
148 | return "", -1.0
149 |
150 | # Write result to file
151 | def write_to_file(file, data):
152 | with open(file, 'a') as f:
153 | f.write(data)
154 |
155 | # Main processing function
156 | def process_announcement(index, row, stock):
157 | pdf_url = row['ATTACHMENT']
158 | filename = os.path.basename(urlparse(pdf_url).path)
159 | pdf_local_path = os.path.join('notifications', filename)
160 | pdf_text = download_and_extract_pdf(pdf_url, pdf_local_path)
161 | summary, sentiment_score = get_summary_and_sentiment(pdf_text)
162 | return {
163 | 'Stock': stock, 'Company': row['COMPANY NAME'], 'Subject': row['SUBJECT'],
164 | 'Summary': summary, 'Score': sentiment_score, 'Link': row['ATTACHMENT']
165 | }
166 |
167 | # Main function
168 | def main():
169 | # Parse command line arguments
170 | parser = argparse.ArgumentParser(description='Analyze announcements')
171 | parser.add_argument('--file', type=str, help='Input file path')
172 | parser.add_argument('--start', type=str, help='Stock to start from in stocks.csv')
173 | args = parser.parse_args()
174 |
175 | try:
176 | stocks = pd.read_csv("stocks.csv", usecols=["Ticker"])
177 | df = pd.read_csv(args.file)
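        # Keep only announcements whose SUBJECT is in the critical list and not in the routine-updates list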
178 | df = df[~df['SUBJECT'].isin(routine_updates_subjects) & df['SUBJECT'].isin(critical_subjects)]
179 | logger.info(f"Analyzing {len(df)} announcements")
180 |
181 | result_df = pd.DataFrame(columns=['Stock', 'Company', 'Subject', 'Summary', 'Score', 'Link'])
182 |
183 | for stock in stocks["Ticker"]:
184 | for index, row in df[df['SYMBOL'] == stock].iterrows():
185 | try:
186 | result = process_announcement(index, row, stock)
187 | # Append the new row to the DataFrame
188 | result_df.loc[len(result_df)] = result
189 | except Exception as e:
190 | logger.error(f"Error processing {stock}: {e}")
191 |
192 | file_name = f'output/{args.file}_report_{log_timestamp}.csv'
193 | result_df.to_csv(file_name, index=False)
194 | logger.info(f"Results saved to {file_name}")
195 |
196 | except Exception as e:
197 | logger.error(f"Error during processing: {e}")
198 |
199 | if __name__ == "__main__":
200 | main()
201 |
--------------------------------------------------------------------------------
/py/eodhd/mip12_scanner.py:
--------------------------------------------------------------------------------
1 | """
2 | Momentum Investing Scanner (MIP‑12) (Modified from Prashanth Sir’s book)
3 |
4 | This module implements a momentum‑based stock scanner following the “MIP‑12” strategy
5 | from Prashanth Sir’s recent book. It filters and ranks Nifty 500 stocks by multiple
6 | technical criteria and outputs a CSV report. The original algorithm has been modified
7 | to include a ranking metric based on the Sharpe ratio, rather than Volar as that is proprietary.
8 |
9 | --- Overview ---
10 | 1. Market Trend Filter:
11 | Checks if the benchmark index (e.g., Nifty 500) is above its 20‑day EMA.
12 | 2. Entry Filters (applied only when market is bullish):
13 | • 52‑Week High Retracement: stock must be within 50% of its 52‑week high.
14 |    • 200‑Day EMA: stock’s latest close must exceed its 200‑day EMA, and the stock/benchmark ratio must be above its own 200‑day EMA.
15 | 3. Ranking Metric:
16 |    Computes a simple Sharpe‑style ratio (12‑month rate of change ÷ annualized volatility of daily returns).
17 | 4. Final Selection:
18 | • If market is bullish: all stocks passing entry filters are ranked by Sharpe ratio.
19 | • If market is bearish: no new entries are considered, but ranking is still performed.
20 | 5. Output:
21 | • `mip12_scan_report.csv` with columns:
22 | Ticker, Rank#, Price, 52W_High, 200D_EMA, Sharpe_Ratio
23 | • `mip12_scan_errors.csv` capturing any per‑symbol exceptions.
24 |
25 | --- Functions ---
26 | market_trend_filter(benchmark_df, ema_period=20) → bool
27 | get_52w_high(stock_df, period=252) → float
28 | get_200d_ema(stock_df, period=200) → float
29 | compute_sharpe_ratio(stock_df) → float
30 |
31 | --- Main Flow ---
32 | 1. Load benchmark data.
33 | 2. Determine `is_bullish` flag based on the market trend filter.
34 | 3. Loop over each symbol:
35 | a. Load its price series.
36 | b. If bullish, enforce entry filters (52W High Retracement and 200D EMA).
37 | c. Compute Sharpe ratio for ranking.
38 | d. Append record (Ticker, Price, 52W_High, 200D_EMA, Sharpe_Ratio).
39 | e. Catch and log any exceptions per symbol.
40 | 4. Build a DataFrame, sort by Sharpe ratio, and insert Rank#.
41 | 5. Export the report and any errors to CSV.
42 |
43 | --- Logging & Error Handling ---
44 | - Uses Python’s `logging` module to record INFO and ERROR messages.
45 | - Errors for individual symbols are collected and saved to `mip12_scan_errors.csv`.
46 |
47 | Usage:
48 | python mip12_scanner.py
49 |
50 | """
51 | import pricereader as pr
52 | import pandas as pd
53 | import numpy as np
54 | import logging
55 |
56 | # Configure logging
57 | logging.basicConfig(
58 | level=logging.INFO,
59 | format='%(asctime)s %(levelname)s: %(message)s',
60 | datefmt='%Y-%m-%d %H:%M:%S'
61 | )
62 |
63 | # Interval
64 | data_interval = 'd'
65 |
66 | # Benchmark symbol
67 | benchmark = "CRSLDX" # Nifty 500 Index
68 |
69 | # Read the list of stocks from the CSV file
70 | stocks = pd.read_csv("nifty500.csv", header=0, usecols=["Ticker"])
71 |
72 | # --- Helper functions ---
73 |
74 | def market_trend_filter(benchmark_df: pd.DataFrame,
75 | ema_period: int = 20,
76 | price_col: str = 'Close') -> bool:
77 | """Return True if latest benchmark Close > its EMA."""
78 | ema = benchmark_df[price_col].ewm(span=ema_period, adjust=False).mean()
79 | return benchmark_df[price_col].iloc[-1] > ema.iloc[-1]
80 |
81 | def get_52w_high(stock_df: pd.DataFrame,
82 | period: int = 252,
83 | price_col: str = 'Close') -> float:
84 | """Return the 52‑week high price, or NaN if insufficient data."""
85 | closes = stock_df[price_col].dropna()
86 | if len(closes) < period:
87 | return float('nan')
88 | return closes.iloc[-period:].max()
89 |
90 | def get_200d_ema(stock_df: pd.DataFrame,
91 | period: int = 200,
92 | price_col: str = 'Close') -> float:
93 | """Return the most recent 200‑day EMA, or NaN if insufficient data."""
94 | closes = stock_df[price_col].dropna()
95 | if len(closes) < period:
96 | return float('nan')
97 | ema = closes.ewm(span=period, adjust=False).mean()
98 | return ema.iloc[-1]
99 |
100 | def passes_ratio_200d_ema(stock_df: pd.DataFrame,
101 | benchmark_df: pd.DataFrame,
102 | period: int = 200,
103 | price_col: str = 'Close') -> bool:
104 | """
105 | Return True if the latest ratio of stock/benchmark Close is above
106 | its 200‑day EMA on the ratio series.
107 | """
108 | # align on common dates
109 | ratio = (stock_df[price_col] / benchmark_df[price_col]).dropna()
110 | if len(ratio) < period:
111 | return False
112 | ema = ratio.ewm(span=period, adjust=False).mean()
113 | return ratio.iloc[-1] > ema.iloc[-1]
114 |
115 | def compute_sharpe_ratio(stock_df: pd.DataFrame,
116 | price_col: str = 'Close',
117 | period: int = 252) -> float:
118 | """
119 |     Compute a Sharpe‑style ratio: 12‑month rate of change divided by the annualized volatility of daily returns, over the last `period` days.
120 |     Returns 0.0 if the annualized volatility is zero.
121 | """
122 | df_1y = stock_df.tail(period).copy()
123 |
124 | # Calculate 12M ROC
125 | current_price = df_1y['Close'].iloc[-1]
126 | price_1y_ago = df_1y['Close'].iloc[0]
127 | roc_12m = (current_price / price_1y_ago) - 1
128 |
129 | # Daily returns & volatility
130 | df_1y['daily_return'] = df_1y['Close'].pct_change()
131 | daily_vol = df_1y['daily_return'].std()
132 | annualized_vol = daily_vol * np.sqrt(period)
133 |
134 | return 0.0 if annualized_vol == 0 else roc_12m / annualized_vol
135 |
136 |
137 | # --- Main scanning function ---
138 |
139 | def main():
140 |
141 | logging.info("Scan started.")
142 |
143 | # 1. Load & trim benchmark data
144 | benchmark_data = pr.get_price_data(benchmark, data_interval)
145 |
146 | # 2. Check market trend
147 | is_bullish = market_trend_filter(benchmark_data)
148 | if is_bullish:
149 | logging.info("Market is bullish → full entry filters apply.")
150 | else:
151 | logging.info("Market is NOT bullish → only ranking/exits, no new entries.")
152 | print("Market is NOT bullish → only ranking/exits, no new entries.")
153 |
154 | # 3. Prepare lists
155 | candidates = stocks["Ticker"].tolist()
156 | records = []
157 | errors = []
158 |
159 | # 4. Per‐stock processing
160 | for symbol in candidates:
161 | try:
162 | print(f"Processing {symbol}...")
163 | df = pr.get_price_data(symbol, data_interval)
164 | if df.empty:
165 | continue # no data in date range
166 |
167 |             # Entry filters (52W high retracement and 200D EMA)
168 | high_52w = get_52w_high(df)
169 | ema_200 = get_200d_ema(df)
170 |
171 |
172 | price = df['Close'].iloc[-1]
173 | if pd.isna(high_52w) or price < 0.5 * high_52w:
174 | logging.info("Skipping %s: 52W high retracement not met.", symbol)
175 | continue
176 | if pd.isna(ema_200) or price <= ema_200:
177 | logging.info("Skipping %s: 200D EMA not met.", symbol)
178 | continue
179 |
180 | if not passes_ratio_200d_ema(df, benchmark_data):
181 | logging.info("Skipping %s: ratio chart condition not met.", symbol)
182 | continue
183 |
184 | # Compute ranking metric
185 | sharpe = compute_sharpe_ratio(df)
186 |
187 | # Record all required fields
188 | records.append({
189 | "Ticker": symbol,
190 | "Price": df['Close'].iloc[-1],
191 | "52W_High": high_52w,
192 | "200D_EMA": ema_200,
193 | "Sharpe_Ratio": sharpe
194 | })
195 |
196 | except Exception as e:
197 | logging.error(f"Error processing {symbol}: {e}")
198 | errors.append({"Ticker": symbol, "Error": str(e)})
199 |
200 | # 5. Build final report DataFrame
201 | report_df = pd.DataFrame(records)
202 | report_df = report_df.dropna(subset=["Sharpe_Ratio"])
203 | report_df = report_df.sort_values("Sharpe_Ratio", ascending=False)
204 | report_df.insert(1, "Rank#", range(1, len(report_df) + 1))
205 |
206 | # 6. Export results
207 | report_df.to_csv("mip12_scan_report.csv", index=False)
208 | logging.info("Report saved to mip12_scan_report.csv.")
209 |
210 | # 7. Optionally export errors
211 | if errors:
212 | err_df = pd.DataFrame(errors)
213 | err_df.to_csv("mip12_scan_errors.csv", index=False)
214 | logging.info("Errors saved to mip12_scan_errors.csv.")
215 |
216 | return report_df
217 |
218 | # If this script is run directly, invoke main():
219 | if __name__ == "__main__":
220 | main()
221 |
--------------------------------------------------------------------------------
/py/yf/limevolume.py:
--------------------------------------------------------------------------------
1 | '''
2 | Volume is where the whole story begins, so it is important to detect volume expansions.
3 | On charts, one can look for volume expansions, when volume breaches its daily/weekly averages by a wide margin (a LimeVolume day).
4 | This indicates institutional demand.
5 | Expansion of volume and presence of demand at different life-cycle stages of a stock can mean different things.
6 | For example, a limevolume day observed in Stage 1 for the first time may be the first signal of demand, but it is not good
7 | to initiate a long trade just yet, because the institution will absorb the supply gradually.
8 | If the base is instead well formed, and we start to see limevolume with higher lows on the price chart, it might indicate
9 | the beginning of Stage 2.
10 | If a scrip is already in an established uptrend (Stage 2), then limevolume days during a sideways (resting) phase indicate
11 | renewed demand either by the same institution or by a new player interested in the company. This may be suitable for a top-up.
12 | '''
13 |
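# Summary of the rules implemented below (derived from the scan logic):
#   Lime volume day : an up day (close > previous close) whose volume exceeds the
#                     previous week's 10-week average weekly volume.
#   Teal volume day : an up day whose volume exceeds its 100-day average daily volume.
#   blueVolCount    : limeVolCount + tealVolCount over the last `lookback_length` daily bars.
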
14 | import yfinance as yf
15 | import pandas as pd
16 | import numpy as np
17 | import math
18 | import csv
19 | import datetime
20 |
21 | # Read the list of stocks from the CSV file
22 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"])
23 | # Exchange ".BO" for BSE, ".NS" for Nifty
24 | exchg = ".NS"
25 |
26 | # Set start Date
27 | start_date = '2022-07-25' # Should be a date that is start of the week date, so that daily and weekly data can match
28 |
29 | # Set end Date
30 | end_date = '2023-07-29'
31 | # Folder location
32 | output = 'output'
33 |
34 | # Interval
35 | data_interval_weekly = '1wk'
36 | data_interval_daily = '1d'
37 |
38 | # Weekly volume average length
39 | weekly_volume_length = 10
40 | # Daily volume average length
41 | daily_volume_length = 100
42 |
43 | # Number of days to check for limevolume
44 | lookback_length = 55 # ~3 months of daily bars
45 |
46 | # Read up sector/industry information from text data
47 | stock_industry_map = pd.read_csv("stock_sector_industry_map.csv", header=0, usecols=["NSE Code","Industry","Market Cap", "Sector"])
48 |
49 | # Crore
50 | One_Cr = 10000000
51 |
52 | def fetch_industry_mcap(nse_code):
53 |
54 | industry = ''
55 | mcap = ''
56 | sector = ''
57 |
58 | try:
59 | # We try to get from local file first
60 | sector = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Sector'].iloc[0]
61 | industry = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Industry'].iloc[0]
62 | mcap = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Market Cap'].iloc[0]
63 | except Exception as err:
64 | pass
65 |
66 | if industry == '' or mcap == '':
67 | try:
68 | # Try yf
69 | ticker = yf.Ticker(nse_code+".NS")
70 | if ticker.info:
71 | if industry == '':
72 | industry = ticker.info['industry']
73 | if mcap == '':
74 | mcap = round(ticker.info['marketCap'] / One_Cr, 0)
75 | if sector == '':
76 | sector = ticker.info['sector']
77 | except Exception as err:
78 | pass
79 |
80 | return [sector, industry, mcap]
81 |
82 | def main():
83 | print("Started... " + start_date + " - " + end_date)
84 |
85 | # Create the DataFrame
86 | df = pd.DataFrame(columns=['stock', 'mcap', 'blueVolCount', 'limeVolToday', 'limeVolCount', 'latestLimeVolDate', 'earliestLimeVolDate', 'tealVolCount', 'latestTealVolDate', \
87 | 'earliestTealVolDate', 'priceChng', 'sector' , 'industry'])
88 | # Iterate through the list of stocks
89 | for stock in stocks["Ticker"]:
90 | try:
91 | print(f'Analyzing {stock}...')
92 | # Get the stock data
93 | stk_ticker = yf.Ticker(stock+exchg)
94 | # Get the stock data from yfinance, dont adjust OHLC
95 | stock_data_daily = stk_ticker.history(start=start_date, end=end_date,interval=data_interval_daily,auto_adjust=False, prepost=False)
96 | # Drop those with NaN
97 | stock_data_daily = stock_data_daily.dropna()
98 |
99 |             stock_data_weekly = stk_ticker.history(start=start_date, end=end_date,interval=data_interval_weekly,auto_adjust=False, prepost=False)
100 | # Drop those with NaN
101 | stock_data_weekly = stock_data_weekly.dropna()
102 |
103 | #10wk avg volume
104 | weekly_vol_avg_col = f'Weekly_Volume_Avg{weekly_volume_length}'
105 | stock_data_weekly[weekly_vol_avg_col] = stock_data_weekly['Volume'].rolling(window=weekly_volume_length, min_periods=1).mean().fillna(0)
106 |
107 |             #100d avg volume
108 | daily_vol_avg_col = f'Daily_Volume_Avg{daily_volume_length}'
109 | stock_data_daily[daily_vol_avg_col] = stock_data_daily['Volume'].rolling(window=daily_volume_length, min_periods=1).mean().fillna(0)
110 |
111 | # Create a new column in the daily data to store the corresponding weekly volume
112 | stock_data_daily[weekly_vol_avg_col] = 0
113 |
114 | # Loop through each row in the daily data
115 | mismatch_ctr = 0
116 | never_matched = True
117 | for i, row in stock_data_daily.iterrows():
118 | # Extract the date from the current row
119 | date = row.name.date()
120 |
121 | # Look up the corresponding row in the weekly data
122 | weekly_row = stock_data_weekly.loc[stock_data_weekly.index.date == date]
123 |
124 | # If there is no corresponding weekly data for the current date, propagate the last known weekly volume forward
125 | if len(weekly_row) == 0:
126 | if never_matched and mismatch_ctr < 7:
127 | mismatch_ctr = mismatch_ctr + 1
128 | continue # Try to match up data for next week
129 | stock_data_daily.at[i, weekly_vol_avg_col] = stock_data_daily[weekly_vol_avg_col].shift(1)[i]
130 | # If there is corresponding weekly data for the current date, fetch the volume and set it in the daily data
131 | else:
132 | never_matched = False
133 | weekly_avg_volume = weekly_row[weekly_vol_avg_col].iloc[0]
134 | stock_data_daily.at[i, weekly_vol_avg_col] = weekly_avg_volume
135 |
136 | isTodayLimeVolume = False
137 | cntLimeCount = 0
138 | cntTealCount = 0
139 | pctChange = 0
140 | earliestLimeVolDate = ''
141 | latestLimeVolDate = ''
142 | earliestTealVolDate = ''
143 | latestTealVolDate = ''
144 | # reverse
145 | stock_data_daily = stock_data_daily.iloc[::-1]
146 |
147 | if len(stock_data_daily) > lookback_length:
148 | for i in range(0, lookback_length):
149 | if stock_data_daily['Close'][i] > stock_data_daily['Close'][i+1]: # Up Day
150 | weekly_avg_to_compare = stock_data_daily[weekly_vol_avg_col][i]
151 | for j in range(i+1, i+7): # Find the previous week volume average, by checking previous unmatched value
152 | _weekly_avg = stock_data_daily[weekly_vol_avg_col][j]
153 | if _weekly_avg != weekly_avg_to_compare:
154 | weekly_avg_to_compare = _weekly_avg
155 | break
156 | if stock_data_daily['Volume'][i] > weekly_avg_to_compare: # Now compare if this day's volume is greater than weekly average volume
157 | cntLimeCount = cntLimeCount + 1
158 | earliestLimeVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y")
159 | if cntLimeCount == 1:
160 | latestLimeVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y")
161 | pctChange = round(((stock_data_daily['Close'][i] / stock_data_daily['Close'][i+1]) - 1 ) * 100, 2)
162 | if i == 0:
163 | isTodayLimeVolume = True
164 | # Teal Volume
165 | if stock_data_daily['Volume'][i] > stock_data_daily[daily_vol_avg_col][i]: # Now compare if this day's volume is greater than daily average volume
166 | cntTealCount = cntTealCount + 1
167 | earliestTealVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y")
168 | if cntTealCount == 1:
169 | latestTealVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y")
170 |
171 |             # Fetch sector, industry and market cap
172 | [sector, industry, marketCap] = fetch_industry_mcap(stock)
173 |
174 | blueVolCnt = cntLimeCount + cntTealCount
175 | row = {'stock': stock, 'blueVolCount': str(blueVolCnt), 'limeVolToday' : str(isTodayLimeVolume), 'limeVolCount': str(cntLimeCount), \
176 | 'latestLimeVolDate' : str(latestLimeVolDate), 'earliestLimeVolDate' : str(earliestLimeVolDate), \
177 | 'tealVolCount': str(cntTealCount), 'latestTealVolDate' : str(latestTealVolDate), 'earliestTealVolDate' : str(earliestTealVolDate), \
178 | 'mcap' : marketCap, 'priceChng': str(pctChange), 'sector' : sector, 'industry' : industry}
179 | # Append the new row to the DataFrame
180 | df.loc[len(df)] = row
181 |
182 | except Exception as e:
183 | print(f'Error: {stock} => {e}')
184 | # Append current timestamp to the file name
185 | now = datetime.datetime.now()
186 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
187 | file_name = f'{output}/limevolume_{timestamp}.csv'
188 | # Export the DataFrame to CSV
189 | df.to_csv(file_name, index=False)
190 | print('Done')
191 |
192 | if __name__ == "__main__":
193 | main()
194 |
--------------------------------------------------------------------------------
/py/ai/fininsightgpt/src/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | FinInsightGPT - AI-Powered Investment Analysis Application
4 |
5 | This application processes company data files, converts them to markdown,
6 | creates consolidated master files, and generates equity research reports.
7 | """
8 |
9 | import os
10 | import sys
11 | import argparse
12 | import logging
13 | from pathlib import Path
14 | from typing import List, Optional
15 |
16 | # Load environment variables from .env file
17 | try:
18 | from dotenv import load_dotenv
19 | load_dotenv() # Load variables from .env file
20 | ENV_LOADED = True
21 | except ImportError:
22 | ENV_LOADED = False
23 | logging.warning("dotenv not found, environment variables must be set manually")
24 |
25 | # Configure logging
26 | logging.basicConfig(
27 | level=logging.INFO,
28 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
29 | )
30 | logger = logging.getLogger(__name__)
31 |
32 | # Import local modules
33 | from document_processor import process_company_folder
34 | from master_file_generator import generate_master_file
35 | from report_generator import generate_report
36 |
37 |
38 | def setup_argparse() -> argparse.ArgumentParser:
39 | """Set up command-line arguments."""
40 | parser = argparse.ArgumentParser(
41 | description="FinInsightGPT - AI-Powered Investment Analysis Application"
42 | )
43 |
44 | subparsers = parser.add_subparsers(dest='command', help='Command to run')
45 |
46 | # Process command
47 | process_parser = subparsers.add_parser('process', help='Process files in a company folder')
48 | process_parser.add_argument('company_folder', help='Path to the company folder')
49 |
50 | # Master file command
51 | master_parser = subparsers.add_parser('master', help='Generate master file from processed files')
52 | master_parser.add_argument('company_folder', help='Path to the company folder')
53 | master_parser.add_argument('--output-dir', help='Directory to save the master file (defaults to company folder)')
54 |
55 | # Report command
56 | report_parser = subparsers.add_parser('report', help='Generate report from master file')
57 | report_parser.add_argument('master_file', help='Path to the master markdown file')
58 | report_parser.add_argument('--template', help='Path to the report template (default: prompt_master/Equity_Research_Report_Template.md)')
59 | report_parser.add_argument('--output-dir', help='Directory to save the report (defaults to master file directory)')
60 | report_parser.add_argument('--model', help='LLM model to use (default: gpt-4-turbo)')
61 |
62 | # All-in-one command
63 | all_parser = subparsers.add_parser('all', help='Process everything end-to-end')
64 | all_parser.add_argument('company_folder', help='Path to the company folder')
65 | all_parser.add_argument('--template', help='Path to the report template (default: prompt_master/Equity_Research_Report_Template.md)')
66 | all_parser.add_argument('--model', default='gpt-4-turbo', help='LLM model to use (default: gpt-4-turbo)')
67 |
68 | # List companies command
69 | subparsers.add_parser('list', help='List all available company folders')
70 |
71 | return parser
72 |
73 |
74 | def list_companies(base_path: str = "../company_data") -> List[str]:
75 | """List all company folders in the company_data directory."""
76 | base_path = Path(base_path)
77 |
78 | if not base_path.exists() or not base_path.is_dir():
79 | logger.error(f"Company data directory not found: {base_path}")
80 | return []
81 |
82 | companies = []
83 |
84 | for item in base_path.iterdir():
85 | if item.is_dir() and not item.name.startswith('.'):
86 | companies.append(item.name)
87 |
88 | return companies
89 |
90 |
91 | def run_process_command(args: argparse.Namespace) -> None:
92 | """Process files in a company folder."""
93 | company_folder = args.company_folder
94 |
95 | # Ensure path is absolute
96 | if not os.path.isabs(company_folder):
97 | script_dir = Path(__file__).parent.absolute()
98 | company_data_dir = script_dir.parent / "company_data"
99 | company_folder = os.path.join(company_data_dir, company_folder)
100 |
101 | logger.info(f"Processing files in: {company_folder}")
102 | processed_files = process_company_folder(company_folder)
103 |
104 | if not processed_files:
105 | logger.warning("No files were processed.")
106 | else:
107 | logger.info(f"Successfully processed {len(processed_files)} files.")
108 |
109 |
110 | def run_master_command(args: argparse.Namespace) -> Optional[str]:
111 | """Generate master file from processed files."""
112 | company_folder = args.company_folder
113 | output_dir = args.output_dir
114 |
115 | # Ensure path is absolute
116 | if not os.path.isabs(company_folder):
117 | script_dir = Path(__file__).parent.absolute()
118 | company_data_dir = script_dir.parent / "company_data"
119 | company_folder = os.path.join(company_data_dir, company_folder)
120 |
121 | # Get company name from folder path
122 | company_name = Path(company_folder).name
123 |
124 | # Find processed markdown files
125 | processed_folder = Path(company_folder) / "processed"
126 |
127 | if not processed_folder.exists() or not processed_folder.is_dir():
128 | logger.error(f"Processed folder not found: {processed_folder}")
129 | return None
130 |
131 | markdown_files = []
132 | for file in processed_folder.glob("*.md"):
133 | if file.is_file():
134 | markdown_files.append(str(file))
135 |
136 | if not markdown_files:
137 | logger.error("No processed markdown files found.")
138 | return None
139 |
140 | logger.info(f"Found {len(markdown_files)} processed files.")
141 |
142 | # Generate master file
143 | master_file_path = generate_master_file(
144 | company_name=company_name,
145 | markdown_files=markdown_files,
146 | output_dir=output_dir
147 | )
148 |
149 | if master_file_path:
150 | logger.info(f"Successfully generated master file: {master_file_path}")
151 | else:
152 | logger.error("Failed to generate master file.")
153 |
154 | return master_file_path
155 |
156 |
157 | def run_report_command(args: argparse.Namespace) -> Optional[str]:
158 | """Generate report from master file."""
159 | master_file = args.master_file
160 | template_path = args.template
161 | output_dir = args.output_dir
162 | model = args.model
163 |
164 | # If model not specified in args, use the environment variable
165 | if model is None:
166 | model = os.environ.get("OPENAI_TEXT_MODEL", "gpt-4-turbo")
167 |
168 | # Ensure master file path is absolute
169 | if not os.path.isabs(master_file):
170 | script_dir = Path(__file__).parent.absolute()
171 | company_data_dir = script_dir.parent / "company_data"
172 | master_file = os.path.join(company_data_dir, master_file)
173 |
174 | # Ensure template path is set
175 | if template_path is None:
176 | script_dir = Path(__file__).parent.absolute()
177 | template_path = script_dir.parent / "prompt_master" / "Equity_Research_Report_Template.md"
178 | elif not os.path.isabs(template_path):
179 | script_dir = Path(__file__).parent.absolute()
180 | template_path = script_dir.parent / template_path
181 |
182 | # Check if files exist
183 | if not os.path.exists(master_file):
184 | logger.error(f"Master file not found: {master_file}")
185 | return None
186 |
187 | if not os.path.exists(template_path):
188 | logger.error(f"Template file not found: {template_path}")
189 | return None
190 |
191 | # Generate report
192 | report_file_path = generate_report(
193 | master_file_path=master_file,
194 | template_path=str(template_path),
195 | output_dir=output_dir,
196 | model=model
197 | )
198 |
199 | if report_file_path:
200 | logger.info(f"Successfully generated report: {report_file_path}")
201 | else:
202 | logger.error("Failed to generate report.")
203 |
204 | return report_file_path
205 |
206 |
207 | def run_all_command(args: argparse.Namespace) -> None:
208 | """Process everything end-to-end: process files, generate master file, and generate report."""
209 | company_folder = args.company_folder
210 | template_path = args.template
211 | model = args.model
212 |
213 | # Process files
214 | process_args = argparse.Namespace(company_folder=company_folder)
215 | run_process_command(process_args)
216 |
217 | # Generate master file
218 | master_args = argparse.Namespace(company_folder=company_folder, output_dir=None)
219 | master_file_path = run_master_command(master_args)
220 |
221 | if not master_file_path:
222 | logger.error("Cannot continue without a master file.")
223 | return
224 |
225 | # Generate report
226 | report_args = argparse.Namespace(
227 | master_file=master_file_path,
228 | template=template_path,
229 | output_dir=None,
230 | model=model
231 | )
232 | report_file_path = run_report_command(report_args)
233 |
234 | if report_file_path:
235 |         logger.info("End-to-end processing completed successfully.")
236 | else:
237 | logger.error("End-to-end processing failed during report generation.")
238 |
239 |
240 | def check_environment():
241 | """Check if required environment variables are set."""
242 | if not os.environ.get("OPENAI_API_KEY"):
243 | logger.warning("OPENAI_API_KEY environment variable is not set. Set it in your .env file or export it in your shell.")
244 | return False
245 | return True
246 |
247 |
248 | def main() -> None:
249 | """Main entry point of the application."""
250 | parser = setup_argparse()
251 | args = parser.parse_args()
252 |
253 | if args.command is None:
254 | parser.print_help()
255 | sys.exit(1)
256 |
257 | elif args.command == 'process':
258 | run_process_command(args)
259 |
260 | elif args.command == 'master':
261 | run_master_command(args)
262 |
263 | elif args.command == 'report':
264 | run_report_command(args)
265 |
266 | elif args.command == 'all':
267 | run_all_command(args)
268 |
269 | elif args.command == 'list':
270 | script_dir = Path(__file__).parent.absolute()
271 | company_data_dir = script_dir.parent / "company_data"
272 | companies = list_companies(str(company_data_dir))
273 |
274 | if companies:
275 | print("Available company folders:")
276 | for company in companies:
277 | print(f"- {company}")
278 | else:
279 | print("No company folders found.")
280 |
281 | else:
282 | parser.print_help()
283 | sys.exit(1)
284 |
285 |
286 | if __name__ == "__main__":
287 | main()
--------------------------------------------------------------------------------
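
The argparse setup above exposes five subcommands (process, master, report, all, list); the end-to-end flow of `python main.py all <company_folder>` can also be driven programmatically through the run_* helpers. A minimal sketch, assuming it is executed from py/ai/fininsightgpt/src/ with OPENAI_API_KEY set; "Acme" stands in for a real folder under company_data/:

    import argparse
    from main import run_process_command, run_master_command, run_report_command

    company = "Acme"  # illustrative folder name under company_data/

    # 1. Convert every document in the company folder to markdown.
    run_process_command(argparse.Namespace(company_folder=company))

    # 2. Consolidate the processed markdown files into a single master file.
    master_path = run_master_command(argparse.Namespace(company_folder=company, output_dir=None))

    # 3. Generate the equity research report from the master file.
    if master_path:
        run_report_command(argparse.Namespace(
            master_file=master_path,
            template=None,   # defaults to prompt_master/Equity_Research_Report_Template.md
            output_dir=None,
            model=None,      # falls back to OPENAI_TEXT_MODEL or gpt-4-turbo
        ))
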
/py/yf/stock_sector_strength.py:
--------------------------------------------------------------------------------
1 | '''
2 | A comparative analysis of the stock market based on sectors (or any other grouping), measured from a significant past date/event as reflected on the benchmark.
3 | The idea is to calculate the gains not only of the individual stocks but of the entire group, with respect to that event.
4 | Considering the entire group makes for interesting analysis: the leader stocks move well in advance of their peers and
5 | begin outperforming the benchmark and their sector early. We can also see how the sector as a group performs relative to the benchmark.
6 | '''
7 | import pandas as pd
8 | import os
9 | from datetime import datetime, timedelta
10 | import csv
11 | import yfinance as yf
12 |
13 |
14 | # Read up sector/industry information from text data
15 | stock_industry_map = pd.read_csv("stock_sector_industry_map.csv", header=0, usecols=["NSE Code","Industry","Market Cap", "Sector"])
16 |
17 | # Reference date for comparison, preferably <= 200 days before the run date
18 | reference_date = '2022-12-01'
19 |
20 | # Run date, must be later than the reference date
21 | run_date = '2023-08-05'
22 |
23 | # Minimum number of trading days to consider for index
24 | min_trading_days = 200
25 |
26 | # Maximum number of stocks to include in a sector group
27 | max_stocks_per_sector = 10
28 |
29 | # Limit on marketcap
30 | min_cap = 500 # Crores
31 |
32 | # Calculate gain percentages for different time periods
33 | periods = [5, 21, 55, 123]
34 |
35 | # Specify the benchmark symbol
36 | benchmark = "^NSEI"
37 |
38 | # Folder location
39 | output = 'output'
40 |
41 | def has_min_days_data(nse_code):
42 | # Calculate the start date as one year before the run_date
43 | start_date = (datetime.strptime(run_date, '%Y-%m-%d') - timedelta(days=365)).strftime('%Y-%m-%d')
44 |
45 | # Get the daily data for the specified period
46 | ticker = yf.Ticker(nse_code+'.NS')
47 | stock_data = ticker.history(start=start_date, end=run_date, interval='1d',auto_adjust=False, prepost=False)
48 |
49 | # Check if the stock has at least min_trading_days days of trading data
50 | if len(stock_data) >= min_trading_days:
51 | return True
52 | else:
53 | return False
54 |
55 | def prepare_custom_indexes(df):
56 | # Group the stocks by their sectors into a dictionary
57 | custom_indices = {}
58 |
59 | # Iterate through each row in the DataFrame
60 | for index, row in df.iterrows():
61 | sector = row['Sector']
62 | stock_info = {
63 | 'NSE Code': row['NSE Code'],
64 | 'Industry': row['Industry'],
65 | 'Market Cap': row['Market Cap']
66 | }
67 | nse_code = row['NSE Code']
68 |
69 | # Check if the stock has at least 200 days of trading data
70 | if has_min_days_data(nse_code):
71 | # Check if the sector already exists in the dictionary
72 | if sector in custom_indices:
73 | custom_indices[sector].append(stock_info)
74 | else:
75 | custom_indices[sector] = [stock_info]
76 |
77 | # Sort the stocks within each sector by decreasing market cap
78 | for sector in custom_indices:
79 | stocks_in_sector = custom_indices[sector]
80 | stocks_sorted_by_market_cap = sorted(stocks_in_sector, key=lambda x: x['Market Cap'], reverse=True)
81 | custom_indices[sector] = stocks_sorted_by_market_cap[:max_stocks_per_sector]
82 |
83 | # print(custom_indices)
84 | return custom_indices
85 |
86 | def generate_watchlist_with_headers(custom_indices):
87 | watchlist_string_withheaders = ""
88 | watchlist_string = ""
89 |
90 | sector_index_mapper = {}
91 |
92 | for sector, stocks in custom_indices.items():
93 | # Calculate the number of stocks in the sector
94 | num_stocks = len(stocks)
95 |         index_str = ''
96 |         str_header = f'###{sector},'
97 |         for stock in stocks:
98 |             nse_code = 'NSE:' + stock['NSE Code']
99 |             index_str += nse_code.replace('-','_').replace('&','_') + "+"
100 |
101 |         index_str = index_str.rsplit('+', 1)[0].strip()  # drop the trailing '+'
102 |         index_str = f'( {index_str} )/{num_stocks}' + ','
103 |         watchlist_string += index_str
104 |         sector_index_mapper[sector.upper()] = index_str
105 |         watchlist_string_withheaders = watchlist_string_withheaders + str_header.upper() + index_str
106 |
107 | # Write the watchlist to the txt file
108 | with open('custom_indices_without_headers.txt', 'w') as file:
109 | file.write(watchlist_string)
110 |
111 | # Write the watchlist to the txt file
112 | with open('custom_indices_with_headers.txt', 'w') as file:
113 | file.write(watchlist_string_withheaders)
114 |
115 | return sector_index_mapper
116 |
117 | def calculate_gain_percentages(data_df, reference_date, run_date):
118 | # Filter the data from the reference date to the run date
119 | filtered_data = data_df.loc[reference_date:run_date]
120 |
121 | # Calculate the gain percentage for the original period
122 | start_price = filtered_data.iloc[0]['Close']
123 | end_price = filtered_data.iloc[-1]['Close']
124 | gain_percentage = ((end_price - start_price) / start_price) * 100
125 |
126 | gain_percentages = [gain_percentage]
127 |
128 | for period in periods:
129 | if len(filtered_data) < period:
130 | gain_percentages.append(None) # Append None if there's insufficient data for the period
131 | else:
132 | start_price_period = filtered_data.iloc[-period]['Close']
133 | gain_percentage_period = ((end_price - start_price_period) / start_price_period) * 100
134 | gain_percentages.append(round(gain_percentage_period, 2))
135 |
136 | return gain_percentages
137 |
138 | def calculate_sector_gains(custom_indices, reference_date, run_date):
139 | sector_gains = {}
140 |
141 | for sector, stocks in custom_indices.items():
142 | total_close_start = 0.0
143 | total_close_end = 0.0
144 |
145 | for stock in stocks:
146 | nse_code = stock['NSE Code']
147 | ticker = yf.Ticker(nse_code+'.NS')
148 | stock_data = ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False)
149 | if not stock_data.empty:
150 | # Get the closing price on the reference_date and run_date
151 | close_start = stock_data.iloc[0]['Close']
152 | close_end = stock_data.iloc[-1]['Close']
153 | total_close_start += close_start
154 | total_close_end += close_end
155 |
156 | # Calculate the gain percentage for the sector from reference_date to run_date
157 | sector_gain = round(((total_close_end - total_close_start) / total_close_start) * 100, 2)
158 | sector_gains[sector] = sector_gain
159 |
160 | return sector_gains
161 |
162 | def main():
163 | print("Started...")
164 |     # Prepare working dataset: only NSE-listed codes with Market Cap >= min_cap crores
165 | df = stock_industry_map[(stock_industry_map['NSE Code'].notna()) & (stock_industry_map['Market Cap'] >= min_cap)]
166 | print(f'{len(df)} NSE stocks with mcap > {min_cap} Cr')
167 | # print(df.tail(10))
168 | # Prepare custom index
169 | ### df = df.tail(30) ### FOR TESTS ONLY####################
170 | print("Preparing custom indices...")
171 | custom_indices = prepare_custom_indexes(df)
172 | sector_index_mapper = generate_watchlist_with_headers(custom_indices)
173 |
174 | print("Calculating benchmark gain...")
175 | # Calculate gains of benchmark from reference date to run date
176 | benchmark_ticker = yf.Ticker(benchmark)
177 | benchmark_data = benchmark_ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False)
178 | benchmark_gain = calculate_gain_percentages(benchmark_data, reference_date, run_date)[0]
179 |
180 | print("Calculating sector gains...")
181 | sector_gains = calculate_sector_gains(custom_indices, reference_date, run_date)
182 |
183 | # Convert the date strings to datetime objects
184 | date1 = datetime.strptime(run_date, '%Y-%m-%d')
185 | date2 = datetime.strptime(reference_date, '%Y-%m-%d')
186 |
187 | # Calculate the difference in days between the two dates
188 | days_difference = (date1 - date2).days
189 |
190 | # Now we run for all stocks and create a big list and report
191 | result_df = pd.DataFrame(columns=['symbol', 'start','end','days', 'mcap', 'sector', 'industry', 'gain_stock_sector', 'gain_stock_benchmrk', 'gain_sector_benchmrk', \
192 | 'gain_stock_refdate', 'gain_sector_refdate', 'gain_benchmrk_refdate', 'gain_stock_5d', 'gain_stock_21d', 'gain_stock_55d', 'gain_stock_123d',\
193 | 'sector_index'])
194 |
195 | print("Calculating stock performances...")
196 | # Iterate through each row in the DataFrame
197 | for index, row in df.iterrows():
198 | nse_code = row['NSE Code']
199 | ticker = yf.Ticker(nse_code+'.NS')
200 | try:
201 | stock_data = ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False)
202 | if (len(stock_data) <= 2):
203 | print(f'Skipping... {nse_code}')
204 | continue
205 | stock_gains = calculate_gain_percentages(stock_data,reference_date, run_date)
206 | stock_gain_from_refdate = stock_gains[0]
207 | sector = row['Sector']
208 | industry = row['Industry']
209 | mcap = row['Market Cap']
210 | gain_stock_sector = stock_gain_from_refdate - sector_gains[sector]
211 | gain_stock_benchmrk = stock_gain_from_refdate - benchmark_gain
212 | gain_sector_benchmrk = sector_gains[sector] - benchmark_gain
213 | gain_sector_refdate = sector_gains[sector]
214 | sector_index = sector_index_mapper[sector.upper()]
215 |
216 | row = {'symbol': nse_code, 'start': reference_date, 'end' : run_date, 'days' : days_difference, 'mcap': str(mcap), 'sector' : sector.upper(), 'industry' : industry.upper(), \
217 | 'gain_stock_sector' : str(gain_stock_sector), 'gain_stock_benchmrk' : str(gain_stock_benchmrk), 'gain_sector_benchmrk' : str(gain_sector_benchmrk), \
218 | 'gain_stock_refdate' : str(stock_gain_from_refdate), 'gain_sector_refdate' : str(gain_sector_refdate), 'gain_benchmrk_refdate' : str(benchmark_gain), \
219 |                     'gain_stock_5d' : str(stock_gains[1]), 'gain_stock_21d' : str(stock_gains[2]), 'gain_stock_55d' : str(stock_gains[3]), 'gain_stock_123d' : str(stock_gains[4]),\
220 | 'sector_index' : sector_index}
221 |
222 | # Append the new row to the DataFrame
223 | result_df.loc[len(result_df)] = row
224 | except Exception as e:
225 | print(f'Error: {nse_code} => {e}')
226 |
227 | # Append current timestamp to the file name
228 | now = datetime.now()
229 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
230 | file_name = f'{output}/stock_sector_benchmark_{reference_date}_{run_date}_{timestamp}.csv'
231 | # Export the DataFrame to CSV
232 | result_df.to_csv(file_name, index=False)
233 | # print(sector_index_mapper)
234 | print("Done")
235 |
236 | if __name__ == "__main__":
237 | main()
238 |
--------------------------------------------------------------------------------
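
stock_sector_strength.py reads stock_sector_industry_map.csv from its working directory and uses only the four columns named in usecols; Market Cap is interpreted in crores so that the min_cap filter works as intended. A minimal sketch of the expected input, with illustrative rows and values:

    import pandas as pd

    # Column names must match the usecols in stock_sector_strength.py exactly.
    sample = pd.DataFrame([
        {"NSE Code": "RELIANCE", "Industry": "Refineries",  "Market Cap": 1700000, "Sector": "Energy"},
        {"NSE Code": "TCS",      "Industry": "IT Services", "Market Cap": 1200000, "Sector": "Information Technology"},
    ])
    sample.to_csv("stock_sector_industry_map.csv", index=False)

From such an input, generate_watchlist_with_headers builds one composite expression per sector of the form ###SECTOR,( NSE:AAA+NSE:BBB )/<count>, writing them to custom_indices_with_headers.txt and custom_indices_without_headers.txt, while main() exports the per-stock comparison table to the output folder.
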
/py/eodhd/stocks.csv:
--------------------------------------------------------------------------------
1 | Ticker
2 | RELIANCE
3 | HDFCBANK
4 | TCS
5 | ICICIBANK
6 | HINDUNILVR
7 | ITC
8 | INFY
9 | SBIN
10 | BHARTIARTL
11 | HDFC
12 | BAJFINANCE
13 | LICI
14 | KOTAKBANK
15 | LT
16 | ASIANPAINT
17 | HCLTECH
18 | AXISBANK
19 | MARUTI
20 | ADANIENT
21 | TITAN
22 | SUNPHARMA
23 | BAJAJFINSV
24 | DMART
25 | ULTRACEMCO
26 | TATAMOTORS
27 | WIPRO
28 | NESTLEIND
29 | ONGC
30 | JSWSTEEL
31 | M&M
32 | NTPC
33 | POWERGRID
34 | ADANIGREEN
35 | ADANIPORTS
36 | LTIM
37 | TATASTEEL
38 | COALINDIA
39 | IOC
40 | HDFCLIFE
41 | BAJAJ-AUTO
42 | PIDILITIND
43 | HINDZINC
44 | SBILIFE
45 | HAL
46 | SIEMENS
47 | DLF
48 | BRITANNIA
49 | GRASIM
50 | TECHM
51 | INDUSINDBK
52 | GODREJCP
53 | VBL
54 | VEDL
55 | INDIGO
56 | BANKBARODA
57 | DABUR
58 | DIVISLAB
59 | HINDALCO
60 | CHOLAFIN
61 | ADANIPOWER
62 | BEL
63 | EICHERMOT
64 | ABB
65 | DRREDDY
66 | ADANITRANS
67 | BPCL
68 | CIPLA
69 | SHREECEM
70 | AMBUJACEM
71 | BAJAJHLDNG
72 | HAVELLS
73 | SBICARD
74 | ICICIPRULI
75 | TATACONSUM
76 | MANKIND
77 | MCDOWELL-N
78 | APOLLOHOSP
79 | GAIL
80 | ATGL
81 | MARICO
82 | TATAPOWER
83 | ICICIGI
84 | PNB
85 | ZOMATO
86 | POLYCAB
87 | SHRIRAMFIN
88 | LODHA
89 | BERGEPAINT
90 | MOTHERSON
91 | TORNTPHARM
92 | SRF
93 | JINDALSTEL
94 | TVSMOTOR
95 | CGPOWER
96 | TIINDIA
97 | ZYDUSLIFE
98 | HEROMOTOCO
99 | IDBI
100 | UNIONBANK
101 | CANBK
102 | TRENT
103 | NAUKRI
104 | PFC
105 | MAXHEALTH
106 | INDHOTEL
107 | BOSCHLTD
108 | PIIND
109 | IDFCFIRSTB
110 | PAYTM
111 | ASHOKLEY
112 | HDFCAMC
113 | CUMMINSIND
114 | AWL
115 | YESBANK
116 | MUTHOOTFIN
117 | ASTRAL
118 | AUBANK
119 | PGHH
120 | IOB
121 | COLPAL
122 | IRCTC
123 | ABBOTINDIA
124 | SCHAEFFLER
125 | ABCAPITAL
126 | PATANJALI
127 | UPL
128 | JSWENERGY
129 | NHPC
130 | BALKRISIND
131 | AUROPHARMA
132 | IRFC
133 | INDUSTOWER
134 | TATAELXSI
135 | TATACOMM
136 | GODREJPROP
137 | SUPREMEIND
138 | ALKEM
139 | MPHASIS
140 | MRF
141 | HINDPETRO
142 | LTTS
143 | LUPIN
144 | RECLTD
145 | NYKAA
146 | CONCOR
147 | INDIANB
148 | PAGEIND
149 | UBL
150 | BHARATFORG
151 | APLAPOLLO
152 | LINDEINDIA
153 | M&MFIN
154 | OBEROIRLTY
155 | IDEA
156 | HONAUT
157 | MAZDOCK
158 | STARHEALTH
159 | SAIL
160 | PERSISTENT
161 | DALBHARAT
162 | BANDHANBNK
163 | UCOBANK
164 | BANKINDIA
165 | IGL
166 | SOLARINDS
167 | GICRE
168 | POLICYBZR
169 | PETRONET
170 | SONACOMS
171 | OFSS
172 | ACC
173 | AIAENG
174 | BHEL
175 | UNOMINDA
176 | NMDC
177 | GUJGASLTD
178 | L&TFH
179 | BIOCON
180 | 3MINDIA
181 | FACT
182 | SYNGENE
183 | MANYAVAR
184 | ESCORTS
185 | JUBLFOOD
186 | JSL
187 | FLUOROCHEM
188 | DELHIVERY
189 | METROBRAND
190 | TORNTPOWER
191 | THERMAX
192 | PHOENIXLTD
193 | EMBASSY
194 | SUNDARMFIN
195 | COROMANDEL
196 | POONAWALLA
197 | CRISIL
198 | RVNL
199 | FEDERALBNK
200 | COFORGE
201 | OIL
202 | MFSL
203 | KPITTECH
204 | CENTRALBK
205 | DEEPAKNTR
206 | GMRINFRA
207 | APOLLOTYRE
208 | KANSAINER
209 | SKFINDIA
210 | SUNDRMFAST
211 | MSUMI
212 | FORTIS
213 | VOLTAS
214 | TATACHEM
215 | DIXON
216 | JKCEMENT
217 | TIMKEN
218 | GRINDWELL
219 | SUZLON
220 | DEVYANI
221 | ENDURANCE
222 | PEL
223 | HATSUN
224 | GLAXO
225 | ZFCVINDIA
226 | KEI
227 | MAHABANK
228 | RELAXO
229 | PSB
230 | KAJARIACER
231 | CARBORUNIV
232 | KPRMILL
233 | NAVINFLUOR
234 | PRESTIGE
235 | BATAINDIA
236 | IIFL
237 | BDL
238 | EXIDEIND
239 | GLENMARK
240 | LICHSGFIN
241 | ZEEL
242 | NH
243 | RAMCOCEM
244 | SUNTV
245 | BAYERCROP
246 | ATUL
247 | SUMICHEM
248 | CREDITACC
249 | ISEC
250 | GLAND
251 | ABFRL
252 | IPCALAB
253 | SJVN
254 | NIACL
255 | NAM-INDIA
256 | JBCHEPHARM
257 | INDIAMART
258 | LALPATHLAB
259 | MEDANTA
260 | FIVESTAR
261 | LAURUSLABS
262 | RADICO
263 | VINATIORGA
264 | CIEINDIA
265 | CROMPTON
266 | EMAMILTD
267 | 360ONE
268 | WHIRLPOOL
269 | RATNAMANI
270 | GILLETTE
271 | IDFC
272 | MINDSPACE
273 | AJANTPHARM
274 | KALYANKJIL
275 | TATAMTRDVR
276 | POWERINDIA
277 | ELGIEQUIP
278 | PFIZER
279 | NXST
280 | CHOLAHLDNG
281 | BLUEDART
282 | AARTIIND
283 | TANLA
284 | TRIDENT
285 | NATIONALUM
286 | PNBHOUSING
287 | JBMA
288 | CGCL
289 | NLCINDIA
290 | CYIENT
291 | TTML
292 | GODREJIND
293 | GSPL
294 | KEC
295 | SANOFI
296 | IRB
297 | FINCABLES
298 | BLUESTARCO
299 | ASTERDM
300 | RAJESHEXPO
301 | MRPL
302 | KIMS
303 | CENTURYPLY
304 | LAXMIMACH
305 | PVRINOX
306 | SONATSOFTW
307 | BAJAJELEC
308 | FINEORG
309 | TEJASNET
310 | HAPPSTMNDS
311 | APARINDS
312 | REDINGTON
313 | DCMSHRIRAM
314 | NATCOPHARM
315 | CLEAN
316 | AFFLE
317 | WESTLIFE
318 | EIHOTEL
319 | ANGELONE
320 | ASAHIINDIA
321 | APLLTD
322 | APTUS
323 | CASTROLIND
324 | RBLBANK
325 | AETHER
326 | BRIGADE
327 | NSLNISP
328 | TRITURBINE
329 | NUVOCO
330 | AEGISCHEM
331 | GRINFRA
332 | PPLPHARMA
333 | AAVAS
334 | RHIM
335 | ALKYLAMINE
336 | CDSL
337 | SUVENPHAR
338 | VGUARD
339 | AKZOINDIA
340 | JINDALSAW
341 | HUDCO
342 | RAYMOND
343 | TATAINVEST
344 | SFL
345 | FINPIPE
346 | KIOCL
347 | HINDCOPPER
348 | BIKAJI
349 | DATAPATTNS
350 | BASF
351 | CAMS
352 | MEDPLUS
353 | RAINBOW
354 | ABSLAMC
355 | CHAMBLFERT
356 | CANFINHOME
357 | IEX
358 | MOTILALOFS
359 | ZENSARTECH
360 | RITES
361 | MANAPPURAM
362 | GESHIP
363 | TTKPRESTIG
364 | POLYMED
365 | EQUITASBNK
366 | CENTURYTEX
367 | AMARAJABAT
368 | BSOFT
369 | VTL
370 | ANURAS
371 | MGL
372 | OLECTRA
373 | KAYNES
374 | ITI
375 | KARURVYSYA
376 | UTIAMC
377 | ERIS
378 | WELSPUNIND
379 | BSE
380 | SUNCLAYLTD
381 | USHAMART
382 | RENUKA
383 | CESC
384 | CERA
385 | SHYAMMETL
386 | CEATLTD
387 | FSL
388 | CUB
389 | CRAFTSMAN
390 | GALAXYSURF
391 | ASTRAZEN
392 | CAMPUS
393 | CHALET
394 | ZYDUSWELL
395 | GODREJAGRO
396 | ROUTE
397 | BIRLACORPN
398 | GNFC
399 | KPIL
400 | SAPPHIRE
401 | PNCINFRA
402 | HFCL
403 | JYOTHYLAB
404 | BLS
405 | BIRET
406 | NCC
407 | COCHINSHIP
408 | IRCON
409 | INGERRAND
410 | KRBL
411 | ECLERX
412 | INTELLECT
413 | SHOPERSTOP
414 | PGHL
415 | SAREGAMA
416 | GODFRYPHLP
417 | VIPIND
418 | SPLPETRO
419 | WELCORP
420 | UJJIVANSFB
421 | CCL
422 | EIDPARRY
423 | SYRMA
424 | ELECON
425 | MCX
426 | RKFORGE
427 | GRAPHITE
428 | BALRAMCHIN
429 | IONEXCHANG
430 | LATENTVIEW
431 | MAPMYINDIA
432 | GLS
433 | JKLAKSHMI
434 | GPIL
435 | GRANULES
436 | BBTC
437 | PRAJIND
438 | KSB
439 | ENGINERSIN
440 | JWL
441 | ALOKINDS
442 | AMBER
443 | DEEPAKFERT
444 | MAHLIFE
445 | SPARC
446 | NBCC
447 | ALLCARGO
448 | TITAGARH
449 | EASEMYTRIP
450 | ACE
451 | MHRIL
452 | LEMONTREE
453 | SAFARI
454 | MINDACORP
455 | J&KBANK
456 | HOMEFIRST
457 | INDIGOPNTS
458 | EPL
459 | METROPOLIS
460 | BALAMINES
461 | ESABINDIA
462 | JMFINANCIL
463 | TEGA
464 | BEML
465 | PRINCEPIPE
466 | TV18BRDCST
467 | SWSOLAR
468 | GRSE
469 | CHEMPLASTS
470 | KNRCON
471 | KIRLFER
472 | TMB
473 | SCHNEIDER
474 | JUSTDIAL
475 | RUSTOMJEE
476 | LXCHEM
477 | GSFC
478 | TRIVENI
479 | CHENNPETRO
480 | MASTEK
481 | GMMPFAUDLR
482 | MAHSCOOTER
483 | BORORENEW
484 | ACI
485 | GET&D
486 | KTKBANK
487 | HNDFDS
488 | MTARTECH
489 | VRLLOG
490 | JUBLINGREA
491 | CAPLIPOINT
492 | KFINTECH
493 | INDIACEM
494 | JINDWORLD
495 | QUESS
496 | MAHSEAMLES
497 | ANANTRAJ
498 | GARFIBRES
499 | RCF
500 | HEG
501 | SARDAEN
502 | FUSION
503 | GOCOLORS
504 | HSCL
505 | SIS
506 | NETWORK18
507 | PRSMJOHNSN
508 | SYMPHONY
509 | HGINFRA
510 | ROLEXRINGS
511 | STLTECH
512 | JKTYRE
513 | GREENLAM
514 | SWANENERGY
515 | KIRLOSENG
516 | JUBLPHARMA
517 | PCBL
518 | SUPRAJIT
519 | GAEL
520 | GPPL
521 | RPOWER
522 | CMSINFO
523 | TCI
524 | GMDCLTD
525 | NEWGEN
526 | STARCEMENT
527 | POWERMECH
528 | TCIEXP
529 | MIDHANI
530 | RELINFRA
531 | IBULHSGFIN
532 | DAAWAT
533 | KENNAMET
534 | VSTIND
535 | VAIBHAVGBL
536 | HGS
537 | VESUVIUS
538 | FDC
539 | RBA
540 | RAIN
541 | SUNTECK
542 | RTNINDIA
543 | KIRLOSBROS
544 | AVANTIFEED
545 | JKPAPER
546 | INOXWIND
547 | RELIGARE
548 | BCG
549 | RSYSTEMS
550 | SOBHA
551 | ICRA
552 | UJJIVAN
553 | ISGEC
554 | PTCIL
555 | ZENTEC
556 | SPANDANA
557 | PARADEEP
558 | LAOPALA
559 | VARROC
560 | RESPONIND
561 | MMTC
562 | CSBBANK
563 | DELTACORP
564 | TECHNOE
565 | ORIENTELEC
566 | JSWHL
567 | GHCL
568 | RAILTEL
569 | MARKSANS
570 | BECTORFOOD
571 | BOROLTD
572 | GUJALKALI
573 | SHRIPISTON
574 | SANSERA
575 | IDEAFORGE
576 | GENUSPOWER
577 | NAVA
578 | ROSSARI
579 | RATEGAIN
580 | AARTIDRUGS
581 | VOLTAMP
582 | PRUDENT
583 | HBLPOWER
584 | SHARDACROP
585 | TATACOFFEE
586 | VIJAYA
587 | SCI
588 | AHLUCONT
589 | DODLA
590 | EDELWEISS
591 | PDSL
592 | GRAVITA
593 | NESCO
594 | HCG
595 | HLEGLAS
596 | LUXIND
597 | VMART
598 | ARVINDFASN
599 | ANANDRATHI
600 | JAMNAAUTO
601 | NAZARA
602 | SURYAROSNI
603 | SOUTHBANK
604 | PRIVISCL
605 | GREENPANEL
606 | MANINFRA
607 | AMIORG
608 | AGI
609 | RALLIS
610 | NEULANDLAB
611 | KKCL
612 | TEAMLEASE
613 | MASFIN
614 | AVALON
615 | HINDWAREAP
616 | EMIL
617 | KIRLPNU
618 | ICIL
619 | IRBINVIT
620 | DBCORP
621 | DREAMFOLKS
622 | JPPOWER
623 | SULA
624 | SBCL
625 | POLYPLEX
626 | SHAREINDIA
627 | HARSHA
628 | MFL
629 | INFIBEAM
630 | TIIL
631 | STAR
632 | THOMASCOOK
633 | TDPOWERSYS
634 | CYIENTDLM
635 | HEIDELBERG
636 | NEOGEN
637 | RAJRATAN
638 | BHARATRAS
639 | DCBBANK
640 | EMUDHRA
641 | MOIL
642 | SUNFLAG
643 | TIPSINDLTD
644 | JTEKTINDIA
645 | HIKAL
646 | GANESHHOUC
647 | GATEWAY
648 | LGBBROSLTD
649 | TINPLATE
650 | NILKAMAL
651 | TATVA
652 | IBREALEST
653 | SSWL
654 | PATELENG
655 | DISHTV
656 | ARVIND
657 | SHANTIGEAR
658 | DBL
659 | NOCIL
660 | DHANUKA
661 | ASTRAMICRO
662 | WOCKPHARMA
663 | CHOICEIN
664 | PFOCUS
665 | NFL
666 | ETHOSLTD
667 | WELENT
668 | MOLDTKPAC
669 | TASTYBITE
670 | GLOBUSSPR
671 | BANARISUG
672 | FORCEMOT
673 | IFBIND
674 | ADVENZYMES
675 | PGEL
676 | ELECTCAST
677 | SAKSOFT
678 | PRICOLLTD
679 | SUDARSCHEM
680 | AUTOAXLES
681 | DATAMATICS
682 | PTC
683 | TI
684 | NAVNETEDUL
685 | JAICORPLTD
686 | GOKEX
687 | MAITHANALL
688 | TATASTLLP
689 | BBOX
690 | WABAG
691 | KSCL
692 | KIRLOSIND
693 | GOODYEAR
694 | WSTCSTPAPR
695 | IKIO
696 | GREAVESCOT
697 | WONDERLA
698 | TARSONS
699 | UFLEX
700 | BSHSL
701 | FCL
702 | JTLIND
703 | DALMIASUG
704 | SOMANYCERA
705 | TIMETECHNO
706 | THYROCARE
707 | GABRIEL
708 | BAJAJCON
709 | INDOCO
710 | AARTIPHARM
711 | ITDCEM
712 | APOLLOPIPE
713 | HEMIPROP
714 | KPIGREEN
715 | KOVAI
716 | LANDMARK
717 | MAHLOG
718 | HCC
719 | NUCLEUS
720 | RAMKY
721 | ORIENTCEM
722 | JAYNECOIND
723 | UNIPARTS
724 | RAJRILTD
725 | MAXVIL
726 | MSTCLTD
727 | HINDOILEXP
728 | APCOTEXIND
729 | ITDC
730 | SUBROS
731 | ORCHPHARMA
732 | KOLTEPATIL
733 | JCHAC
734 | STYLAMIND
735 | IFCI
736 | JINDALPOLY
737 | TEXRAIL
738 | SHILPAMED
739 | DIVGIITTS
740 | MBAPL
741 | HATHWAY
742 | SAGCEM
743 | IWEL
744 | VENKEYS
745 | DYNAMATECH
746 | UNICHEMLAB
747 | TATAMETALI
748 | DBREALTY
749 | RTNPOWER
750 | PARAS
751 | PSPPROJECT
752 | TCNSBRANDS
753 | BARBEQUE
754 | BESTAGRO
755 | SIYSIL
756 | ASHOKA
757 | VSTTILLERS
758 | DCXINDIA
759 | JISLJALEQS
760 | SDBL
761 | IPL
762 | JKIL
763 | ASTEC
764 | FIEMIND
765 | VINDHYATEL
766 | ISMTLTD
767 | HERITGFOOD
768 | LUMAXTECH
769 | SANGHVIMOV
770 | GRAUWEIL
771 | SHARDAMOTR
772 | EXPLEOSOL
773 | EVEREADY
774 | CAMLINFINE
775 | DCAL
776 | SWARAJENG
777 | VENUSPIPES
778 | GULFOILLUB
779 | BAJAJHIND
780 | FINOPB
781 | UGROCAP
782 | CARTRADE
783 | TVSSRICHAK
784 | BOMDYEING
785 | ADFFOODS
786 | THANGAMAYL
787 | JAGRAN
788 | BANCOINDIA
789 | PRECAM
790 | GUFICBIO
791 | PURVA
792 | ORISSAMINE
793 | KINGFA
794 | SANGHIIND
795 | IMAGICAA
796 | BALMLAWRIE
797 | GANECOS
798 | PAISALO
799 | INDOSTAR
800 | CIGNITITEC
801 | AURIONPRO
802 | KESORAMIND
803 | NRBBEARING
804 | PILANIINVS
805 | BEPL
806 | MAYURUNIQ
807 | MMFL
808 | CARERATING
809 | HIL
810 | SUNDARMHLD
811 | HONDAPOWER
812 | IOLCP
813 | SEQUENT
814 | CONFIPET
815 | TARC
816 | GREENPLY
817 | ASHIANA
818 | ACCELYA
819 | RUPA
820 | BBL
821 | DHANI
822 | BUTTERFLY
823 | VADILALIND
824 | MOL
825 | VISHNU
826 | SANDHAR
827 | SOTL
828 | FMGOETZE
829 | STYRENIX
830 | GRWRHITECH
831 | DOLLAR
832 | SHALBY
833 | ATFL
834 | WENDT
835 | MANORAMA
836 | GNA
837 | HUHTAMAKI
838 | MPSLTD
839 | ALEMBICLTD
840 | IIFLSEC
841 | AMRUTANJAN
842 | LUMAXIND
843 | JPASSOCIAT
844 | VIDHIING
845 | APTECHT
846 | SIRCA
847 | TIRUMALCHM
848 | DIAMONDYD
849 | NOVARTIND
850 | ANUP
851 | SUPRIYA
852 | REPCOHOME
853 | DHAMPURSUG
854 | SURYODAY
855 | GATI
856 | INDIAGLYCO
857 | CARYSIL
858 | SESHAPAPER
859 | TIDEWATER
860 | NELCO
861 | GOCLCORP
862 | IMFA
863 | SEAMECLTD
864 | OPTIEMUS
865 | WHEELS
866 | KDDL
867 | MUKANDLTD
868 | SUBEXLTD
869 | CENTUM
870 | TTKHLTCARE
871 | AXISCADES
872 | JAIBALAJI
873 | RAMASTEEL
874 | ARMANFIN
875 | SPICEJET
876 | SJS
877 | PARAGMILK
878 | PANAMAPET
879 | DWARKESH
880 | COSMOFIRST
881 | INOXGREEN
882 | GALLANTT
883 | ARTEMISMED
884 | NACLIND
885 | SKIPPER
886 | MONTECARLO
887 | SERVOTECH
888 | DEN
889 | FOSECOIND
890 | JSWISPL
891 | XPROINDIA
892 | HARIOMPIPE
893 | SHANKARA
894 | ADORWELD
895 | PRECWIRE
896 | ANDHRAPAP
897 | SALASAR
898 | VAKRANGEE
899 | PIXTRANS
900 | FILATEX
901 | KSL
902 | KUANTUM
903 | TAJGVK
904 | RPGLIFE
905 | CAPACITE
906 | CANTABIL
907 | GIPCL
908 | SEPC
909 | RANEHOLDIN
910 | ROSSELLIND
911 | SATIN
912 | ORIENTHOT
913 | AHL
914 | GOKULAGRO
915 | SHK
916 | EIHAHOTELS
917 | UNIVCABLES
918 | FAIRCHEMOR
919 | SMLISUZU
920 | PRAKASH
921 | VSSL
922 | GTPL
923 | ARVSMART
924 | ANDHRSUGAR
925 | SANGAMIND
926 | STOVEKRAFT
927 | IGARASHI
928 | RAMCOIND
929 | HESTERBIO
930 | MOREPENLAB
931 | KABRAEXTRU
932 | NDTV
933 | MARATHON
934 | REFEX
935 | TCPLPACK
936 | KCP
937 | HARDWYN
938 | SASKEN
939 | JASH
940 | STEELXIND
941 | RIIL
942 |
--------------------------------------------------------------------------------
/py/ai/fininsightgpt/src/document_processor.py:
--------------------------------------------------------------------------------
1 | """
2 | Document Processor Module
3 |
4 | This module handles the conversion of various file formats to markdown text.
5 | Supported formats: txt, pdf, docx, pptx, xlsx, images
6 | """
7 |
8 | import os
9 | import re
10 | import logging
11 | import base64
12 | import json
13 | from pathlib import Path
14 | from typing import Dict, List, Optional, Tuple, Any
15 | import datetime
16 |
17 | # Load environment variables from .env file
18 | try:
19 | from dotenv import load_dotenv
20 | load_dotenv() # Load variables from .env
21 | ENV_LOADED = True
22 | except ImportError:
23 | ENV_LOADED = False
24 | logging.warning("dotenv not found, environment variables must be set manually")
25 |
26 | # Configure logging
27 | logging.basicConfig(
28 | level=logging.INFO,
29 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
30 | )
31 | logger = logging.getLogger(__name__)
32 |
33 | # Get model IDs and config from environment variables
34 | OPENAI_VISION_MODEL = os.environ.get("OPENAI_VISION_MODEL", "gpt-4-vision-preview")
35 | OPENAI_TEXT_MODEL = os.environ.get("OPENAI_TEXT_MODEL", "gpt-4-turbo")
36 | # Flag to enable/disable LLM prompt logging (default: enabled)
37 | ENABLE_LOGGING = os.environ.get("ENABLE_LLM_LOGGING", "true").lower() == "true"
38 |
39 | # Try to import optional dependencies, with graceful fallbacks
40 | try:
41 | import fitz # PyMuPDF
42 | PDF_EXTRACTOR = "pymupdf"
43 | except ImportError:
44 | PDF_EXTRACTOR = None
45 | logger.warning("PyMuPDF not found. PDF extraction will be limited.")
46 |
47 | try:
48 | import docx
49 | DOCX_AVAILABLE = True
50 | except ImportError:
51 | DOCX_AVAILABLE = False
52 | logger.warning("python-docx not found. DOCX extraction will be unavailable.")
53 |
54 | try:
55 | from pptx import Presentation
56 | PPTX_AVAILABLE = True
57 | except ImportError:
58 | PPTX_AVAILABLE = False
59 | logger.warning("python-pptx not found. PPTX extraction will be unavailable.")
60 |
61 | try:
62 | import pandas as pd
63 | PANDAS_AVAILABLE = True
64 | except ImportError:
65 | PANDAS_AVAILABLE = False
66 | logger.warning("pandas not found. XLSX extraction will be unavailable.")
67 |
68 | try:
69 | from PIL import Image
70 | import pytesseract
71 | OCR_AVAILABLE = True
72 | except ImportError:
73 | OCR_AVAILABLE = False
74 | logger.warning("PIL or pytesseract not found. OCR will be unavailable.")
75 |
76 | try:
77 | import openai
78 | OPENAI_AVAILABLE = True
79 | except ImportError:
80 | OPENAI_AVAILABLE = False
81 | logger.warning("OpenAI library not found. Advanced image analysis will be unavailable.")
82 |
83 |
84 | def extract_from_txt(file_path: str) -> str:
85 | """Extract text from a plain text file.
86 |
87 | Args:
88 | file_path: Path to the text file
89 |
90 | Returns:
91 | Extracted text content
92 | """
93 | try:
94 | with open(file_path, 'r', encoding='utf-8') as f:
95 | return f.read()
96 | except UnicodeDecodeError:
97 | # Try with different encodings if utf-8 fails
98 | try:
99 | with open(file_path, 'r', encoding='latin-1') as f:
100 | return f.read()
101 | except Exception as e:
102 | logger.error(f"Error reading text file {file_path}: {str(e)}")
103 | return f"ERROR: Could not read {file_path} due to encoding issues."
104 |
105 |
106 | def extract_from_pdf(file_path: str) -> str:
107 | """Extract text from a PDF file.
108 |
109 | Args:
110 | file_path: Path to the PDF file
111 |
112 | Returns:
113 | Extracted text content
114 | """
115 | if PDF_EXTRACTOR == "pymupdf":
116 | try:
117 | text_content = []
118 | with fitz.open(file_path) as doc:
119 | for page_num, page in enumerate(doc):
120 | text = page.get_text()
121 | text_content.append(f"# Page {page_num + 1}\n\n{text}\n\n")
122 | return "\n".join(text_content)
123 | except Exception as e:
124 | logger.error(f"Error extracting text from PDF {file_path}: {str(e)}")
125 | return f"ERROR: Could not extract text from {file_path}."
126 | else:
127 | logger.error("No PDF extraction library available")
128 | return "ERROR: PDF extraction requires PyMuPDF. Please install with: pip install pymupdf"
129 |
130 |
131 | def extract_from_docx(file_path: str) -> str:
132 | """Extract text from a DOCX file.
133 |
134 | Args:
135 | file_path: Path to the DOCX file
136 |
137 | Returns:
138 | Extracted text content
139 | """
140 | if not DOCX_AVAILABLE:
141 | return "ERROR: DOCX extraction requires python-docx. Please install with: pip install python-docx"
142 |
143 | try:
144 | doc = docx.Document(file_path)
145 | full_text = []
146 |
147 | for para in doc.paragraphs:
148 | full_text.append(para.text)
149 |
150 | # Add tables
151 | for table in doc.tables:
152 | for row in table.rows:
153 | row_text = " | ".join([cell.text for cell in row.cells])
154 | full_text.append(f"| {row_text} |")
155 |
156 | return "\n\n".join(full_text)
157 | except Exception as e:
158 | logger.error(f"Error extracting text from DOCX {file_path}: {str(e)}")
159 | return f"ERROR: Could not extract text from {file_path}."
160 |
161 |
162 | def extract_from_pptx(file_path: str) -> str:
163 | """Extract text from a PPTX file.
164 |
165 | Args:
166 | file_path: Path to the PPTX file
167 |
168 | Returns:
169 | Extracted text content
170 | """
171 | if not PPTX_AVAILABLE:
172 | return "ERROR: PPTX extraction requires python-pptx. Please install with: pip install python-pptx"
173 |
174 | try:
175 | presentation = Presentation(file_path)
176 | text_content = []
177 |
178 | for slide_num, slide in enumerate(presentation.slides):
179 | slide_text = []
180 | slide_text.append(f"# Slide {slide_num + 1}")
181 |
182 | for shape in slide.shapes:
183 | if hasattr(shape, "text") and shape.text.strip():
184 | slide_text.append(shape.text)
185 |
186 | text_content.append("\n\n".join(slide_text))
187 |
188 | return "\n\n---\n\n".join(text_content)
189 | except Exception as e:
190 | logger.error(f"Error extracting text from PPTX {file_path}: {str(e)}")
191 | return f"ERROR: Could not extract text from {file_path}."
192 |
193 |
194 | def extract_from_xlsx(file_path: str) -> str:
195 | """Extract data from an Excel file.
196 |
197 | Args:
198 | file_path: Path to the Excel file
199 |
200 | Returns:
201 | Extracted data as markdown tables
202 | """
203 | if not PANDAS_AVAILABLE:
204 | return "ERROR: Excel extraction requires pandas. Please install with: pip install pandas openpyxl"
205 |
206 | try:
207 | result = []
208 | # Read all sheets
209 | excel_file = pd.ExcelFile(file_path)
210 |
211 | for sheet_name in excel_file.sheet_names:
212 | df = pd.read_excel(file_path, sheet_name=sheet_name)
213 |
214 | # Convert to markdown table
215 | md_table = f"## Sheet: {sheet_name}\n\n"
216 | md_table += df.to_markdown(index=False)
217 | result.append(md_table)
218 |
219 | return "\n\n---\n\n".join(result)
220 | except Exception as e:
221 | logger.error(f"Error extracting data from Excel {file_path}: {str(e)}")
222 | return f"ERROR: Could not extract data from {file_path}."
223 |
224 |
225 | def log_llm_prompt(
226 | company_name: str,
227 | phase: str,
228 | section: str,
229 | messages: List[Dict[str, Any]],
230 | model: str,
231 | temperature: float,
232 | max_tokens: int,
233 | run_timestamp: Optional[str] = None
234 | ) -> None:
235 | """Log the prompt sent to the LLM.
236 |
237 | Args:
238 | company_name: Name of the company
239 | phase: Phase of processing (e.g., 'document_processing')
240 | section: Section being generated (e.g., 'image_analysis')
241 | messages: Messages sent to the LLM
242 | model: Model name
243 | temperature: Temperature setting
244 | max_tokens: Max tokens setting
245 | run_timestamp: Optional timestamp to use for the log filename. If provided,
246 | appends to an existing log file with this timestamp.
247 | """
248 | if not ENABLE_LOGGING:
249 | logger.info("LLM logging is disabled. Skipping log entry.")
250 | return
251 |
252 | # Create logs directory
253 | company_logs_dir = Path(f"company_data/{company_name}/logs")
254 | company_logs_dir.mkdir(exist_ok=True, parents=True)
255 |
256 | # Generate timestamp for the log file or use provided one
257 | timestamp = run_timestamp if run_timestamp else datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
258 | log_filename = f"{company_name}_{phase}_{timestamp}.log"
259 | log_path = company_logs_dir / log_filename
260 |
261 | # Prepare log entry
262 | log_entry = {
263 | "timestamp": datetime.datetime.now().isoformat(),
264 | "company": company_name,
265 | "phase": phase,
266 | "section": section,
267 | "model": model,
268 | "temperature": temperature,
269 | "max_tokens": max_tokens,
270 | "messages": messages
271 | }
272 |
273 | # Append to log file
274 | try:
275 | # Create file if it doesn't exist
276 | if not log_path.exists():
277 | with open(log_path, 'w', encoding='utf-8') as f:
278 | f.write(f"# LLM Interaction Log for {company_name}\n")
279 | f.write(f"# Phase: {phase}\n")
280 | f.write(f"# Created: {timestamp}\n\n")
281 |
282 | # Append log entry
283 | with open(log_path, 'a', encoding='utf-8') as f:
284 | f.write(f"\n## {section} - {datetime.datetime.now().isoformat()}\n")
285 | f.write(json.dumps(log_entry, indent=2))
286 | f.write("\n\n---\n\n")
287 |
288 | logger.info(f"Logged LLM prompt for {company_name}/{phase}/{section} to {log_path}")
289 | except Exception as e:
290 | logger.error(f"Failed to log LLM prompt: {str(e)}")
291 |
292 |
293 | def extract_from_image(file_path: str, run_timestamp: Optional[str] = None) -> str:
294 | """Extract text from an image using OCR.
295 |
296 | Args:
297 | file_path: Path to the image file
298 | run_timestamp: Optional timestamp for consistent log file naming
299 |
300 | Returns:
301 | Extracted text content
302 | """
303 | if not OCR_AVAILABLE:
304 | return "ERROR: Image extraction requires Pillow and pytesseract. Please install with: pip install Pillow pytesseract"
305 |
306 | try:
307 | image = Image.open(file_path)
308 | text = pytesseract.image_to_string(image)
309 |
310 | # Use OpenAI for better image understanding if available
311 | if OPENAI_AVAILABLE and os.environ.get("OPENAI_API_KEY"):
312 | try:
313 | # Get company name from file path
314 | file_path_obj = Path(file_path)
315 | company_name = file_path_obj.parent.name
316 |
317 | # Create a timestamp for this processing run if not provided
318 | if run_timestamp is None:
319 | run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
320 |
321 | # Try to get a better description using OpenAI's vision capabilities
322 | client = openai.Client(api_key=os.environ["OPENAI_API_KEY"])
323 | with open(file_path, "rb") as image_file:
324 | base_image = image_file.read()
325 |
326 | # Prepare messages
327 | messages = [
328 | {
329 | "role": "user",
330 | "content": [
331 | {"type": "text", "text": "Describe this image in detail, focusing on any financial data, charts, or business information visible."},
332 | {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64.b64encode(base_image).decode('utf-8')}"}}
333 | ]
334 | }
335 | ]
336 |
337 | # Log the prompt
338 | log_llm_prompt(
339 | company_name=company_name,
340 | phase="document_processing",
341 | section=f"image_analysis_{file_path_obj.stem}",
342 | messages=messages,
343 | model=OPENAI_VISION_MODEL, # Use global variable
344 | temperature=0.3,
345 | max_tokens=300,
346 | run_timestamp=run_timestamp
347 | )
348 |
349 | response = client.chat.completions.create(
350 | model=OPENAI_VISION_MODEL, # Use global variable
351 | messages=messages,
352 | temperature=0.3,
353 | max_tokens=300
354 | )
355 | vision_description = response.choices[0].message.content
356 | return f"## OCR Text:\n\n{text}\n\n## Image Analysis:\n\n{vision_description}"
357 | except Exception as e:
358 | logger.warning(f"OpenAI vision processing failed: {str(e)}")
359 | return f"## OCR Text:\n\n{text}"
360 | else:
361 | return f"## OCR Text:\n\n{text}"
362 | except Exception as e:
363 | logger.error(f"Error extracting text from image {file_path}: {str(e)}")
364 | return f"ERROR: Could not extract text from {file_path}."
365 |
366 |
367 | def convert_to_markdown(file_path: str, run_timestamp: Optional[str] = None) -> Tuple[str, str]:
368 | """Convert various file formats to markdown text.
369 |
370 | Args:
371 | file_path: Path to the file
372 | run_timestamp: Optional timestamp for consistent log naming across a run
373 |
374 | Returns:
375 | Tuple of (markdown_content, file_name)
376 | """
377 | file_path = Path(file_path)
378 | file_ext = file_path.suffix.lower()[1:] # Remove the dot
379 | file_name = file_path.stem
380 |
381 | content = f"# {file_name}\n\n"
382 | content += f"Source: {file_path}\n"
383 | content += f"Processed on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n---\n\n"
384 |
385 | try:
386 | if file_ext == "txt":
387 | extracted = extract_from_txt(str(file_path))
388 | elif file_ext == "pdf":
389 | extracted = extract_from_pdf(str(file_path))
390 | elif file_ext == "docx":
391 | extracted = extract_from_docx(str(file_path))
392 | elif file_ext == "pptx":
393 | extracted = extract_from_pptx(str(file_path))
394 | elif file_ext in ["xlsx", "xls"]:
395 | extracted = extract_from_xlsx(str(file_path))
396 | elif file_ext in ["jpg", "jpeg", "png", "gif", "bmp"]:
397 | extracted = extract_from_image(str(file_path), run_timestamp)
398 | else:
399 | extracted = f"Unsupported file format: {file_ext}"
400 | logger.warning(f"Unsupported file format: {file_ext}")
401 |
402 | content += extracted
403 |
404 | except Exception as e:
405 | logger.error(f"Error processing {file_path}: {str(e)}")
406 | content += f"ERROR: Failed to process file {file_path}. Exception: {str(e)}"
407 |
408 | return content, f"{file_name}.md"
409 |
410 |
411 | def process_company_folder(company_folder: str) -> List[Tuple[str, str]]:
412 | """Process all files in a company folder.
413 |
414 | Args:
415 | company_folder: Path to the company folder
416 |
417 | Returns:
418 | List of tuples (markdown_content, markdown_file_path)
419 | """
420 | logger.info(f"Processing company folder: {company_folder}")
421 | company_path = Path(company_folder)
422 |
423 | if not company_path.exists() or not company_path.is_dir():
424 | logger.error(f"Company folder does not exist: {company_folder}")
425 | return []
426 |
427 | # Get company name from folder name
428 | company_name = company_path.name
429 |
430 | # Create output folders proactively
431 | output_folder = company_path / "processed"
432 | output_folder.mkdir(exist_ok=True)
433 |
434 | # Create logs directory proactively
435 | logs_folder = company_path / "logs"
436 | logs_folder.mkdir(exist_ok=True)
437 | logger.info(f"Ensured logs directory exists: {logs_folder}")
438 |
439 | # Create a single timestamp for this processing run
440 | run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
441 |
442 | results = []
443 |
444 | # Process all files in the folder
445 | for file_path in company_path.glob("*"):
446 | if file_path.is_file() and not file_path.name.startswith('.') and not file_path.name.endswith('.md'):
447 | logger.info(f"Processing file: {file_path}")
448 |
449 | # Convert the file to markdown using the common run timestamp
450 | markdown_content, markdown_name = convert_to_markdown(str(file_path), run_timestamp)
451 |
452 | # Save the markdown file
453 | markdown_path = output_folder / markdown_name
454 | with open(markdown_path, 'w', encoding='utf-8') as f:
455 | f.write(markdown_content)
456 |
457 | results.append((markdown_content, str(markdown_path)))
458 |
459 | logger.info(f"Processed {len(results)} files for company: {company_name}")
460 | return results
--------------------------------------------------------------------------------
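
document_processor can also be used on its own, without going through main.py. A minimal sketch, assuming the optional extraction libraries from requirements.txt are installed; the company folder and file name are illustrative:

    from document_processor import convert_to_markdown, process_company_folder

    # Convert a single document; returns the markdown text and a suggested output file name.
    markdown, md_name = convert_to_markdown("../company_data/Acme/annual_report.pdf")
    with open(md_name, "w", encoding="utf-8") as f:
        f.write(markdown)

    # Or process an entire company folder; converted files are written to <folder>/processed/
    # and a (content, path) tuple is returned for each document handled.
    results = process_company_folder("../company_data/Acme")
    print(f"Converted {len(results)} documents")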