├── py ├── ai │ ├── fininsightgpt │ │ ├── src │ │ │ ├── __init__.py │ │ │ ├── requirements.txt │ │ │ ├── master_file_generator.py │ │ │ ├── main.py │ │ │ └── document_processor.py │ │ ├── company_data │ │ │ └── Description.txt │ │ ├── README.md │ │ └── prompt_master │ │ │ └── Equity_Research_Report_Template.md │ ├── market_analyzer │ │ ├── .env │ │ ├── output │ │ │ ├── Description.txt │ │ │ └── Avanti feeds_chat_log_20250323_211534.pdf │ │ ├── stock_chat.py │ │ ├── requirements.txt │ │ └── analysis_utils.py │ ├── turnaround │ │ ├── output │ │ │ └── description.txt │ │ ├── my_tools │ │ │ ├── __init__.py │ │ │ ├── web_fetcher.py │ │ │ ├── markdown_report.py │ │ │ ├── fs_reader.py │ │ │ └── cmd_executor.py │ │ ├── requirements.txt │ │ ├── data │ │ │ └── financial_data.csv │ │ ├── main.py │ │ └── README.md │ ├── nse_announcements │ │ ├── requirements.txt │ │ └── weekly_nse_announcements_analysis.py │ └── newsarranger │ │ ├── requirements.txt │ │ └── get_news_arrange.py ├── eodhd │ ├── price_data │ │ ├── RELIANCE_M.csv │ │ ├── RELIANCE_W.csv │ │ └── RELIANCE_D.csv │ ├── pricereader.py │ ├── ath_scan.py │ ├── my_rsi.py │ ├── how_many_weeks_high.py │ ├── saucer_crs.py │ ├── gareebman_entry_exit.py │ ├── mip12_scanner.py │ └── stocks.csv ├── beta │ ├── chatgpt │ │ ├── model.py │ │ └── generate_report_for_company.py │ └── concall_transcript_summarize.py └── yf │ ├── daily_rs_55_bo.py │ ├── glb_scan.py │ ├── ars_srs_scan.py │ ├── weeklyRSIVolStopBO.py │ ├── newHighMonthly.py │ ├── multimonthBO.py │ ├── box_scan.py │ ├── green_dot.py │ ├── trendreversal_ha.py │ ├── supply_exhaustion_6m_scan.py │ ├── ss_result_parser.py │ ├── limevolume.py │ └── stock_sector_strength.py ├── .gitignore └── README.md /py/ai/fininsightgpt/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/.env: -------------------------------------------------------------------------------- 1 | GOOGLE_API_KEY='YOUR API KEY' 2 | -------------------------------------------------------------------------------- /py/ai/turnaround/output/description.txt: -------------------------------------------------------------------------------- 1 | Output reports will be here 2 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/output/Description.txt: -------------------------------------------------------------------------------- 1 | Your outputs are saved here 2 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/company_data/Description.txt: -------------------------------------------------------------------------------- 1 | Create folder for each business (company) you wish to analyse and place all documents here. 
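For example, using the same placeholder layout shown in the project README, the folder could look like this (supported formats include PDF, DOCX, PPTX, TXT, XLSX and images):

company_data/
├── company1/
│   ├── file1.pdf
│   ├── file2.txt
│   └── image1.jpg
└── company2/
    ├── presentation.pptx
    └── financials.xlsx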
-------------------------------------------------------------------------------- /py/ai/market_analyzer/output/Avanti feeds_chat_log_20250323_211534.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QueryType/bharattrader/HEAD/py/ai/market_analyzer/output/Avanti feeds_chat_log_20250323_211534.pdf -------------------------------------------------------------------------------- /py/ai/fininsightgpt/src/requirements.txt: -------------------------------------------------------------------------------- 1 | pymupdf>=1.22.5 2 | python-docx>=0.8.11 3 | python-pptx>=0.6.21 4 | pandas>=2.0.0 5 | openpyxl>=3.1.2 6 | Pillow>=10.0.0 7 | pytesseract>=0.3.10 8 | openai>=1.3.0 9 | tiktoken>=0.5.0 10 | python-dotenv>=1.0.0 -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tools submodule for turnaround. 3 | 4 | Contains all the individual tool implementations. 5 | """ 6 | 7 | from .fs_reader import fs_reader 8 | from .cmd_executor import cmd_executor 9 | from .web_fetcher import search_web 10 | from .markdown_report import save_report 11 | 12 | __all__ = [ 13 | "fs_reader", 14 | "cmd_executor", 15 | "search_web", 16 | "save_report" 17 | ] 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | py/ai/fininsightgpt/.env 3 | 4 | # Python 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.so 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # Virtual Environment 27 | venv/ 28 | ENV/ 29 | env/ 30 | 31 | # IDE specific files 32 | .idea/ 33 | .vscode/ 34 | *.swp 35 | *.swo 36 | 37 | # OS specific files 38 | .DS_Store 39 | .DS_Store? 40 | ._* 41 | .Spotlight-V100 42 | .Trashes 43 | ehthumbs.db 44 | Thumbs.db 45 | 46 | # Jupyter Notebook 47 | .ipynb_checkpoints -------------------------------------------------------------------------------- /py/eodhd/price_data/RELIANCE_M.csv: -------------------------------------------------------------------------------- 1 | Date,Open,High,Low,Close,Volume,Adj Close 2 | 1994-11-03,375.0,400.0,356.75,380.25,879250,380.25 3 | 1994-12-01,378.5,382.5,329.0,341.2,851600,341.2 4 | 1995-01-02,341.0,343.0,235.15,272.45,4167200,272.45 5 | 1995-02-01,275.0,287.0,237.25,271.75,6171750,271.75 6 | 1995-03-01,275.0,297.0,250.0,265.45,12290250,265.45 7 | 1995-04-03,267.0,288.05,246.5,249.9,6403250,249.9 8 | 1995-05-02,249.5,280.0,225.55,271.85,21273350,271.85 9 | 1995-06-01,274.95,284.0,260.4,264.0,27161750,264.0 10 | 1995-07-03,262.25,303.0,226.35,267.4,26989650,267.4 11 | 1995-08-01,266.0,280.0,256.5,261.85,32501950,261.85 12 | 1995-09-01,261.0,278.0,257.0,275.0,19358500,275.0 13 | 1995-10-04,278.0,290.0,234.25,244.0,44468050,244.0 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bharattrader 2 | Utilities for trading , scanning and other things. 3 | 4 | I am not going to pretend that I am a coder wiz. I am just a normal software guy, who has written these codes for my own personal use. 
I know there are intelligent and smart people out there, who can not only write better code but also develop better algos. In case some people find something valuable here, you can pick up and honour the license under which the code is released. 5 | 6 | This repo is strictly as-is. Please do not come to me for changes and modifications. The source is licensed under GNU GPL, so please go ahead. 7 | Also, I am not responsible for any trading/investing/financial loss that you encounter after using these codes. Please do your own analysis. 8 | -------------------------------------------------------------------------------- /py/ai/turnaround/requirements.txt: -------------------------------------------------------------------------------- 1 | # AI Agent Framework 2 | smolagents>=0.3.0 3 | 4 | # OpenAI API Integration 5 | openai>=1.0.0 6 | 7 | # LLM Model Support 8 | litellm>=1.0.0 9 | 10 | # Environment Variables Management 11 | python-dotenv>=1.0.0 12 | 13 | # Data Processing 14 | pandas>=2.0.0 15 | numpy>=1.24.0 16 | 17 | # HTTP Requests (for web fetching) 18 | requests>=2.31.0 19 | 20 | # JSON Processing (built-in, but some tools might need enhanced support) 21 | jsonschema>=4.17.0 22 | 23 | # File I/O and CSV Processing (built-in, but for completeness) 24 | # csv - built-in 25 | # os - built-in 26 | # datetime - built-in 27 | 28 | # Optional: For enhanced web scraping capabilities 29 | beautifulsoup4>=4.12.0 30 | selenium>=4.15.0 31 | 32 | # Optional: For better logging and debugging 33 | loguru>=0.7.0 34 | 35 | # Optional: For data validation 36 | pydantic>=2.0.0 37 | -------------------------------------------------------------------------------- /py/ai/turnaround/data/financial_data.csv: -------------------------------------------------------------------------------- 1 | Name,BSE Code,NSE Code 2 | 63 Moons Tech.,526881,63MOONS 3 | Apex Frozen Food,540692,APEX 4 | Arman Financial,531179,ARMANFIN 5 | Ashima,514286,ASHIMASYN 6 | Bajaj Hindusthan,500032,BAJAJHIND 7 | Concord Enviro,544315,CEWATER 8 | Elpro Internatio,504000, 9 | Embassy Develop,532832,EMBDL 10 | Graphite India,509488,GRAPHITE 11 | Gujarat Alkalies,530001,GUJALKALI 12 | IFCI,500106,IFCI 13 | Meghmani Organi.,543331,MOL 14 | Munjal Auto Inds,520059,MUNJALAU 15 | Nuvoco Vistas,543334,NUVOCO 16 | PNB Gilts,532366,PNBGILTS 17 | Precision Camshf,539636,PRECAM 18 | Prince Pipes,542907,PRINCEPIPE 19 | Reliance Infra.,500390,RELINFRA 20 | RSWM Ltd,500350,RSWM 21 | S Chand & Compan,540497,SCHAND 22 | Saurashtra Cem.,502175,SAURASHCEM 23 | Snowman Logistic,538635,SNOWMAN 24 | T N Newsprint,531426,TNPL 25 | Texmaco Infrast.,505400,TEXINFRA 26 | Utkarsh Small F.,543942,UTKARSHBNK 27 | Visaka Industrie,509055,VISAKAIND 28 | -------------------------------------------------------------------------------- /py/ai/nse_announcements/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.2.0 3 | arxiv==2.1.0 4 | autogenstudio==0.0.25a0 5 | boto3==1.34.145 6 | botocore==1.34.145 7 | certifi==2023.11.17 8 | charset-normalizer==3.3.2 9 | click==8.1.7 10 | diskcache==5.6.3 11 | distro==1.9.0 12 | exceptiongroup==1.2.0 13 | fastapi==0.109.0 14 | feedparser==6.0.10 15 | FLAML==2.1.1 16 | h11==0.14.0 17 | httpcore==1.0.2 18 | httpx==0.26.0 19 | idna==3.6 20 | jiter==0.5.0 21 | jmespath==1.0.1 22 | numpy==1.26.3 23 | openai==1.42.0 24 | packaging==24.1 25 | pandas==2.2.2 26 | pillow==10.4.0 27 | plotly==5.22.0 28 | pyautogen==0.2.6 29 | pydantic==2.5.3 30 | 
pydantic_core==2.14.6 31 | PyMuPDF==1.24.9 32 | PyMuPDFb==1.24.9 33 | python-dateutil==2.9.0.post0 34 | python-dotenv==1.0.0 35 | pytz==2024.1 36 | regex==2023.12.25 37 | requests==2.31.0 38 | s3transfer==0.10.2 39 | sgmllib3k==1.0.0 40 | six==1.16.0 41 | sniffio==1.3.0 42 | starlette==0.35.1 43 | tenacity==8.5.0 44 | termcolor==2.4.0 45 | tiktoken==0.5.2 46 | tqdm==4.66.1 47 | typer==0.9.0 48 | typing_extensions==4.12.2 49 | tzdata==2024.1 50 | urllib3==2.1.0 51 | uvicorn==0.25.0 52 | -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/web_fetcher.py: -------------------------------------------------------------------------------- 1 | from smolagents import tool 2 | import os 3 | from openai import OpenAI 4 | import json 5 | 6 | model="gpt-4.1-mini" 7 | client = OpenAI() 8 | 9 | @tool 10 | def search_web(query: str) -> str: 11 | """ 12 | This tool searches the web for the given query and returns the results. 13 | It is useful for gathering information from the web to assist in decision-making or analysis. 14 | Args: 15 | query (str): The search query to use. Be as specific as possible to get relevant results. 16 | Returns: 17 | str: The search results or an error message if the search fails. It is json formatted string. 18 | """ 19 | # check if the file exists on the filesystem 20 | if not query: 21 | return "No file path provided." 22 | 23 | response = client.responses.create( 24 | model=model, # or another supported model 25 | input=query, 26 | tools=[ 27 | { 28 | "type": "web_search" 29 | } 30 | ] 31 | ) 32 | return json.dumps(response.output, default=lambda o: o.__dict__, indent=2) -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/markdown_report.py: -------------------------------------------------------------------------------- 1 | from smolagents import tool 2 | import datetime 3 | 4 | instructions = """You are simple file writer tool that dumps the input text into a file.""" 5 | 6 | @tool 7 | def save_report(md_report: str, business_name: str) -> None: 8 | """ 9 | This tool saves a markdown formatted report to a file. 10 | Args: 11 | md_report (str): The markdown report content to save. 12 | business_name (str): The name of the business for which the report is generated. 13 | Returns: 14 | None: The function does not return anything, but saves the report to a file. 15 | """ 16 | # check if the file exists on the filesystem 17 | if not md_report: 18 | return "No file path provided." 19 | 20 | output_file = f"output/{business_name}" + datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + "_report.md" 21 | 22 | # Save the output to a file 23 | try: 24 | with open(output_file, "w", encoding="utf-8") as file: 25 | file.write(md_report) 26 | except Exception as e: 27 | return f"An error occurred while saving the report: {str(e)}" -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/fs_reader.py: -------------------------------------------------------------------------------- 1 | from smolagents import tool 2 | import os 3 | from huggingface_hub import list_models 4 | 5 | @tool 6 | def fs_reader(task: str) -> str: 7 | """ 8 | This tool reads a file from the filesystem and returns its content. 9 | This can read on plain text files, markdown files, source code files, etc. 10 | It is useful for reading files that are part of the project or for reading 11 | files that are provided as input to the agent. 
12 | Args: 13 | task (str): The path to the file to read. 14 | Returns: 15 | str: The content of the file or an error message if the file cannot be read. 16 | """ 17 | # check if the file exists on the filesystem 18 | if not task: 19 | return "No file path provided." 20 | 21 | # Expand user path (handle ~ symbol) 22 | expanded_path = os.path.expanduser(task) 23 | 24 | try: 25 | with open(expanded_path, "r", encoding="utf-8") as file: 26 | content = file.read() 27 | return content 28 | except FileNotFoundError: 29 | return f"File not found: {expanded_path} (original path: {task})" 30 | except Exception as e: 31 | return f"An error occurred while reading the file: {str(e)}" -------------------------------------------------------------------------------- /py/eodhd/price_data/RELIANCE_W.csv: -------------------------------------------------------------------------------- 1 | Date,Open,High,Low,Close,Volume,Adj Close 2 | 1994-11-03,375.0,400.0,375.0,396.0,42650,396.0 3 | 1994-11-07,396.0,399.0,369.5,372.75,297500,372.75 4 | 1994-11-14,373.0,390.0,356.75,387.5,259000,387.5 5 | 1994-11-21,385.0,387.0,367.0,379.5,174750,379.5 6 | 1994-11-28,377.5,387.0,372.5,374.25,158850,374.25 7 | 1994-12-05,373.5,382.5,358.5,363.0,223050,363.0 8 | 1994-12-12,355.0,355.5,329.0,344.0,367400,344.0 9 | 1994-12-19,345.0,345.5,337.0,344.75,137600,344.75 10 | 1994-12-26,339.0,344.75,336.35,341.2,70050,341.2 11 | 1995-01-02,341.0,343.0,315.0,316.75,235400,316.75 12 | 1995-01-09,319.25,319.25,277.5,286.3,1330100,286.3 13 | 1995-01-16,288.0,294.0,271.0,272.5,724000,272.5 14 | 1995-01-23,274.5,274.5,235.15,256.2,1151100,256.2 15 | 1995-01-30,250.0,282.55,248.0,279.05,1962650,279.05 16 | 1995-02-06,280.0,281.0,262.0,269.85,1348800,269.85 17 | 1995-02-13,270.0,274.7,250.0,255.45,1114200,255.45 18 | 1995-02-20,255.0,282.0,237.25,266.7,1911100,266.7 19 | 1995-02-28,263.5,291.5,258.0,285.5,2423250,285.5 20 | 1995-03-06,286.0,297.0,271.0,286.45,2130300,286.45 21 | 1995-03-13,282.0,290.0,250.0,266.95,4470750,266.95 22 | 1995-03-20,255.0,267.25,250.0,259.75,2033150,259.75 23 | 1995-03-27,264.75,274.45,258.0,265.45,1794400,265.45 24 | 1995-04-03,267.0,288.05,262.0,282.75,1939300,282.75 25 | 1995-04-10,282.0,282.25,275.0,280.7,841750,280.7 26 | 1995-04-17,284.0,286.0,274.25,279.55,1512550,279.55 27 | 1995-04-24,280.0,280.2,246.5,249.9,2109650,249.9 28 | -------------------------------------------------------------------------------- /py/eodhd/pricereader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | base_path = 'price_data' 3 | 4 | def get_price_data(stockname, period): 5 | """ 6 | Fetches stock price data from CSV files for the given stock name and period. 7 | Sets the 'Date' column as a DatetimeIndex. 
8 | 9 | :param stockname: Name of the stock (str) 10 | :param period: List of periods for which to fetch data ['d', 'w', 'm'] 11 | :return: Dictionary of DataFrames with keys as the period 12 | """ 13 | 14 | df = pd.DataFrame() 15 | 16 | # Mapping of period to file suffix 17 | period_suffix = {'d': '_D.csv', 'w': '_W.csv', 'm': '_M.csv'} 18 | 19 | 20 | # Construct file path based on stock name and period 21 | file_path = f"{base_path}/{stockname}{period_suffix[period]}" 22 | try: 23 | # Read the data from the file and set the 'Date' column as the index 24 | df = pd.read_csv(file_path, parse_dates=['Date']) 25 | df.set_index('Date', inplace=True) 26 | except FileNotFoundError: 27 | print(f"No data available for {stockname} for period: {period}") 28 | 29 | return df 30 | 31 | ''' 32 | This requires to pass df, after selection of the timeframe 33 | ''' 34 | def get_price_daterange(df, start_date, end_date): 35 | # Ensure the dates are in the correct format 36 | start_date = pd.to_datetime(start_date) 37 | end_date = pd.to_datetime(end_date) 38 | 39 | # Filter the dataframe 40 | filtered_df = df[(df.index >= start_date) & (df.index <= end_date)] 41 | 42 | return filtered_df 43 | -------------------------------------------------------------------------------- /py/ai/newsarranger/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.8.0 3 | beautifulsoup4==4.12.3 4 | Brotli==1.1.0 5 | cachetools==5.5.0 6 | certifi==2024.12.14 7 | cffi==1.17.1 8 | charset-normalizer==3.4.1 9 | cobble==0.1.4 10 | cryptography==44.0.0 11 | cssselect2==0.7.0 12 | defusedxml==0.7.1 13 | distro==1.9.0 14 | docopt==0.6.2 15 | et_xmlfile==2.0.0 16 | fonttools==4.55.3 17 | google-auth==2.37.0 18 | google-genai==0.4.0 19 | h11==0.14.0 20 | httpcore==1.0.7 21 | httpx==0.28.1 22 | idna==3.10 23 | jiter==0.8.2 24 | lxml==5.3.0 25 | mammoth==1.9.0 26 | markdown2==2.5.2 27 | Markdown2PDF==0.1.4 28 | markdownify==0.14.1 29 | -e git+https://github.com/microsoft/markitdown.git@f58a864951da6c720d3e10987371133c67db296a#egg=markitdown 30 | md2pdf==1.0.1 31 | numpy==2.2.1 32 | olefile==0.47 33 | openai==1.59.6 34 | openpyxl==3.1.5 35 | pandas==2.2.3 36 | pathvalidate==3.2.3 37 | pdfkit==1.0.0 38 | pdfminer.six==20240706 39 | pillow==11.1.0 40 | puremagic==1.28 41 | pyasn1==0.6.1 42 | pyasn1_modules==0.4.1 43 | pycparser==2.22 44 | pydantic==2.10.5 45 | pydantic_core==2.27.2 46 | pydub==0.25.1 47 | pydyf==0.11.0 48 | pyphen==0.17.0 49 | python-dateutil==2.9.0.post0 50 | python-dotenv==1.0.1 51 | python-pptx==1.0.2 52 | pytz==2024.2 53 | requests==2.32.3 54 | rsa==4.9 55 | setuptools==75.1.0 56 | six==1.17.0 57 | sniffio==1.3.1 58 | soupsieve==2.6 59 | SpeechRecognition==3.13.0 60 | tinycss2==1.4.0 61 | tinyhtml5==2.0.0 62 | tqdm==4.67.1 63 | typing_extensions==4.12.2 64 | tzdata==2024.2 65 | urllib3==2.3.0 66 | weasyprint==63.1 67 | webencodings==0.5.1 68 | websockets==14.1 69 | wheel==0.44.0 70 | xlrd==2.0.1 71 | XlsxWriter==3.2.0 72 | youtube-transcript-api==0.6.3 73 | zopfli==0.2.3.post1 74 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/stock_chat.py: -------------------------------------------------------------------------------- 1 | from analysis_utils import initialize_client, show_parts, log_message, log_message_r, start_log_file, end_log_file 2 | from datetime import datetime 3 | import os 4 | 5 | chat_output_folder = "output" 6 | my_model = 'gemini-2.0-flash' 7 | client = 
initialize_client('GOOGLE_API_KEY') 8 | 9 | def main(): 10 | search_tool = {'google_search': {}} 11 | stock_chat = client.chats.create(model=my_model, config={'tools': [search_tool]}) 12 | 13 | while True: 14 | stock = input('Enter stock or company to chat on (or type bye to leave): ') 15 | if stock == 'bye': 16 | break 17 | 18 | timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') 19 | log_file = f"{chat_output_folder}/{stock}_chat_log_{timestamp}.html" 20 | start_log_file(log_file) 21 | log_message(log_file, f"User selected stock/company: {stock}", "info") 22 | 23 | date_now = datetime.now().strftime('%Y-%m-%d') 24 | stock_prompt_prefix = f'Date today is: {date_now}. Answer following in context of the company/stock_code {stock}, listed in India.\n' 25 | 26 | while True: 27 | input_txt = input('Ask >> : ') 28 | if input_txt == 'exit': 29 | break 30 | log_message(log_file, f"User input: {input_txt}", "user") 31 | print("-" * 80) 32 | response = stock_chat.send_message(f"{stock_prompt_prefix}{input_txt}") 33 | show_parts(response) 34 | log_message_r(log_file, response, "model") 35 | print(f'Working on: {stock}\n Type exit to work on new stock/company.') 36 | 37 | end_log_file(log_file) 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /py/beta/chatgpt/model.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import os 3 | from dotenv import load_dotenv, find_dotenv 4 | 5 | model_name = 'gpt-4' #gpt-3.5-turbo 6 | 7 | def get_completion(prompt, model=model_name): 8 | messages = [{"role": "user", "content": prompt}] 9 | response = openai.ChatCompletion.create( 10 | model=model, 11 | messages=messages, 12 | temperature=0, # this is the degree of randomness of the model's output 13 | ) 14 | return response.choices[0].message["content"] 15 | 16 | def get_completion_large(messages, 17 | model=model_name, 18 | temperature=0, 19 | max_tokens=1000): 20 | continuation_token = None 21 | 22 | while True: 23 | response = openai.Completion.create( 24 | model=model, 25 | messages=messages, 26 | temperature=temperature, 27 | max_tokens=max_tokens, 28 | continuation_token=continuation_token 29 | ) 30 | 31 | chunk = response.choices[0].message['content'] 32 | messages.append({'role': 'system', 'content': chunk}) 33 | 34 | continuation_token = response['choices'][0]['finish_reason'] 35 | 36 | if continuation_token == 'stop': 37 | break 38 | 39 | return response.choices[0].message["content"] 40 | 41 | 42 | def get_completion_from_messages(messages, 43 | model=model_name, 44 | temperature=0, 45 | max_tokens=500): 46 | response = openai.ChatCompletion.create( 47 | model=model, 48 | messages=messages, 49 | temperature=temperature, 50 | max_tokens=max_tokens, 51 | ) 52 | return response.choices[0].message["content"] 53 | 54 | def set_api(): 55 | _ = load_dotenv(find_dotenv()) # read local .env file 56 | openai.api_key = os.environ['OPENAI_API_KEY'] 57 | -------------------------------------------------------------------------------- /py/eodhd/ath_scan.py: -------------------------------------------------------------------------------- 1 | import pricereader as pr 2 | import pandas as pd 3 | import time 4 | 5 | # Read the list of stocks from the CSV file 6 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 7 | 8 | # Set the bar time frame 9 | data_interval = 'm' 10 | 11 | # Initialize a list to store the results 12 | results = [] 13 | 14 | # Iterate through the 
list of stocks 15 | for stock in stocks["Ticker"]: 16 | try: 17 | # Get the stock data 18 | data = pr.get_price_data(stock, data_interval) 19 | # Drop those with NaN 20 | data = data.dropna() 21 | # Drop last row, if 2nd last is already of the month 22 | if data.index[-1].month == data.index[-2].month: 23 | # Replace the values in the second-to-last row with the values in the last row 24 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 25 | # Delete the last row 26 | data = data.drop(data.index[-1]) 27 | 28 | # print(data) 29 | # data = data.iloc[:-1 , :] // If previous month ATH stocks are desired 30 | 31 | # Initialize the ATH to the first close price and the ATH date to the first date 32 | ath = data.at[data.index[0], 'High'] 33 | ath_date = data.index[0] 34 | 35 | data_iter = data.iloc[:-1] 36 | 37 | # Loop through each row of the dataframe 38 | for index, row in data_iter.iterrows(): 39 | # Update the ATH and ATH date if the current close price is higher 40 | if row['High'] > ath: 41 | ath = row['High'] 42 | ath_date = index 43 | 44 | # print(stock + " green line: " + str(green_line) + " green line date: " + str(green_line_date)) 45 | last_close = data.at[data.index[-1], 'Close'] 46 | 47 | if last_close > ath: 48 | # print(stock +" close: " + str(last_close) + " ath: " + str(ath) + " ath date: " + str(ath_date)) 49 | results.append(stock) 50 | 51 | except Exception as e: 52 | print("Error for ticker: " + stock) 53 | print(e) 54 | 55 | # Print the results 56 | print(results) 57 | print("Done") -------------------------------------------------------------------------------- /py/yf/daily_rs_55_bo.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Detect breakout of CRS from 55 day average 3 | Daily timeframe 4 | ''' 5 | 6 | import yfinance as yf 7 | import pandas as pd 8 | 9 | # Set the bar time frame 10 | data_interval = '1d' 11 | 12 | # Set the time frame to max 13 | time_frame = '1y' 14 | 15 | # Set CRS average length 16 | average_length = 55 17 | 18 | # Specify the benchmark symbol 19 | benchmark = "^NSEI" 20 | 21 | # Read the list of stocks from the CSV file 22 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 23 | 24 | def main(): 25 | print('Started') 26 | 27 | # Use yfinance to retrieve the benchmark data 28 | benchmark_ticker = yf.Ticker(benchmark) 29 | benchmark_data = benchmark_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 30 | benchmark_data = benchmark_data.dropna() 31 | 32 | # Iterate through the list of stocks 33 | for stock in stocks["Ticker"]: 34 | try: 35 | ticker = yf.Ticker(stock+".NS") 36 | stock_history = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 37 | stock_history = stock_history.dropna() 38 | 39 | # Create a new column in the stock dataframe for relative strength 40 | rs_column = 'Relative_Strength' 41 | stock_history[rs_column] = stock_history['Close'] / benchmark_data['Close'] 42 | 43 | # Calculate the average_length-day moving average of the 'Relative_Strength' column 44 | crs_average_column = f'{average_length}_RS_MA' 45 | stock_history[crs_average_column] = stock_history[rs_column].rolling(window=average_length).mean() 46 | 47 | # Check if there is a cross over of crs 48 | isCrossOver = stock_history.iloc[-2][rs_column] <= stock_history.iloc[-2][crs_average_column] and \ 49 | stock_history.iloc[-1][rs_column] > stock_history.iloc[-1][crs_average_column] 50 | if (isCrossOver): 51 | print(stock) 52 | 53 | except Exception as e: 54 | 
print(f"Error: {stock} ==> {e}") 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /py/beta/concall_transcript_summarize.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PyPDF2 import PdfReader 3 | from transformers import PegasusTokenizer, PegasusForConditionalGeneration 4 | 5 | #path of the folder where your pdfs are located 6 | folder_path = "concallpdfs" 7 | 8 | # Max token size 9 | max_seq_length = 512 10 | 11 | # Max token for pegasus financial summarization 12 | max_length_pegasus_fin_summ = 32 13 | 14 | # Pick model 15 | # model_name = "google/pegasus-xsum" -- used for testing 16 | model_name = "human-centered-summarization/financial-summarization-pegasus" 17 | 18 | # Load pretrained tokenizer 19 | pegasus_tokenizer = PegasusTokenizer.from_pretrained(model_name) 20 | 21 | # Make model from pre-trained model 22 | model = PegasusForConditionalGeneration.from_pretrained(model_name) 23 | 24 | for filename in os.listdir(folder_path): 25 | if filename.endswith(".pdf"): 26 | pdf_path = os.path.join(folder_path, filename) 27 | with open(pdf_path, "rb") as file: 28 | print(f'Summarizing {filename}') 29 | reader = PdfReader(file) 30 | page_summaries = [] 31 | count = 0 32 | for page in reader.pages: # summarize page by page 33 | page_text = page.extract_text() 34 | # Generate input tokens 35 | input_ids = pegasus_tokenizer(page_text, max_length=max_seq_length, truncation=True, return_tensors="pt").input_ids 36 | # Generate Summary 37 | summary_ids = model.generate(input_ids, max_length=max_length_pegasus_fin_summ, num_beams=5, early_stopping=True) 38 | tgt_texts = pegasus_tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False) 39 | page_summaries.append(tgt_texts[0]) 40 | count = count + 1 41 | # print(f'{count} page(s) done') 42 | # Merge all page summaries 43 | merged_summary = "\n".join(page_summaries) 44 | # Write the merged summary to a file 45 | with open(f'{folder_path}/{filename}_summary.txt', 'w') as f: 46 | f.write(merged_summary) 47 | print(f'{filename} done') 48 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | annotated-types==0.6.0 3 | anthropic==0.34.2 4 | anyio==4.3.0 5 | attrs==24.2.0 6 | black==24.10.0 7 | cachetools==5.3.2 8 | cattrs==23.2.3 9 | certifi==2023.11.17 10 | charset-normalizer==3.3.2 11 | click==8.1.7 12 | colorama==0.4.6 13 | dill==0.3.9 14 | distro==1.9.0 15 | dnspython==2.7.0 16 | ell-ai==0.0.14 17 | email_validator==2.2.0 18 | exceptiongroup==1.2.0 19 | fastapi==0.115.6 20 | fastapi-cli==0.0.5 21 | ffmpy==0.5.0 22 | filelock==3.16.1 23 | fsspec==2024.10.0 24 | google-ai-generativelanguage==0.6.10 25 | google-api-core==2.15.0 26 | google-api-python-client==2.125.0 27 | google-auth==2.25.2 28 | google-auth-httplib2==0.2.0 29 | google-genai==0.1.0 30 | google-generativeai==0.8.3 31 | googleapis-common-protos==1.62.0 32 | gradio==5.9.1 33 | gradio_client==1.5.2 34 | groq==0.11.0 35 | grpcio==1.60.0 36 | grpcio-status==1.60.0 37 | h11==0.14.0 38 | httpcore==1.0.5 39 | httplib2==0.22.0 40 | httptools==0.6.4 41 | httpx==0.27.0 42 | huggingface-hub==0.26.2 43 | idna==3.6 44 | Jinja2==3.1.4 45 | jiter==0.7.0 46 | markdown-it-py==3.0.0 47 | markdown2==2.5.2 48 | MarkupSafe==2.1.5 49 | mdurl==0.1.2 50 | 
mypy-extensions==1.0.0 51 | numpy==2.1.3 52 | openai==1.54.3 53 | orjson==3.10.12 54 | packaging==24.2 55 | pandas==2.2.3 56 | pathspec==0.12.1 57 | pillow==10.4.0 58 | platformdirs==4.3.6 59 | proto-plus==1.23.0 60 | protobuf==4.25.1 61 | psutil==5.9.8 62 | pyasn1==0.5.1 63 | pyasn1-modules==0.3.0 64 | pydantic==2.7.0 65 | pydantic_core==2.18.1 66 | pydub==0.25.1 67 | Pygments==2.18.0 68 | pyparsing==3.1.2 69 | python-dateutil==2.9.0.post0 70 | python-dotenv==1.0.0 71 | python-multipart==0.0.20 72 | pytz==2024.2 73 | PyYAML==6.0.2 74 | requests==2.32.3 75 | rich==13.9.4 76 | rsa==4.9 77 | ruff==0.8.4 78 | safehttpx==0.1.6 79 | semantic-version==2.10.0 80 | shellingham==1.5.4 81 | six==1.17.0 82 | sniffio==1.3.1 83 | SQLAlchemy==2.0.36 84 | sqlmodel==0.0.21 85 | starlette==0.41.3 86 | tokenizers==0.20.3 87 | tomli==2.0.2 88 | tomlkit==0.13.2 89 | tqdm==4.66.1 90 | typer==0.13.0 91 | typing_extensions==4.12.2 92 | tzdata==2024.2 93 | uritemplate==4.1.1 94 | urllib3==2.1.0 95 | uvicorn==0.30.6 96 | uvloop==0.21.0 97 | watchfiles==0.24.0 98 | websockets==14.0 99 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/analysis_utils.py: -------------------------------------------------------------------------------- 1 | # analysis_utils.py 2 | 3 | import os 4 | import json 5 | from datetime import datetime 6 | from rich.console import Console 7 | from rich.markdown import Markdown 8 | import markdown2 9 | from dotenv import load_dotenv, find_dotenv 10 | from google import genai 11 | 12 | console = Console() 13 | 14 | def initialize_client(api_key_env_var): 15 | load_dotenv(find_dotenv()) 16 | api_key = os.getenv(api_key_env_var) 17 | if not api_key: 18 | raise ValueError(f"API key not found in environment variable {api_key_env_var}") 19 | return genai.Client(api_key=api_key) 20 | 21 | def show_json(obj): 22 | print(json.dumps(obj.model_dump(exclude_none=True), indent=2)) 23 | 24 | def show_parts(response): 25 | parts = response.candidates[0].content.parts 26 | if parts is None: 27 | print(f'finish_reason={response.candidates[0].finish_reason}') 28 | return 29 | for part in parts: 30 | if part.text: 31 | console.print(Markdown(part.text, hyperlinks=True)) 32 | grounding_metadata = response.candidates[0].grounding_metadata 33 | if grounding_metadata and grounding_metadata.search_entry_point: 34 | console.print(grounding_metadata.search_entry_point.rendered_content) 35 | 36 | def log_message(log_file, message, message_type="info"): 37 | timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 38 | log_entry = f"

<div class='{message_type}'>{timestamp} - {message}</div>
\n" 39 | with open(log_file, 'a', encoding='utf-8') as file: 40 | file.write(log_entry) 41 | 42 | def log_message_r(log_file, response, message_type="model"): 43 | parts = response.candidates[0].content.parts 44 | log_message_content = "Response: " 45 | if parts is None: 46 | log_message_content += f"\n{response.candidates[0].finish_reason}" 47 | else: 48 | log_message_content += "".join(part.text for part in parts if part.text) 49 | log_message_content = markdown2.markdown(log_message_content) 50 | grounding_metadata = response.candidates[0].grounding_metadata 51 | if grounding_metadata and grounding_metadata.search_entry_point: 52 | log_message_content += grounding_metadata.search_entry_point.rendered_content 53 | log_message(log_file, log_message_content, message_type) 54 | 55 | def start_log_file(log_file): 56 | with open(log_file, 'w') as file: 57 | file.write("\n") 58 | 59 | def end_log_file(log_file): 60 | with open(log_file, 'a') as file: 61 | file.write("") 62 | -------------------------------------------------------------------------------- /py/ai/newsarranger/get_news_arrange.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | from markitdown import MarkItDown 4 | from dotenv import load_dotenv, find_dotenv 5 | from google import genai 6 | import datetime 7 | 8 | """ 9 | Retrieve the news content from a location https://example.xxxstockxxxnews.com 10 | Then convert it to markdown format using markitdown library. 11 | Then pass on the markdown content to Google Gemini API to arrange and group the news feed provided based on the order of importance for an investor in the markets. 12 | """ 13 | 14 | news_url = 'https://example.xxxstockxxxnews.com' 15 | 16 | # Initialize the client, using Google Gemini API key 17 | def initialize_client(api_key_env_var): 18 | load_dotenv(find_dotenv()) 19 | api_key = os.getenv(api_key_env_var) 20 | if not api_key: 21 | raise ValueError(f"API key not found in environment variable {api_key_env_var}") 22 | return genai.Client(api_key=api_key) 23 | 24 | my_model = 'gemini-2.0-flash' 25 | client = initialize_client('GOOGLE_API_KEY') 26 | 27 | # Main function 28 | if __name__ == '__main__': 29 | 30 | # Current time is, dd-mm-YYYY HH:MM:SS 31 | timenow = datetime.datetime.now().strftime("%d-%m-%Y %H:%M:%S") 32 | 33 | # get the news file 34 | print(f"Start getting the news file at {timenow}...") 35 | response = requests.get(news_url) 36 | html_content = response.text 37 | # Save it to a file 38 | with open('output.html', 'w') as file: 39 | file.write(html_content) 40 | 41 | print(f"Start converting the news file to markdown format at {timenow}...") 42 | md = MarkItDown() 43 | result = md.convert("output.html") 44 | #print(result.text_content) 45 | # Save the markdown content to a file 46 | with open('output.md', 'w') as file: 47 | file.write(result.text_content) 48 | 49 | print(f"Start arranging the news file at {timenow}...") 50 | analyzer = client.chats.create(model=my_model) 51 | response = analyzer.send_message(f"Arrange and group the news feed provided based on the order of importance for an investor in the markets. Include whatever data related to the news is available in the input, such as short summaries, hyperlinks etc. If available include time of report of the news. The time now is: {timenow}. The input is in markdown. 
Input: {result.text_content}") 52 | output = "" 53 | parts = response.candidates[0].content.parts 54 | if parts is None: 55 | print(f'finish_reason={response.candidates[0].finish_reason}') 56 | for part in parts: 57 | if part.text: 58 | #print(part.text) 59 | # join the text parts 60 | output += part.text 61 | 62 | # Save the output to a file 63 | with open('output_arranged.md', 'w') as file: 64 | file.write(output) 65 | -------------------------------------------------------------------------------- /py/yf/glb_scan.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import pandas as pd 3 | import time 4 | 5 | # Read the list of stocks from the CSV file 6 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 7 | # Exchange, ".BO, .NS" 8 | exchange = ".NS" 9 | 10 | # Set the time frame to max 11 | time_frame = 'max' 12 | 13 | # Set the bar time frame 14 | data_interval = '1mo' 15 | 16 | # Set the green line to the all-time high of the stock 17 | green_line = 0.0 18 | 19 | # Set the minimum number of months since the ath/green line was breached 20 | min_months = 2 21 | 22 | # Initialize a list to store the results 23 | results = [] 24 | 25 | # Iterate through the list of stocks 26 | for stock in stocks["Ticker"]: 27 | try: 28 | # Get the stock data from yfinance, dont adjust OHLC 29 | ticker = yf.Ticker(f'{stock}{exchange}') 30 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 31 | # Drop those with NaN 32 | data = data.dropna() 33 | # Drop last row, if 2nd last is already of the month 34 | if data.index[-1].month == data.index[-2].month: 35 | # Replace the values in the second-to-last row with the values in the last row 36 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 37 | # Delete the last row 38 | data = data.drop(data.index[-1]) 39 | 40 | # print(data) 41 | 42 | # Initialize the ATH to the first close price and the ATH date to the first date 43 | ath = data.at[data.index[0], 'High'] 44 | ath_date = data.index[0] 45 | green_line = ath 46 | green_line_date = ath_date 47 | 48 | # Loop through each row of the dataframe 49 | for index, row in data.iterrows(): 50 | # Update the ATH and ATH date if the current close price is higher 51 | if row['High'] > ath: 52 | ath = row['High'] 53 | ath_date = index 54 | # Update Greenline if condition of minimum months is met 55 | if data.index.get_loc(index) - data.index.get_loc(ath_date) >= min_months: 56 | green_line = ath 57 | green_line_date = ath_date 58 | 59 | # print(stock + " green line: " + str(green_line) + " green line date: " + str(green_line_date)) 60 | last_close = data.at[data.index[-1], 'Close'] 61 | second_last_close = data.at[data.index[-2], 'Close'] 62 | if second_last_close < green_line and last_close > green_line: 63 | # print(stock +" close: " + str(last_close) + " second last close: " + str(second_last_close) + " green line: " + str(green_line) + " green line date: " + str(green_line_date)) 64 | results.append(stock) 65 | 66 | except Exception as e: 67 | print("Error for ticker: " + stock) 68 | print(e) 69 | 70 | # Print the results 71 | print(results) 72 | ex = 'NSE' if exchange == '.NS' else 'BSE' 73 | for stk in results: 74 | print(f'{ex}:{stk},') 75 | print("Done") 76 | -------------------------------------------------------------------------------- /py/eodhd/price_data/RELIANCE_D.csv: -------------------------------------------------------------------------------- 1 | Date,Open,High,Low,Close,Volume,Adj Close 2 | 
1994-11-03,375.0,400.0,375.0,396.0,42650,396.0 3 | 1994-11-07,396.0,398.25,393.0,395.5,58700,395.5 4 | 1994-11-08,398.0,398.75,393.0,396.0,49050,396.0 5 | 1994-11-09,399.0,399.0,385.75,387.0,57500,387.0 6 | 1994-11-10,387.0,390.0,380.0,380.5,67250,380.5 7 | 1994-11-11,371.0,378.5,369.5,372.75,65000,372.75 8 | 1994-11-14,373.0,373.0,358.0,361.5,30450,361.5 9 | 1994-11-15,360.0,364.0,356.75,361.25,66900,361.25 10 | 1994-11-16,364.25,377.0,363.0,373.75,72800,373.75 11 | 1994-11-17,377.0,390.0,373.75,387.5,88850,387.5 12 | 1994-11-21,385.0,387.0,377.5,378.25,30350,378.25 13 | 1994-11-22,381.0,381.25,370.0,371.0,27700,371.0 14 | 1994-11-23,375.0,375.0,367.0,367.25,25300,367.25 15 | 1994-11-24,370.0,379.0,369.0,378.0,54500,378.0 16 | 1994-11-25,381.0,384.0,378.0,379.5,36900,379.5 17 | 1994-11-28,377.5,385.0,374.0,383.0,6000,383.0 18 | 1994-11-29,385.0,387.0,380.0,380.5,77400,380.5 19 | 1994-11-30,382.0,382.0,379.0,380.25,21950,380.25 20 | 1994-12-01,378.5,381.75,378.0,379.0,42800,379.0 21 | 1994-12-02,376.75,377.0,372.5,374.25,10700,374.25 22 | 1994-12-05,373.5,382.5,369.5,380.25,55800,380.25 23 | 1994-12-06,378.0,380.0,371.0,372.5,47800,372.5 24 | 1994-12-07,373.0,373.0,364.0,366.25,40950,366.25 25 | 1994-12-08,363.5,363.5,359.0,361.0,29900,361.0 26 | 1994-12-09,360.0,365.0,358.5,363.0,48600,363.0 27 | 1994-12-12,355.0,355.5,344.0,347.0,54750,347.0 28 | 1994-12-13,345.0,346.0,329.0,332.75,55800,332.75 29 | 1994-12-14,333.0,342.5,332.25,336.5,107100,336.5 30 | 1994-12-15,336.0,347.0,336.0,344.75,84000,344.75 31 | 1994-12-16,345.0,350.0,342.75,344.0,65750,344.0 32 | 1994-12-19,345.0,345.0,338.0,338.5,26200,338.5 33 | 1994-12-20,342.0,342.0,337.5,339.5,26450,339.5 34 | 1994-12-21,337.0,344.0,337.0,343.0,34100,343.0 35 | 1994-12-22,342.0,342.0,339.0,340.0,15700,340.0 36 | 1994-12-23,339.0,345.5,338.5,344.75,35150,344.75 37 | 1994-12-26,339.0,344.75,338.55,339.0,37050,339.0 38 | 1994-12-27,340.0,340.0,336.35,337.75,7450,337.75 39 | 1994-12-28,340.5,340.5,339.25,339.25,7650,339.25 40 | 1994-12-29,339.5,339.75,338.75,339.2,3150,339.2 41 | 1994-12-30,339.0,341.5,339.0,341.2,14750,341.2 42 | 1995-01-02,341.0,343.0,340.5,341.2,13600,341.2 43 | 1995-01-03,342.5,342.5,336.1,336.2,11450,336.2 44 | 1995-01-04,339.95,340.5,330.3,332.0,62600,332.0 45 | 1995-01-05,332.0,333.0,320.25,321.3,59200,321.3 46 | 1995-01-06,323.0,323.5,315.0,316.75,88550,316.75 47 | 1995-01-09,319.25,319.25,299.0,302.35,264800,302.35 48 | 1995-01-10,303.0,303.0,283.0,287.35,351750,287.35 49 | 1995-01-11,290.0,300.0,280.0,295.25,168000,295.25 50 | 1995-01-12,294.0,297.5,277.5,278.25,375850,278.25 51 | 1995-01-13,280.0,288.5,279.0,286.3,169700,286.3 52 | 1995-01-16,288.0,294.0,282.0,284.1,149800,284.1 53 | 1995-01-17,285.0,286.0,275.05,277.3,255950,277.3 54 | 1995-01-18,279.25,284.7,278.0,283.35,88500,283.35 55 | 1995-01-19,282.0,287.5,282.0,283.95,64350,283.95 56 | 1995-01-20,284.0,285.45,271.0,272.5,165400,272.5 57 | 1995-01-23,274.5,274.5,250.3,251.7,175050,251.7 58 | 1995-01-24,250.0,251.75,235.15,246.4,287400,246.4 59 | 1995-01-25,245.0,250.0,238.0,242.5,372400,242.5 60 | 1995-01-27,245.0,261.0,242.0,256.2,316250,256.2 61 | 1995-01-30,250.0,269.0,248.0,267.95,425600,267.95 62 | 1995-01-31,265.0,276.65,264.0,272.45,301000,272.45 63 | -------------------------------------------------------------------------------- /py/yf/ars_srs_scan.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import pandas as pd 3 | import time 4 | import datetime 5 | 6 | def cleanUp_data(data): 7 
| # Drop those with NaN 8 | data = data.dropna() 9 | return data 10 | 11 | # set the file name of stocks 12 | stock_filename = "stocks.csv" 13 | 14 | # Set the time frame to max 15 | time_frame = '2y' 16 | 17 | # Set the bar time frame 18 | data_interval = '1d' 19 | 20 | # Specify the benchmark symbol 21 | benchmark = "^NSEI" 22 | 23 | 24 | # Specify the reference date 25 | reference_date = "2022-06-03" 26 | 27 | # Specify the number of rows to look back for the Static RS calculation 28 | srs_length = 123 29 | 30 | # Read the list of stocks from the CSV file 31 | stocks = pd.read_csv(stock_filename, header=0, usecols=["Ticker"]) 32 | 33 | # Use yfinance to retrieve the benchmark data 34 | benchmark_ticker = yf.Ticker(benchmark) 35 | benchmark_data = benchmark_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 36 | benchmark_data = cleanUp_data(benchmark_data) 37 | 38 | # Create an empty list to store the stock data 39 | stock_data_list = [] 40 | 41 | # Iterate through the list of stocks 42 | for stock in stocks["Ticker"]: 43 | try: 44 | ticker = yf.Ticker(stock+".NS") 45 | 46 | # Use yfinance to retrieve the stock data 47 | stock_data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 48 | stock_data = cleanUp_data(stock_data) 49 | 50 | # Calculate the Adaptive relative strength (ARS) using the formula you provided 51 | stock_data["Adaptive RS"] = (stock_data["Close"] / stock_data.loc[stock_data.index == reference_date, "Close"].values[0]) / (benchmark_data["Close"] / benchmark_data.loc[benchmark_data.index == reference_date, "Close"].values[0]) - 1 52 | 53 | # Calculate the Static relative strength (SRS) using the formula you provided and the specified number of rows to look back 54 | stock_close_123 = stock_data.at[stock_data.index[-123], 'Close'] 55 | benchmark_close_123 = benchmark_data.at[benchmark_data.index[-123], 'Close'] 56 | stock_data["Static RS"] = (stock_data["Close"] /stock_close_123) / (benchmark_data["Close"] / benchmark_close_123) - 1 57 | 58 | # Get the last row of the stock data 59 | last_row = stock_data.tail(1) 60 | 61 | # Extract the ARS and SRS values from the last row 62 | ars = round(last_row["Adaptive RS"].values[0], 2) 63 | srs = round(last_row["Static RS"].values[0], 2) 64 | 65 | # Create a dictionary with the stock name, ARS, and SRS values 66 | stock_data_dict = {"Stock": stock, "Adaptive RS": ars, "Static RS": srs} 67 | 68 | # Add the dictionary to the list 69 | stock_data_list.append(stock_data_dict) 70 | except Exception as e: 71 | print("Error " + stock) 72 | print(e) 73 | 74 | # print(stock_data_list) 75 | 76 | # Get the current timestamp 77 | timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") 78 | 79 | # Construct the file name using the timestamp 80 | filename = "rs_stock_data_" + timestamp + ".csv" 81 | 82 | # Convert the list of dictionaries to a dataframe 83 | stock_data_df = pd.DataFrame(stock_data_list) 84 | 85 | # Write the dataframe to the CSV file 86 | stock_data_df.to_csv(filename, index=False) 87 | -------------------------------------------------------------------------------- /py/eodhd/my_rsi.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script calculates the Combined Relative Strength Index (RSI) for a list of stocks. 3 | The Combined RSI is a technical indicator used in the analysis of financial markets. 
4 | It is intended to chart the current and historical strength or weakness of a stock or market based on the closing 5 | prices of a recent trading period. The Combined RSI is calculated by combining the traditional RSI with the volume. 6 | """ 7 | 8 | import pricereader as pr 9 | import pandas as pd 10 | import numpy as np 11 | import datetime 12 | 13 | # Set output folder path 14 | output_path = "output" 15 | 16 | # Read the list of stocks from the CSV file 17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 18 | 19 | def calculate_combined_rsi(df, period=14): 20 | """ 21 | Calculate the Combined Relative Strength Index (RSI) for a given DataFrame. 22 | 23 | Parameters: 24 | - df (pandas.DataFrame): DataFrame containing the stock data. 25 | - period (int): Number of periods to consider for calculating the RSI. Default is 14. 26 | 27 | Returns: 28 | - combined_rsi (pandas.Series): Series containing the Combined RSI values. 29 | """ 30 | # Calculate daily price change 31 | df['Price Change'] = df['Close'].diff() 32 | 33 | # Calculate volume ratio and volatility 34 | avg_volume = df['Volume'].rolling(window=period).mean() 35 | df['Volume Ratio'] = df['Volume'] / avg_volume 36 | volatility = df['Price Change'].rolling(window=period).std() 37 | 38 | # Combine volume and volatility adjustments 39 | df['Combined Gain'] = np.where(df['Price Change'] > 0, (df['Price Change'] * df['Volume Ratio']) / volatility, 0) 40 | df['Combined Loss'] = np.where(df['Price Change'] < 0, -(df['Price Change'] * df['Volume Ratio']) / volatility, 0) 41 | 42 | # Compute average combined gain and loss 43 | avg_combined_gain = df['Combined Gain'].rolling(window=period).mean() 44 | avg_combined_loss = df['Combined Loss'].rolling(window=period).mean() 45 | 46 | # Calculate Combined RS and RSI 47 | combined_rs = avg_combined_gain / avg_combined_loss 48 | combined_rsi = 100 - (100 / (1 + combined_rs)) 49 | 50 | return combined_rsi 51 | 52 | 53 | def main(): 54 | """ 55 | Main function that calculates the Combined RSI for a list of stocks and saves the results to a CSV file. 
56 | """ 57 | print("Started...") 58 | # Create the DataFrame 59 | result_df = pd.DataFrame(columns=['stock', 'my_rsi']) 60 | # Iterate through the list of stocks 61 | for stock in stocks["Ticker"]: 62 | try: 63 | # Get the daily stock data 64 | stock_data = pr.get_price_data(stock, 'd') 65 | # Drop those with NaN 66 | stock_data = stock_data.dropna() 67 | 68 | # Calculate combined RSI 69 | stock_data['Combined_RSI'] = calculate_combined_rsi(stock_data) 70 | # print(stock_data.tail()) 71 | last_row_idx = stock_data.index[-1] 72 | row = {'stock': stock, 'my_rsi': str(round(stock_data.loc[last_row_idx, 'Combined_RSI'], 2))} 73 | # Append the new row to the DataFrame 74 | result_df.loc[len(result_df)] = row 75 | 76 | except Exception as e: 77 | print("Error: " + stock) 78 | print(e) 79 | 80 | # Append current timestamp to the file name 81 | now = datetime.datetime.now() 82 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 83 | file_name = 'my_rsi_' + timestamp + '.csv' 84 | # Export the DataFrame to CSV 85 | result_df.to_csv(output_path + "/" + file_name, index=False) 86 | print(f'Saved file {file_name}') 87 | 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/cmd_executor.py: -------------------------------------------------------------------------------- 1 | from smolagents import tool 2 | import subprocess 3 | import shlex 4 | 5 | @tool 6 | def cmd_executor(command: str, confirmed: bool = False) -> str: 7 | """ 8 | This tool executes readonly shell commands in a Linux/macOS environment. 9 | It is restricted to safe, readonly commands that do not modify the filesystem 10 | or system state. Useful for exploring directory structures, searching files, 11 | and gathering information about the system. 12 | 13 | LIMITATION: Only readonly commands are allowed for security. Commands that 14 | modify files, install software, or change system state are blocked. 15 | 16 | Allowed commands include: 17 | - ls, find, locate, which, whereis 18 | - grep, egrep, fgrep, zgrep 19 | - cat, head, tail, less, more 20 | - wc, sort, uniq, cut, awk, sed (readonly operations) 21 | - ps, top, htop, df, du, free 22 | - pwd, whoami, id, uname, date 23 | - file, stat, lsof 24 | 25 | Args: 26 | command (str): The shell command to execute (must be readonly). 27 | confirmed (bool): Must be set to True to confirm command execution. 28 | Defaults to False for safety. 29 | Returns: 30 | str: The output of the command or an error message. 31 | """ 32 | # check if operation is confirmed 33 | if not confirmed: 34 | return "Error: Command execution not confirmed. Set confirmed=True to proceed with running the command." 35 | 36 | if not command.strip(): 37 | return "No command provided." 38 | 39 | # List of allowed readonly commands 40 | allowed_commands = { 41 | 'ls', 'find', 'locate', 'which', 'whereis', 42 | 'grep', 'egrep', 'fgrep', 'zgrep', 'rg', 'ag', 43 | 'cat', 'head', 'tail', 'less', 'more', 44 | 'wc', 'sort', 'uniq', 'cut', 'awk', 'sed', 45 | 'ps', 'top', 'htop', 'df', 'du', 'free', 46 | 'pwd', 'whoami', 'id', 'uname', 'date', 47 | 'file', 'stat', 'lsof', 'tree' 48 | } 49 | 50 | # Parse the command to get the base command 51 | try: 52 | parsed_command = shlex.split(command) 53 | base_command = parsed_command[0] if parsed_command else "" 54 | except ValueError: 55 | return "Error: Invalid command syntax." 
56 | 57 | # Check if the base command is allowed 58 | if base_command not in allowed_commands: 59 | return f"Error: Command '{base_command}' is not allowed. Only readonly commands are permitted." 60 | 61 | # Additional safety checks for potentially dangerous flags 62 | dangerous_patterns = ['rm', 'mv', 'cp', 'chmod', 'chown', 'sudo', '>', '>>', '|', '&&', '||', ';'] 63 | for pattern in dangerous_patterns: 64 | if pattern in command: 65 | return f"Error: Command contains potentially dangerous pattern '{pattern}'. Only readonly operations are allowed." 66 | 67 | try: 68 | # Execute the command with timeout for safety 69 | result = subprocess.run( 70 | command, 71 | shell=True, 72 | capture_output=True, 73 | text=True, 74 | timeout=30, # 30 second timeout 75 | cwd=None # Use current working directory 76 | ) 77 | 78 | if result.returncode == 0: 79 | return result.stdout if result.stdout else "Command executed successfully (no output)." 80 | else: 81 | return f"Command failed with return code {result.returncode}:\n{result.stderr}" 82 | 83 | except subprocess.TimeoutExpired: 84 | return "Error: Command timed out after 30 seconds." 85 | except Exception as e: 86 | return f"An error occurred while executing the command: {str(e)}" -------------------------------------------------------------------------------- /py/eodhd/how_many_weeks_high.py: -------------------------------------------------------------------------------- 1 | """ 2 | This scrip will fetch the current high price of a stock and calculate how many weeks it 3 | has been since the stock was at that price. 4 | """ 5 | import pricereader as pr 6 | import pandas as pd 7 | import time 8 | import datetime 9 | 10 | # Read the list of stocks from the CSV file 11 | stocks = pd.read_csv("stocks5.csv", header=0, usecols=["Ticker"]) 12 | 13 | # Set output folder path 14 | output_path = "output" 15 | 16 | # Function to get the number of bars to reach the high that t 17 | # stock_data: DataFrame containing the stock data 18 | # Date,Open,High,Low,Close,Volume,Adj Close 19 | # 2002-07-01,283.25,331.0,283.25,317.8,11803,317.8 20 | # 2002-07-08,303.6,327.0,300.0,300.45,10390,300.45 21 | # 2002-07-15,296.2,305.0,290.3,300.0,4744,300.0 22 | # 2002-07-22,286.0,315.0,280.0,304.4,21643,304.4 23 | def get_previous_index_prce_for_last_high(stock_data): 24 | """ 25 | This function will first fetch the high price of the latest date (latest week) 26 | Then for each row before this, it will check if this high price was reached or crossed 27 | If it was, it will return the number of weeks it took to reach this price 28 | If it was not, it will return -1, indicating that the stock is ATH (All time high) 29 | stock_data: DataFrame containing the stock data, in acsending order of date 30 | """ 31 | # Get the high price of the latest date 32 | latest_high = stock_data['High'].iloc[-1] 33 | 34 | # Iterate through the rows in reverse order 35 | for index in reversed(stock_data.index[:-1]): 36 | # Check if the high price was reached or crossed 37 | if stock_data.loc[index, 'High'] >= latest_high: 38 | # Return the index of the row where this price was reached 39 | return index, stock_data.loc[index, 'High'] 40 | 41 | # Return last index if the high price was not reached or crossed 42 | return stock_data.index[-1], latest_high 43 | 44 | 45 | def main(): 46 | print("Started...") 47 | # Create the DataFrame 48 | result_df = pd.DataFrame(columns=['stock', 'High of latest week', 'Last such week high', \ 49 | 'Days passed', 'High of that week', 'diff%']) 50 | # Iterate 
through the list of stocks 51 | for stock in stocks["Ticker"]: 52 | try: 53 | # Get the daily stock data 54 | stock_data = pr.get_price_data(stock, 'w') 55 | # Drop those with NaN 56 | stock_data = stock_data.dropna() 57 | 58 | # Get the index and high price of the week when the stock was at its high 59 | index, high = get_previous_index_prce_for_last_high(stock_data) 60 | 61 | # Get the high price of the latest date 62 | latest_high = stock_data['High'].iloc[-1] 63 | # Current / last date 64 | latest_date = stock_data.index[-1] 65 | diff = round((latest_high - high) / high * 100,2) 66 | days_diff = (latest_date - index).days 67 | latest_high = round(latest_high,2) 68 | high = round(high,2) 69 | # Append the result to the DataFrame 70 | row = {'stock': stock, 'High of latest week': latest_high, 'Last such week high':index, \ 71 | 'Days passed': f'{days_diff}', 'High of that week': high, 'diff%': f'{diff}%'} 72 | result_df.loc[len(result_df)] = row 73 | print(f"Processed: {stock}") 74 | 75 | except Exception as e: 76 | print("Error: " + stock) 77 | print(e) 78 | 79 | # Append current timestamp to the file name 80 | now = datetime.datetime.now() 81 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 82 | file_name = 'weeks_to_high_' + timestamp + '.csv' 83 | # Export the DataFrame to CSV 84 | result_df.to_csv(output_path + "/" + file_name, index=False) 85 | print(f'Saved file {file_name}') 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/README.md: -------------------------------------------------------------------------------- 1 | # FinInsightGPT: AI-Powered Investment Analysis 2 | 3 | FinInsightGPT is an application that helps with investment decisions and business analysis by processing company data files, converting them to structured markdown, and generating comprehensive equity research reports using AI. 4 | 5 | ## Features 6 | 7 | - **Document Processing**: Converts various file formats (PDF, DOCX, PPTX, TXT, XLSX, images) to markdown 8 | - **Intelligent Image Analysis**: Uses OCR and AI vision to extract text and analyze charts/graphs 9 | - **Master File Generation**: Consolidates all company documents into a comprehensive master file 10 | - **AI Report Generation**: Creates detailed equity research reports using LLM models 11 | - **Command-line Interface**: Easy-to-use CLI for all operations 12 | 13 | ## Installation 14 | 15 | 1. Clone this repository 16 | 2. Install the required dependencies: 17 | 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | 3. Install Tesseract OCR (for image processing): 23 | - macOS: `brew install tesseract` 24 | - Ubuntu/Debian: `sudo apt-get install tesseract-ocr` 25 | - Windows: Download from [GitHub](https://github.com/UB-Mannheim/tesseract/wiki) 26 | 27 | 4. 
Set up your environment variables: 28 | - Copy the template file: `cp .env.example .env` 29 | - Edit the `.env` file and add your OpenAI API key and model preferences: 30 | 31 | ``` 32 | # OpenAI API Key 33 | OPENAI_API_KEY=your_openai_api_key_here 34 | 35 | # OpenAI Model IDs 36 | OPENAI_TEXT_MODEL=gpt-4-turbo 37 | OPENAI_VISION_MODEL=gpt-4-vision-preview 38 | ``` 39 | 40 | ## Usage 41 | 42 | ### Directory Structure 43 | 44 | Place company files in folders under `company_data`: 45 | 46 | ``` 47 | company_data/ 48 | ├── company1/ 49 | │ ├── file1.pdf 50 | │ ├── file2.txt 51 | │ └── image1.jpg 52 | └── company2/ 53 | ├── presentation.pptx 54 | └── financials.xlsx 55 | ``` 56 | 57 | ### Commands 58 | 59 | #### List available companies: 60 | 61 | ```bash 62 | python src/main.py list 63 | ``` 64 | 65 | #### Process files for a company: 66 | 67 | ```bash 68 | python src/main.py process 69 | ``` 70 | 71 | #### Generate master file from processed files: 72 | 73 | ```bash 74 | python src/main.py master [--output-dir ] 75 | ``` 76 | 77 | #### Generate report from master file: 78 | 79 | ```bash 80 | python src/main.py report [--template ] [--output-dir ] [--model ] 81 | ``` 82 | 83 | #### Run the entire pipeline (process files, generate master, create report): 84 | 85 | ```bash 86 | python src/main.py all [--template ] [--model ] 87 | ``` 88 | 89 | ### Examples 90 | 91 | Process files for CDSL: 92 | 93 | ```bash 94 | python src/main.py process cdsl 95 | ``` 96 | 97 | Generate a report for JyothyLabs using previously created master file: 98 | 99 | ```bash 100 | python src/main.py report jyothylabs_master_20250504_123456.md --model gpt-4-vision-preview 101 | ``` 102 | 103 | Run the entire pipeline for a new company: 104 | 105 | ```bash 106 | python src/main.py all mynewcompany --model gpt-4-turbo 107 | ``` 108 | 109 | ## Report Templates 110 | 111 | The system uses the template file in `prompt_master/Equity_Research_Report_Template.md` by default. This template contains: 112 | 113 | 1. A system prompt to instruct the AI model 114 | 2. A user prompt that defines the report structure and analysis requirements 115 | 116 | You can modify this template or create custom templates for different analysis styles. 
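For example, assuming you have saved a custom template as `prompt_master/My_Custom_Template.md` (a hypothetical file name), you could point the report command at it with the documented `--template` option: ```bash python src/main.py report cdsl_master_20250504_123456.md --template prompt_master/My_Custom_Template.md ``` The master file name above is also illustrative; use the actual master file generated for your company.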
117 | 118 | ## Dependencies 119 | 120 | - pymupdf: PDF processing 121 | - python-docx: DOCX processing 122 | - python-pptx: PowerPoint processing 123 | - pandas & openpyxl: Excel processing 124 | - Pillow & pytesseract: Image processing 125 | - openai: AI model integration 126 | - tiktoken: Token counting for LLM API calls -------------------------------------------------------------------------------- /py/ai/turnaround/main.py: -------------------------------------------------------------------------------- 1 | # load .env into environment 2 | import os 3 | from dotenv import load_dotenv 4 | load_dotenv() 5 | 6 | from smolagents import CodeAgent, MLXModel 7 | from my_tools import search_web as web_fetcher 8 | from my_tools import save_report as save_report 9 | from my_tools import fs_reader as fs_reader 10 | from my_tools import cmd_executor as cmd_executor 11 | from smolagents import CodeAgent, LiteLLMModel 12 | 13 | import datetime 14 | import csv 15 | 16 | # Initialize the tools and models 17 | #local_model=mlx_model = MLXModel("Path to local model directory") 18 | model = LiteLLMModel(model_id="openai/gpt-4.1-mini", api_key=os.getenv("OPENAI_API_KEY")) 19 | 20 | #Create the agent with the model and tools 21 | agent = CodeAgent(tools=[web_fetcher, save_report, fs_reader, cmd_executor], model=model, additional_authorized_imports=["os", "openai", "json", "csv"]) # Not adding base tools. 22 | 23 | # Define the data directory and today's date 24 | data_dir = "data/financial_data.csv" 25 | date_today = datetime.datetime.now().strftime("%Y-%m-%d") 26 | 27 | instructions = f""" 28 | You are an expert financial analyst specializing in identifying turnaround in companies. Analyze for the company mentioned below in Step 1. With the searched financial data and your analysis generate a comprehensive markdown report that detects potential turnarounds if any for the company. To achieve this, you will follow these steps in sequence: 29 | Step 1. Company/Business Name/Stock Codes: {{business_name}}. 30 | Step 2. For this business, analyse if the business is experiencing a turnaround. Give a short report of your analysis. You will gather additional latest information using the web_fetcher tool. This includes searching for the latest financial reports, news, and other relevant information about the company. 31 | Step 3. After gathering enough information, you will prepare a report that includes a verdict about the turnaround potential of each business. The verdict can be "Strong Turnaround", "Weak Turnaround", or "No Turnaround". 32 | Step 4. Finally, format the report into a well-structured markdown document and save it to a file. You will ensre that the report contains the following sections: 33 | - Business Name 34 | - Summary of Financial Data 35 | - Analysis of Financial Health 36 | - Turnaround Potential Verdict 37 | Step 5. You will use the save_report tool to persist the report on disk. The report will be saved per business. You will pass the report content and the business name to the save_report tool. 38 | 39 | General instructions: 40 | You will use the web_fetcher tool to gather additional information about these businesses and the reporter tool to generate the markdown report. You can look up for latest financial reports, news and other relevant information for the company. 41 | Today is: {date_today}. 42 | Always search for tools available to you before writing new code, esp. the cmd_executor tool, which can execute read only shell commands to gather more information if needed. 
43 | """ 44 | 45 | # Read the financial data file and start the analysis 46 | print("Loading financial data from:", data_dir) 47 | if not os.path.exists(data_dir): 48 | raise FileNotFoundError(f"The financial data file {data_dir} does not exist. Please check the path.") 49 | businesses = [] 50 | with open(data_dir, 'r', encoding='utf-8') as file: 51 | reader = csv.DictReader(file) 52 | # For each row in the CSV, create a entry, that contains the Name, Stock Symbol. Assuming the columns are Name,BSE Code,NSE Code. It is possible that BSE Code or NSE Code is not available, Create the entry for businesses list as a concatenation of Name/NSE Code/BSE Code. 53 | for row in reader: 54 | name = row.get('Name', 'Unknown') 55 | bse_code = row.get('BSE Code', '').strip() 56 | nse_code = row.get('NSE Code', '').strip() 57 | if not nse_code and not bse_code: 58 | business_entry = name 59 | elif not nse_code and bse_code: 60 | business_entry = f"""Name: {name} / BSE: {bse_code}""" 61 | elif nse_code and not bse_code: 62 | business_entry = f"""Name: {name} / NSE: {nse_code}""" 63 | else: 64 | business_entry = f"""Name: {name} / NSE: {nse_code} / BSE: {bse_code}""" 65 | businesses.append(business_entry) 66 | 67 | total_businesses = len(businesses) 68 | count = 0 69 | for business in businesses: 70 | print(f"Starting analyzing financial data and generating a report for {business}... Please wait.") 71 | final_instructions = instructions.format(business_name=business) 72 | #print(f"Final instructions for the agent: {final_instructions}") 73 | response = agent.run(final_instructions, max_steps=20) 74 | # Print progress 75 | count += 1 76 | print(f"Completed {count}/{total_businesses} businesses. Current business: {business}") -------------------------------------------------------------------------------- /py/ai/fininsightgpt/prompt_master/Equity_Research_Report_Template.md: -------------------------------------------------------------------------------- 1 | # Equity Research Report Template 2 | 3 | ## System Prompt 4 | 5 | You are a financial analyst specializing in creating concise company reports. I need a comprehensive research report on {company}, an Indian publicly listed company, with deep insights based on publicly available data, including concalls, annual reports, news, and competitive analysis. The report should be for company called {company}. Current datetime is {timestamp}. 6 | 7 | ## User Prompt 8 | 9 | The structure of the {company} report shall be as follows. 10 | 11 | --- 12 | 13 | ### 1. 📌 Company Overview 14 | 15 | - **Business Model and Key Segments** 16 | Briefly describe the company’s core business activities and primary segments. If diversified, outline the major business segments and how revenue mix has evolved over the last 3–5 years. 17 | 18 | - **Key Milestones** 19 | Highlight key events such as IPOs, product/service launches, expansions, strategic partnerships, or diversification moves. 20 | 21 | --- 22 | 23 | ### 2. 📈 Strategic Developments & Execution Analysis 24 | 25 | #### A. Business Expansion & Innovation 26 | - Detail new product or service launches, acquisitions, or innovations. 27 | - Mention R&D pipelines and any noteworthy technologies under development. 28 | - Include subsidiaries, joint ventures, and their strategic significance. 29 | 30 | #### B. Order Book & Execution Capacity 31 | - Present current size and growth trend of the order book. Include insights on order pipeline and execution win-rates. 32 | - Clarify execution visibility over the next 12–18 months. 
33 | 34 | #### C. Capacity Expansion 35 | - Describe ongoing or planned projects/plants and their impact on production/revenue capacity. 36 | - Explain the funding strategy for capex (equity, debt, internal accruals). 37 | 38 | #### D. Risk Analysis 39 | - Discuss regulatory, operational, geopolitical, and credit-related risks. 40 | - Provide the latest credit ratings and any notable outlooks. 41 | 42 | #### E. Management & Governance 43 | - Assess management’s historical performance versus guidance. 44 | - Mention promoter shareholding patterns and governance quality. 45 | - Flag any litigation, controversies, or red flags if applicable. 46 | 47 | --- 48 | 49 | ### 3. 🏗 Recent Milestones & Notable Projects 50 | - Showcase recognition from industry/government bodies and key partnerships. 51 | - Highlight delivery of high-profile projects or export milestones. 52 | - Summarize improvements in net worth, margins, interest coverage, and profitability. 53 | 54 | --- 55 | 56 | ### 4. 🧭 “What’s New vs. What’s Next” Summary Table 57 | 58 | Create a table outlining: 59 | - Recent achievements, future plans, expected timelines, and revenue/strategic impact across areas such as Order Book, Capacity, Exports, R&D, and Financials. 60 | 61 | --- 62 | 63 | ### 5. 🆚 Competitive Analysis 64 | 65 | - Identify and compare peers using valuation (P/E, EV/EBITDA), RoE, margins, etc. 66 | - Discuss relative market positioning and entry barriers. 67 | - List key strengths and weaknesses compared to competitors. 68 | - Include visual peer comparison charts or tables. 69 | 70 | --- 71 | 72 | ### 6. 📰 News and Media Perception 73 | 74 | - List significant headlines from the past 2 years. 75 | - Compare media coverage with management’s communicated vision and strategies. 76 | - Mention any frauds, disputes, or controversies. 77 | - Provide an overall sentiment assessment (positive/neutral/negative). 78 | 79 | --- 80 | 81 | ### 7. 📊 Valuation & Investment Perspective 82 | 83 | - Include valuation metrics (P/E, EV/EBITDA, etc.) and compare with historical ranges and peers. 84 | - Perform a DCF analysis with assumptions and forecasts. 85 | - Evaluate whether the current market price justifies future earnings and growth. 86 | - Add broker/analyst consensus and sentiment. 87 | - Correlate stock price performance with earnings visibility. 88 | 89 | --- 90 | 91 | ### 8. 🚀 Key Catalysts to Watch (Near-Term) 92 | 93 | Track important near-term triggers such as: 94 | - Plant/project commissionings 95 | - Export order wins 96 | - Quarterly financial results and trends 97 | 98 | --- 99 | 100 | ### 9. 🧾 Conclusion & Investment Rationale 101 | 102 | - Provide a clear investment rating (Buy/Hold/Avoid) with reasoning. 103 | - Outline key upside/downside triggers. 104 | - Call out any immediate risks (regulatory, geopolitical, macro). 105 | - Mention near-term events that could impact re-rating potential. 106 | 107 | --- 108 | 109 | ## 📁 Data Sources to Refer to: 110 | 111 | - Last 3–5 Years of Annual Reports 112 | - Investor Presentations (especially the latest) 113 | - Earnings Call Transcripts (latest quarter) 114 | - Credit Rating Reports (if public) 115 | - Exchange Filings (NSE/BSE) 116 | - Financial News Sources (ET, BloombergQuint, Moneycontrol, etc.) 
117 | -------------------------------------------------------------------------------- /py/yf/weeklyRSIVolStopBO.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BOs of nifty500 stocks, that gave a weekly breakout from RSI(14) > 60 3 | Also, check if they are above the volstop(10,2.5) 4 | Also, check if they are abover the 20-EMA 5 | Prefer, stocks with relative ratio on an increasing trend on 5-6M average 6 | All calculations on weekly timeframes 7 | 8 | Generally, such stocks that take repitative support on a bullish RSI level, 9 | with backing of sectoral tailwind or strong fundamentals give good long term moves 10 | Exits can be planned on volstop break, or 20-EMA break or both with partial booking on 11 | break of one 12 | ''' 13 | 14 | import yfinance as yf 15 | import pandas as pd 16 | import ta 17 | import datetime 18 | 19 | # Set output folder path 20 | output_path = "output" 21 | 22 | # Read the list of stocks from the CSV file 23 | stocks = pd.read_csv("stocks500.csv", header=0, usecols=["Ticker"]) 24 | 25 | # Set start Date 26 | start_date = '2020-02-01' 27 | 28 | # Set end Date 29 | end_date = '2023-02-26' 30 | 31 | # Specify the benchmark symbol 32 | benchmark = "^NSEI" 33 | 34 | # Interval 35 | data_interval_weekly = '1wk' 36 | 37 | import yfinance as yf 38 | import pandas as pd 39 | import numpy as np 40 | 41 | def rsi_crossover(data, rsi_level): 42 | current_rsi = data.iloc[-1]['RSI'] 43 | previous_rsi = data.iloc[-2]['RSI'] 44 | return previous_rsi <= 60.0 and current_rsi > 60.0 45 | 46 | def volatility_stop(data, period, multiplier): 47 | high = data['High'] 48 | low = data['Low'] 49 | close = data['Close'] 50 | 51 | atr = pd.Series((high - low).abs().rolling(period).mean(), name='ATR') 52 | direction = np.where(close.diff() > 0, 1, -1) 53 | vol_stop = close - direction * atr * multiplier 54 | 55 | data['volStop'] = vol_stop 56 | return data 57 | 58 | def ratio_mean(data, benchmark_data, length): 59 | # Calculate the relative strength of the stock by dividing its weekly closing price by the weekly closing price of the Nifty 50 index 60 | relative_strength = data['Close'] / benchmark_data['Close'] 61 | data[f'relativeRatio'] = relative_strength 62 | # print(relative_strength.tail(10)) 63 | 64 | # Calculate the mean of the relative strength values for length 65 | data[f'ratio{length}W'] = relative_strength.rolling(window=length).mean() 66 | return data 67 | 68 | 69 | def main(): 70 | print("Started...") 71 | # Create the DataFrame 72 | result_df = pd.DataFrame(columns=['stock', 'Close', 'volStop10_2.5', 'ema20', 'RS-ratio', 'ratio-21W', 'RSI(14)']) 73 | 74 | # Benchmark data 75 | # Use yfinance to retrieve the benchmark data 76 | benchmark_ticker = yf.Ticker(benchmark) 77 | benchmark_data = benchmark_ticker.history(start=start_date, end=end_date, interval=data_interval_weekly,auto_adjust=False, prepost=False) 78 | benchmark_data = benchmark_data.dropna() 79 | 80 | # Iterate through the list of stocks 81 | for stock in stocks["Ticker"]: 82 | try: 83 | # Get the stock data from yfinance, dont adjust OHLC 84 | data = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_weekly,auto_adjust=False, prepost=False) 85 | # Drop those with NaN 86 | data = data.dropna() 87 | 88 | # Calculate the RSI using a 14-day period 89 | data['RSI'] = ta.momentum.RSIIndicator(data['Close'], window=14).rsi() 90 | # Check if a crossover from value lower than 60 has happend, we need to however look at RSI trend on a 
charting platform 91 | if (rsi_crossover(data, 60)): 92 | # Calculate volStop 93 | data = volatility_stop(data, 10, 2.5) 94 | # Calculate ema20W 95 | data['ema20'] = ta.trend.EMAIndicator(data['Close'], window=20).ema_indicator() 96 | # Calculate the relative ratio and average 21W 97 | data = ratio_mean(data, benchmark_data, 21) 98 | curr_data = data.iloc[-1] 99 | row = {'stock': stock, 'Close': curr_data['Close'], 'volStop10_2.5': str(round(curr_data['volStop'], 2)), 'ema20': str(round(curr_data['ema20'], 2)), \ 100 | 'RS-ratio': str(round(curr_data['relativeRatio'], 2)), 'ratio-21W': str(round(curr_data['ratio21W'], 2)), 'RSI(14)': str(round(curr_data['RSI'], 2))} 101 | # Append the new row to the DataFrame 102 | result_df.loc[len(result_df)] = row 103 | 104 | except Exception as e: 105 | print("Error: " + stock) 106 | print(e) 107 | 108 | # Append current timestamp to the file name 109 | now = datetime.datetime.now() 110 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 111 | file_name = 'weeklyRSIVolStopBO_' + timestamp + '.csv' 112 | # Export the DataFrame to CSV 113 | result_df.to_csv(file_name, index=False) 114 | print('Done') 115 | 116 | if __name__ == "__main__": 117 | main() -------------------------------------------------------------------------------- /py/yf/newHighMonthly.py: -------------------------------------------------------------------------------- 1 | 2 | ''' 3 | This code, also searches for new monthly highs, but not just ATH 4 | This it does by boxing a lookback limit and a minimum duration where the new high should be 5 | with respect to the historical high. 6 | ''' 7 | import yfinance as yf 8 | import pandas as pd 9 | import time 10 | import os 11 | from datetime import datetime, timedelta 12 | 13 | # Set output folder path 14 | output_path = "output" 15 | 16 | # Read the list of stocks from the CSV file 17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 18 | 19 | # Set the time frame to max 20 | time_frame = 'max' 21 | 22 | # Set the bar time frame 23 | data_interval = '1mo' 24 | 25 | # Set the maximum number of months to lookback 26 | LOOKBACK_LIIMIT = 15 * 12 # Years in months 27 | 28 | # Set minimum numbber of months that this BO should be after 29 | MIN_BO_LENGTH = 50 #5 * 12 # Years in months 30 | 31 | # Initialize a list to store the results 32 | results = [] 33 | 34 | # Crore 35 | One_Cr = 10000000 36 | 37 | # Columnns in the report 38 | report_columns = ["Stock", "mcap", "High Close", "High Close Date", "Current Close", "#MonthsBO", "Diff", "sector" , "industry"] 39 | 40 | def write_dataframe_to_file(df, name): 41 | # Get the current timestamp 42 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S") 43 | 44 | # Create the filename 45 | filename = f'{name}_{timestamp}.csv' 46 | # Save the DataFrame as a CSV file with specific column names as the header 47 | df.to_csv(f'{output_path}/{filename}',index=False) 48 | 49 | 50 | 51 | def main(): 52 | print("Started...") 53 | # create an empty dataframe to store the results 54 | results_df = pd.DataFrame(columns=report_columns) 55 | # Iterate through the list of stocks 56 | for stock in stocks["Ticker"]: 57 | try: 58 | # Get the stock data from yfinance, dont adjust OHLC 59 | ticker = yf.Ticker(stock+".NS") 60 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 61 | # Drop those with NaN 62 | data = data.dropna() 63 | # Drop last row, if 2nd last is already of the month 64 | if data.index[-1].month == data.index[-2].month: 65 | # Replace the values in the 
second-to-last row with the values in the last row 66 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 67 | # Delete the last row 68 | data = data.drop(data.index[-1]) 69 | 70 | if (len(data) < MIN_BO_LENGTH + 1): 71 | print(f'Skipping. Not enough data for {stock}, only {len(data)} available, minimum required {MIN_BO_LENGTH+1}') 72 | continue 73 | 74 | # Reverse the data frame to start from current candle 75 | stk_df = data.iloc[::-1] 76 | max_loopback = LOOKBACK_LIIMIT 77 | if (len(stk_df) < LOOKBACK_LIIMIT): # Limit lookback if not available data for so long 78 | max_loopback = len(stk_df) 79 | 80 | stk_df_max_lookback = stk_df.head(max_loopback) 81 | current_close = stk_df_max_lookback['Close'][0] 82 | for i in range(1, len(stk_df_max_lookback)): 83 | this_close = stk_df_max_lookback['Close'][i] 84 | if this_close > current_close: 85 | if i >= MIN_BO_LENGTH: 86 | highest_close_date = stk_df_max_lookback.index[i].strftime('%Y-%m-%d') 87 | diff = round((this_close - current_close)/current_close * 100, 2) 88 | # Essential data 89 | sector = '' 90 | industry = '' 91 | marketCap = '' 92 | try: 93 | if ticker.info: 94 | marketCap = round(ticker.info['marketCap'] / One_Cr, 0) 95 | industry = ticker.info['industry'] 96 | sector = ticker.info['sector'] 97 | except Exception as err: 98 | pass 99 | new_row = pd.DataFrame({"Stock": stock, "mcap": marketCap, "High Close": round(this_close, 2), "High Close Date": highest_close_date, \ 100 | "Current Close": round(current_close, 2), "#MonthsBO": i, "Diff": diff, "sector": sector, "industry": industry}, index=[0]) 101 | results_df = pd.concat([results_df, new_row]) 102 | break 103 | else: 104 | break # A newer high exist before MIN_BO_LENGTH 105 | except Exception as e: 106 | print(f'Error for ticker: {stock} ==> {e}') 107 | 108 | # print(results_df) 109 | write_dataframe_to_file(results_df, "newHighMonthly_BO_") 110 | print("Done") 111 | 112 | if __name__ == "__main__": 113 | main() 114 | -------------------------------------------------------------------------------- /py/yf/multimonthBO.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import pandas as pd 3 | import time 4 | import os 5 | from datetime import datetime, timedelta 6 | 7 | # Set output folder path 8 | output_path = "output" 9 | 10 | # Read the list of stocks from the CSV file 11 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 12 | 13 | # Set the time frame to max 14 | time_frame = 'max' 15 | 16 | # Set the bar time frame 17 | data_interval = '1mo' 18 | 19 | # Set the minimum number of months since the last ath was breached 20 | MIN_MONTHS = 11 21 | 22 | # Threshold to previous ATH 23 | threshold = 1.0 24 | 25 | # Initialize a list to store the results 26 | results = [] 27 | 28 | # Crore 29 | One_Cr = 10000000 30 | 31 | # determine if highest close was minimum_low_length ago. 
32 | def highestClose(stock_data, min_months): 33 | 34 | highest_close = stock_data["Close"][0] 35 | highest_close_date = stock_data.index[0] 36 | highest_close_idx = 0 37 | for i in range(1, len(stock_data)): 38 | if stock_data["Close"][i] > highest_close: 39 | highest_close = stock_data["Close"][i] 40 | highest_close_date = stock_data.index[i] 41 | highest_close_idx = i 42 | if len(stock_data) - highest_close_idx >= min_months: 43 | return [True, highest_close, highest_close_date] 44 | else: 45 | return [False, '', ''] 46 | 47 | def write_dataframe_to_file(df, name): 48 | # Get the current timestamp 49 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S") 50 | 51 | # Create the filename 52 | filename = f'{name}_{timestamp}.csv' 53 | # Save the DataFrame as a CSV file with specific column names as the header 54 | df.to_csv(output_path + "/" + filename, index=False, columns=["Stock", "mcap", "Highest Close", "Highest Close Date", "Current Close", "Diff", "sector", "industry"]) 55 | 56 | 57 | def main(): 58 | print("Started...") 59 | # create an empty dataframe to store the results 60 | results_df = pd.DataFrame(columns=["Stock", "mcap", "Highest Close", "Highest Close Date", "Current Close", "Diff", "sector" , "industry"]) 61 | # Iterate through the list of stocks 62 | for stock in stocks["Ticker"]: 63 | try: 64 | # Get the stock data from yfinance, dont adjust OHLC 65 | ticker = yf.Ticker(stock+".NS") 66 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 67 | # Drop those with NaN 68 | data = data.dropna() 69 | # Drop last row, if 2nd last is already of the month 70 | if data.index[-1].month == data.index[-2].month: 71 | # Replace the values in the second-to-last row with the values in the last row 72 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 73 | # Delete the last row 74 | data = data.drop(data.index[-1]) 75 | 76 | # print(data) 77 | if (len(data) <= 2): 78 | print(f'Skipping {stock} since not enough data present ') 79 | continue 80 | 81 | min_months = MIN_MONTHS 82 | if (len(data) < (MIN_MONTHS + 1)): 83 | print(f'{stock} has only {len(data)} months, trimming condition') 84 | min_months = len(data) 85 | 86 | # Highest close prior to last month 87 | result_highestClose = highestClose(data.iloc[:-1], min_months) # Skip the current month 88 | highestClose_condition = result_highestClose[0] 89 | highestClose_value = result_highestClose[1] 90 | highestClose_date = result_highestClose[2] 91 | 92 | # Essential data 93 | sector = '' 94 | industry = '' 95 | marketCap = '' 96 | try: 97 | if ticker.info: 98 | marketCap = round(ticker.info['marketCap'] / One_Cr, 0) 99 | industry = ticker.info['industry'] 100 | sector = ticker.info['sector'] 101 | except Exception as err: 102 | pass 103 | 104 | last_close = data["Close"].tail(1).values[0] 105 | if (highestClose_condition and last_close >= highestClose_value * threshold): 106 | diff = round(((last_close - highestClose_value) / highestClose_value) * 100, 2) 107 | new_row = pd.DataFrame({"Stock": stock, "mcap": marketCap, "Highest Close": round(highestClose_value, 2), "Highest Close Date": highestClose_date, \ 108 | "Current Close": round(last_close, 2), "Diff": diff, "sector": sector, "industry": industry}, index=[0]) 109 | results_df = pd.concat([results_df, new_row]) 110 | 111 | except Exception as e: 112 | print(f'Error for ticker: {stock} ==> {e}') 113 | 114 | # print(results_df) 115 | write_dataframe_to_file(results_df, "MultiMonth_BO_") 116 | print("Done") 117 | 118 | if __name__ == "__main__": 119 | 
main() 120 | -------------------------------------------------------------------------------- /py/beta/chatgpt/generate_report_for_company.py: -------------------------------------------------------------------------------- 1 | import re 2 | import model as ai 3 | import os 4 | 5 | screener_xls_data = {} 6 | delimiter = "####" 7 | 8 | company_data = 'companyinfo/sjvn' 9 | 10 | screener_tabs = ['income_statement','income_statement_quarterly', 'balance_sheet', 'cashflow_statement', 'ratio_analysis'] 11 | screener_data = {} 12 | 13 | def preprocess_text(text): 14 | # Lowercase the text 15 | text = text.lower() 16 | 17 | # Remove special characters 18 | text = re.sub(r'\W', ' ', text) 19 | 20 | # Replace multiple spaces with a single space 21 | text = re.sub(r'\s+', ' ', text) 22 | 23 | return text 24 | 25 | def load_screener_data(): 26 | 27 | for i in range(0, len(screener_tabs)): 28 | tabname = screener_tabs[i] 29 | f = open(f'{company_data}/{tabname}.txt') 30 | data = f.read() 31 | f.close() 32 | screener_data[tabname] = data 33 | 34 | 35 | def company_info_analysis(): 36 | file = f'{company_data}/company_info.txt' 37 | data = 'No company info' 38 | if os.path.isfile(file): 39 | f = open(file) 40 | data = f.read() 41 | f.close() 42 | print('Analyzing company_info data...') 43 | system_message = f'As a financial analyst for equity markets, perform an evaluation of the company based on the inputs provided. The input is enclosed within {delimiter}.\ 44 | You must do the analysis in the following steps.\ 45 | Step 8: Prepare a short description of the business of the company, its factories, plants and operations in general.\ 46 | Step 9: Prepare shareholding trend and status, separately, if shareholding data is provided. \ 47 | Step 10: Prepare a separate detailed summary of concall data if provided. \ 48 | Step 11: If credit rating data is provided, list out positive and negative points separately. \ 49 | Give your analysis in as detailed a manner as possible, however summarize it to limit to max_tokens = 2000 ' 50 | user_message = f'{delimiter}{data}{delimiter}' 51 | messages = [ 52 | {'role':'system', 53 | 'content': system_message}, 54 | {'role':'user', 55 | 'content': f"{delimiter}{user_message}{delimiter}"}, 56 | ] 57 | response = ai.get_completion_from_messages(messages,max_tokens=2000) 58 | return response 59 | 60 | def fin_statement_analysis(): 61 | print('Analyzing screener data...') 62 | system_message = f'As a financial analyst for equity markets, you need to perform an evaluation of the company based on the inputs provided. Some of these inputs will be standard financial data and some will be unstructured. \ 63 | The input data will be enclosed with {delimiter} You must do the analysis in the following steps. \ 64 | Step 1:{delimiter} Perform a financial analysis of the company from stock market investing perspective from its annual income statement, quarterly income statement, \ 65 | balance sheet and cashflow statement. Each will be provided to you enclosed as {delimiter}income_statement:{delimiter} {delimiter}balance_sheet{delimiter} and so on. \ 66 | Step 2: Using the ratio_analysis statement analyze the working capital cycle. Step 3: Perform a Du-Pont analysis using the above data. Step 4: Perform profitability analysis of this financial data \ 67 | Step 5: Provide trend analysis and competitive advantages of the company based on given financial data. Step 6: Check pricing power of this company?
\ 68 | Step 7: Detect and report any red flags about the company from the data \ 69 | Step 8: Report preparation / Take special care. As an analyst, perform these analyses and prepare a report that is very detailed but summarize it to limit to max_tokens=2000.' 70 | 71 | msg = '' 72 | for key,val in screener_data.items(): 73 | msg += f'{delimiter}{key}:{val}{delimiter}' # accumulate every screener tab, not just the last one 74 | user_message = f'{delimiter}{msg}{delimiter}' 75 | messages = [ 76 | {'role':'system', 77 | 'content': system_message}, 78 | {'role':'user', 79 | 'content': f"{delimiter}{user_message}{delimiter}"}, 80 | ] 81 | response = '' 82 | response = ai.get_completion_from_messages(messages,max_tokens=2000) 83 | return response 84 | 85 | 86 | def main(): 87 | ai.set_api() 88 | load_screener_data() 89 | 90 | #Financial statement analysis from screener data 91 | fin_screener_analysis = '' 92 | fin_screener_analysis = fin_statement_analysis() 93 | # print(fin_screener_analysis) 94 | with open(f'{company_data}/financial_analysis.txt', 'w', encoding='utf-8') as file: 95 | file.write(fin_screener_analysis) 96 | 97 | #Perform company info analysis from data from internet and elsewhere 98 | co_info_analysis = '' 99 | co_info_analysis = company_info_analysis() 100 | with open(f'{company_data}/company_info_analysis.txt', 'w', encoding='utf-8') as file: 101 | file.write(co_info_analysis) 102 | 103 | print('Done') 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /py/yf/box_scan.py: -------------------------------------------------------------------------------- 1 | ''' 2 | We detect a consolidation after a rally and quantify the box formation 3 | Rally is defined as 3 consecutive higher closes, and the high of that candle defines the top left of the box 4 | The low is extended with each new lower low 5 | ''' 6 | import yfinance as yf 7 | import pandas as pd 8 | import datetime 9 | import matplotlib.pyplot as plt 10 | import matplotlib.patches as patches 11 | 12 | 13 | # Set the bar time frame 14 | data_interval = '1d' 15 | # Set the time frame to 90d 16 | time_frame = '90d' 17 | 18 | # Set output folder path 19 | output_path = "boxscan/output" 20 | # Initialize an empty DataFrame to store the output CSV data 21 | output_df = pd.DataFrame(columns=['Stock Code', 'Box Duration', 'Drawdown', 'Fall Rate']) 22 | 23 | # Read the list of stocks from the CSV file 24 | stocks = pd.read_csv("stocks500.csv", header=0, usecols=["Ticker"]) 25 | 26 | # Box depth threshold % 27 | box_depth_threshold = -20 28 | # Rally days 29 | min_rally_days = 3 30 | # Box days 31 | min_days_in_box = 3 32 | 33 | # Function to plot and save chart and data 34 | def scan_for_box(df, stock_code): 35 | 36 | # Calculate 50-day average volume 37 | df['50_day_avg_vol'] = df['Volume'].rolling(window=50).mean() 38 | 39 | # Set up plot 40 | fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(20, 12), sharex=True, gridspec_kw={'height_ratios': [3, 1]}) 41 | ax1.set_ylabel('Price') 42 | ax1.set_title(f'{stock_code} with Negative Drawdown') 43 | ax2.set_xlabel('Time') 44 | ax2.set_ylabel('Volume') 45 | 46 | # Initialize variables for debugging and the box 47 | rally_days = 0 48 | rally_volume_high = False 49 | box_start = None 50 | box_end = None 51 | box_high = None 52 | box_low = None 53 | 54 | # Iterate through the data to identify rallies, place debug dots, and draw the box 55 | for i in range(len(df)): 56 | color = 'g' if df.iloc[i]['Close'] >= df.iloc[i]['Open'] else 'r' 57 | vol_color = color 58 |
vol_color = 'g' if i > 0 and df.iloc[i]['Close'] >= df.iloc[i-1]['Close'] else 'r' 59 | 60 | ax1.plot([i, i], [df.iloc[i]['Low'], df.iloc[i]['High']], color=color) 61 | ax1.add_patch(patches.Rectangle((i - 0.3, df.iloc[i]['Open']), 0.6, df.iloc[i]['Close'] - df.iloc[i]['Open'], facecolor=color)) 62 | ax2.bar(i, df.iloc[i]['Volume'], color=vol_color, width=0.6) 63 | 64 | # Detect a rally 65 | if i > 0 and df.iloc[i]['Close'] > df.iloc[i - 1]['Close']: 66 | rally_days += 1 67 | if df.iloc[i]['Volume'] > df.iloc[i]['50_day_avg_vol']: 68 | rally_volume_high = True 69 | else: 70 | rally_days = 0 71 | rally_volume_high = False 72 | 73 | if rally_days >= min_rally_days and rally_volume_high: 74 | ax1.plot(i, df.iloc[i]['High'], 'o', color='orange') 75 | box_high = df.iloc[i]['High'] 76 | box_low = df.iloc[i]['Low'] 77 | box_start = i 78 | 79 | if box_start is not None: 80 | new_low = df.iloc[i]['Low'] 81 | if new_low < box_low: 82 | box_low = new_low 83 | box_end = i 84 | ax1.add_patch(patches.Rectangle((box_start, box_low), box_end - box_start, box_high - box_low, fill=True, color='yellow', alpha=0.3)) 85 | 86 | if df.iloc[i]['Close'] > box_high: 87 | box_start = None 88 | box_end = None 89 | box_high = None 90 | box_low = None 91 | 92 | # Book keeping 93 | if box_start is not None: 94 | box_days = (box_end - box_start) + 1 95 | box_drop_percent = -((box_high - box_low) / box_high) * 100 96 | box_fall_rate = round(-box_drop_percent / box_days, 2) 97 | text_str = f"Box Duration: {box_days} days\nDrawdown: {box_drop_percent:.2f}%\nFR: {box_fall_rate:.2f}" 98 | ax1.text(0.75, 0.1, text_str, transform=ax1.transAxes, fontsize=12, verticalalignment='bottom', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5)) 99 | 100 | if box_end == len(df) - 1 and box_drop_percent > box_depth_threshold and box_days > min_days_in_box: 101 | plt.savefig(f"{output_path}/{stock_code}.png") 102 | output_df.loc[len(output_df)] = [stock_code, box_days, box_drop_percent, box_fall_rate] 103 | plt.close() 104 | 105 | 106 | def main(): 107 | print('Started') 108 | # Iterate through the list of stocks 109 | for stock in stocks["Ticker"]: 110 | try: 111 | ticker = yf.Ticker(stock+".NS") 112 | stock_history = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 113 | stock_history = stock_history.dropna() 114 | scan_for_box(stock_history, stock) 115 | except Exception as e: 116 | print(f"Error: {stock} ==> {e}") 117 | 118 | # Append current timestamp to the file name 119 | now = datetime.datetime.now() 120 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 121 | file_name = f'{output_path}/box_scan_{timestamp}.csv' 122 | # Export the DataFrame to CSV 123 | output_df.to_csv(file_name, index=False) 124 | print(f'Done, output saved in {file_name}') 125 | 126 | if __name__ == "__main__": 127 | main() 128 | -------------------------------------------------------------------------------- /py/yf/green_dot.py: -------------------------------------------------------------------------------- 1 | 2 | import yfinance as yf 3 | import pandas as pd 4 | import numpy as np 5 | import datetime 6 | 7 | # Set output folder path 8 | output_path = "output" 9 | 10 | # Read the list of stocks from the CSV file 11 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 12 | 13 | # Set start Date 14 | start_date = '2020-01-01' 15 | 16 | # Set end Date 17 | end_date = '2023-01-21' 18 | 19 | # Specify the benchmark symbol 20 | benchmark = "^NSEI" 21 | 22 | # Interval 23 | data_interval_daily = '1d' # '1wk' or '1d' 24 | 
data_interval_weekly = '1wk' 25 | 26 | # Lookback for green dot 27 | lookback = 5 28 | 29 | def calculateReversionExpansion(stock_data): 30 | # Extract the close prices from the DataFrame 31 | src = stock_data["Close"] 32 | 33 | # Perform the EMA calculations 34 | l1, l2, l3, l4 = 20, 50, 100, 200 #EMA periods 35 | 36 | # Compute the exponential moving average with a lookback length of 20 37 | ema1 = src.ewm(span=l1).mean() 38 | ema2 = src.ewm(span=l2).mean() 39 | ema3 = src.ewm(span=l3).mean() 40 | ema4 = src.ewm(span=l4).mean() 41 | 42 | # Merge the series into one DataFrame 43 | merged_df = pd.concat([ema1, ema2, ema3, ema4], axis=1, keys=['EMA 20', 'EMA 50', 'EMA 100', 'EMA 200']) 44 | merged_df.fillna(0, inplace=True) 45 | # Find the lowest and the highest of this emas 46 | merged_df['lowest'] = merged_df[(merged_df > 0)].min(axis=1) 47 | # Cheeky way to replace zero with a miniscule value to get rid of div by zero error 48 | merged_df['lowest'].replace(0, 1e-10, inplace=True) 49 | merged_df['highest'] = merged_df.max(axis=1) 50 | 51 | # Now, merge the close, otherwise lowest will consider Close values also 52 | merged_df = pd.concat([merged_df, src], axis=1) 53 | # Calculate delta between lowest and highest 54 | merged_df['delta'] = (merged_df['highest'] - merged_df['lowest']) / merged_df['lowest'] 55 | # Calculate emadelta 56 | merged_df['emadelta'] = merged_df['delta'].ewm(span=7).mean() 57 | # Calculate delta between close and lowest ema 58 | merged_df['pricedelta'] = ( merged_df['Close'] - merged_df['lowest']) / merged_df['lowest'] 59 | # Calculate ema of this pricedelta 60 | merged_df['emapricedelta'] = merged_df['pricedelta'].ewm(span=7).mean() 61 | # Determine if a crossover has happened between delta crossing over emadelta 62 | merged_df['crossover'] = np.where((merged_df['delta'] > merged_df['emadelta']) & (merged_df['delta'].shift(1) < merged_df['emadelta'].shift(1)), 1, 0) 63 | # Determine if a crossunder has happened between delta crossing over emadelta 64 | merged_df['crossunder'] = np.where((merged_df['delta'] < merged_df['emadelta']) & (merged_df['delta'].shift(1) > merged_df['emadelta'].shift(1)), 1, 0) 65 | 66 | return merged_df 67 | 68 | def checkforGreenDot(rev_exp_data): 69 | # Check last lookback rows if there has been a crossover and no crossunder in the last 70 | rev_exp_data_21 = rev_exp_data.tail(lookback) 71 | 72 | crossover = False 73 | idx = '' 74 | delta = 0.0 75 | for index, row in rev_exp_data_21.iterrows(): 76 | if (row['crossover'] == 1 and row['Close'] > row['highest']): 77 | crossover = True 78 | idx = index 79 | delta = row['delta'] 80 | 81 | if (crossover and row['crossunder'] == 1): 82 | crossover = False 83 | return [crossover, idx, delta] 84 | 85 | def main(): 86 | print("Started...") 87 | # Create the DataFrame 88 | result_df = pd.DataFrame(columns=['stock', 'dailyXoverDate', 'dailyDelta', 'weeklyXoverDate', 'weeklyDelta']) 89 | # Iterate through the list of stocks 90 | for stock in stocks["Ticker"]: 91 | try: 92 | # Get the stock data 93 | # Get the stock data from yfinance, dont adjust OHLC 94 | stock_data_daily = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_daily,auto_adjust=False, prepost=False) 95 | # Drop those with NaN 96 | stock_data_daily = stock_data_daily.dropna() 97 | 98 | # Calculate the entire series of reversion and expansion -- daily 99 | rev_exp_data = calculateReversionExpansion(stock_data_daily) 100 | result_daily = checkforGreenDot(rev_exp_data) 101 | 102 | # Weekly data 103 | 
stock_data_weekly = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_weekly,auto_adjust=False, prepost=False) 104 | # Drop those with NaN 105 | stock_data_weekly = stock_data_weekly.dropna() 106 | 107 | # Calculate the entire series of reversion and expansion -- weekly 108 | rev_exp_data_weekly = calculateReversionExpansion(stock_data_weekly) 109 | result_weekly = checkforGreenDot(rev_exp_data_weekly) 110 | 111 | condition = result_daily[0] or result_weekly[0] 112 | if (condition): 113 | row = {'stock': stock, 'dailyXoverDate': str(result_daily[1]), 'dailyDelta': str(result_daily[2]), 'weeklyXoverDate': str(result_weekly[1]), 'weeklyDelta': str(result_weekly[2])} 114 | # Append the new row to the DataFrame 115 | result_df.loc[len(result_df)] = row 116 | 117 | except Exception as e: 118 | print("Error: " + stock) 119 | print(e) 120 | 121 | # Append current timestamp to the file name 122 | now = datetime.datetime.now() 123 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 124 | file_name = 'green_dot_' + timestamp + '.csv' 125 | # Export the DataFrame to CSV 126 | result_df.to_csv(output_path + "/" + file_name, index=False) 127 | 128 | 129 | if __name__ == "__main__": 130 | main() 131 | 132 | -------------------------------------------------------------------------------- /py/yf/trendreversal_ha.py: -------------------------------------------------------------------------------- 1 | ''' 2 | We try to analyze trend reversals in stocks with major corrections 3 | In order to reduce noise we select monthly candles and further use HA (Heikin-Ashi) candles 4 | 5 consecutive red candles, followed by 2 green candles, should mark a clean trend reversal 5 | These reversals must be validated with price action on lower timeframes. 6 | Also, one can confirm demand by checking lime volumes. 7 | Relative strength across benchmark and sector must be checked.
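Illustration (hypothetical values): if the last seven monthly HA candles show HA_Close below HA_Open for the first five (e.g. closes drifting from 100 down to 76) and HA_Close above HA_Open for the final two (e.g. 79 and 84), the scan flags the stock as a candidate reversal.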
8 | ''' 9 | import yfinance as yf 10 | import pandas as pd 11 | import datetime 12 | 13 | # Folder location 14 | output = 'output' 15 | 16 | # Read the list of stocks from the CSV file 17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 18 | 19 | # Set the time frame to max 20 | time_frame = 'max' 21 | 22 | # Set the bar time frame 23 | data_interval = '1mo' 24 | 25 | # Crore 26 | One_Cr = 10000000 27 | 28 | def create_HA_Candles(df): 29 | 30 | # Create a new DataFrame to store the Heikin-Ashi values 31 | heikin_ashi_data = pd.DataFrame(index=df.index) 32 | 33 | if (len(df) < 2): # We need at least 2 34 | return heikin_ashi_data 35 | 36 | # Append the 'High' and 'Low' columns from the original data 37 | heikin_ashi_data[['High', 'Low']] = df[['High', 'Low']] 38 | # Calculate the Heikin-Ashi open, close, high, and low values 39 | heikin_ashi_data['HA_Close'] = (df['Open'] + df['High'] + df['Low'] + df['Close']) / 4 40 | # Handle the first row separately 41 | first_row_open = (df['Open'][0] + df['Close'][0]) / 2 42 | heikin_ashi_data['HA_Open'] = first_row_open 43 | # Calculate HA_Open correctly for subsequent rows 44 | for i in range(1, len(heikin_ashi_data)): 45 | heikin_ashi_data['HA_Open'][i] = (heikin_ashi_data['HA_Open'][i-1] + heikin_ashi_data['HA_Close'][i-1]) / 2 46 | 47 | heikin_ashi_data['HA_High'] = heikin_ashi_data[['HA_Open', 'HA_Close', 'High']].max(axis=1) 48 | heikin_ashi_data['HA_Low'] = heikin_ashi_data[['HA_Open', 'HA_Close', 'Low']].min(axis=1) 49 | 50 | # Drop the 'High' and 'Low' columns 51 | heikin_ashi_data.drop(['High', 'Low'], axis=1, inplace=True) 52 | 53 | #print(heikin_ashi_data.tail(5)) 54 | return heikin_ashi_data 55 | 56 | 57 | def check_trend_change(df): 58 | # Check for the first 5 candles as red and the last 2 candles as green 59 | last_7_candles = df.tail(7) # Select the last 7 candles 60 | 61 | red_candles_count = 0 62 | green_candles_count = 0 63 | valid_pattern = False 64 | 65 | for i in range(5): 66 | candle = last_7_candles.iloc[i] 67 | if candle['HA_Close'] < candle['HA_Open']: 68 | red_candles_count += 1 69 | else: 70 | break 71 | 72 | for i in range(5, 7): 73 | candle = last_7_candles.iloc[i] 74 | if candle['HA_Close'] > candle['HA_Open']: 75 | green_candles_count += 1 76 | else: 77 | break 78 | 79 | if red_candles_count == 5 and green_candles_count == 2: 80 | valid_pattern = True 81 | 82 | return valid_pattern 83 | 84 | 85 | def main(): 86 | print("Started... 
') 87 | # Create the DataFrame 88 | df = pd.DataFrame(columns=['stock', 'mcap', 'vol1', 'vol2d', 'vol3d', 'sector' , 'industry']) 89 | 90 | # Iterate through the list of stocks 91 | for stock in stocks["Ticker"]: 92 | try: 93 | # Get the stock data from yfinance, dont adjust OHLC 94 | stk_ticker = yf.Ticker(stock+".NS") 95 | data = stk_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 96 | # Drop those with NaN 97 | data = data.dropna() 98 | if (len(data) < 2): # cannot do much analysis with fewer than 2 monthly candles 99 | continue 100 | # Drop last row, if 2nd last is already of the month 101 | if data.index[-1].month == data.index[-2].month: 102 | # Replace the values in the second-to-last row with the values in the last row 103 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 104 | # Delete the last row 105 | data = data.drop(data.index[-1]) 106 | 107 | heikin_ashi_data = create_HA_Candles(data) 108 | if (len(heikin_ashi_data) < 7): 109 | print(f'Skipped {stock}: not enough data') 110 | continue # skip this stock, check_trend_change needs at least 7 HA candles 111 | # Merge it to data 112 | heikin_ashi_data = heikin_ashi_data.join(data) 113 | 114 | # Check if there is a trend change 115 | if check_trend_change(heikin_ashi_data): 116 | sector = '' 117 | industry = '' 118 | marketCap = '' 119 | try: 120 | if stk_ticker.info: 121 | sector = stk_ticker.info['sector'] 122 | industry = stk_ticker.info['industry'] 123 | marketCap = round(stk_ticker.info['marketCap'] / One_Cr, 0) 124 | except Exception as err: 125 | pass 126 | 127 | # Get volume data 128 | vols = data.tail(3)['Volume'] 129 | vol1 = vols[0] 130 | vol2d = vols[1] - vol1 131 | vol3d = vols[2] - vols[1] 132 | 133 | # Append to row 134 | row = {'stock': stock, 'mcap' : marketCap, 'vol1' : vol1, 'vol2d' : vol2d,'vol3d' : vol3d, 'sector' : sector, 'industry' : industry} 135 | # Append the new row to the DataFrame 136 | df.loc[len(df)] = row 137 | 138 | except Exception as e: 139 | print(f'Error for ticker {stock} ==> {e}') 140 | # Append current timestamp to the file name 141 | now = datetime.datetime.now() 142 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 143 | file_name = f'{output}/ha_trendreversal_{timestamp}.csv' 144 | # Export the DataFrame to CSV 145 | df.to_csv(file_name, index=False) 146 | print('Done') 147 | 148 | 149 | if __name__ == "__main__": 150 | main() 151 | -------------------------------------------------------------------------------- /py/eodhd/saucer_crs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A script to determine a trend reversal. This script uses Relative Strength (Stock Price / Benchmark ratio). 3 | The script calculates the moving average of the relative strength values for a specified length (avg_length). 4 | It determines the current trend of this average, based on the following logic: 5 | - If the value of the average is rising, i.e. greater than the max of the last 3 (trend_length) weeks, the trend is considered an uptrend. This is denoted by the letter G. 6 | - If the value of the average is falling, i.e. less than the minimum of the last 3 (trend_length) weeks, the trend is considered a downtrend. This is denoted by the letter R. 7 | - If the value of the average is neither rising nor falling, the trend is considered sideways. This is denoted by the letter S. 8 | Next, the script will create a string of these trends (G,R,S) for the last 26 (analysis_window) weeks, with the most recent week being the last character in the string. 9 | It will save this string in the output column 'Trend' of the output CSV file.
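For example (hypothetical sequence), a 26-character string whose first 14 weeks are all 'R' and whose last two characters are 'GG' (such as 'RRRRRRRRRRRRRRSSRSSRSSSSGG') is reported as a bullish reversal; the mirror case, 14 leading 'G' characters ending in 'RR', is reported as a bearish reversal.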
10 | ''' 11 | 12 | import pandas as pd 13 | import pricereader as pr 14 | import datetime 15 | 16 | # Set output folder path 17 | output_path = "output" 18 | 19 | # Read the list of stocks from the CSV file 20 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 21 | 22 | # Specify the benchmark symbol 23 | benchmark = "NSEI" 24 | 25 | # Interval 26 | data_interval_weekly = 'w' 27 | 28 | # Weekly CRS Average length 29 | avg_length = 52 # Weeks 30 | ratio_col = f'ratio{avg_length}W' 31 | 32 | # Trend length 33 | trend_length = 3 # Weeks 34 | 35 | # Window of analysis 36 | analysis_window = 26 # Weeks 37 | 38 | def ratio_mean(data, benchmark_data, avg_length): 39 | # Calculate the relative strength of the stock by dividing its weekly closing price by the weekly closing price of the Nifty 50 index 40 | relative_strength = data['Close'] / benchmark_data['Close'] 41 | data[f'relativeRatio'] = relative_strength 42 | # print(relative_strength.tail(10)) 43 | 44 | # Calculate the mean of the relative strength values for length 45 | data[ratio_col] = relative_strength.rolling(window=avg_length).mean() 46 | return data 47 | 48 | 49 | def rising(source, length): 50 | return source > source.shift(1).rolling(window=length).max() 51 | 52 | def falling(source, length): 53 | return source < source.shift(1).rolling(window=length).min() 54 | 55 | def sideways(source, length): 56 | # Sideways is true when not rising and not falling 57 | is_rising = rising(source, length) 58 | is_falling = falling(source, length) 59 | return ~(is_rising | is_falling) # Not rising and not falling 60 | 61 | def detect_reversal(sequence, initial_count, initial_type, transition_length, final_pattern): 62 | if sequence[:initial_count].count(initial_type) >= initial_count and sequence[-len(final_pattern):] == final_pattern: 63 | return True 64 | return False 65 | 66 | def main(): 67 | print("Started...") 68 | # Create the DataFrame 69 | result_df = pd.DataFrame(columns=['stock', 'Trend Sequence', 'Reversal Message']) 70 | 71 | # Benchmark data 72 | benchmark_data = pr.get_price_data(benchmark, data_interval_weekly) 73 | benchmark_data = benchmark_data.dropna() 74 | 75 | # Iterate through the list of stocks 76 | for stock in stocks["Ticker"]: 77 | try: 78 | # Get the stock data, sample as below. 
Latest data is at the end 79 | ''' 80 | Date,Open,High,Low,Close,Volume,Adj Close 81 | 2017-11-16,400.0,400.0,361.0,361.0,29447,361.0 82 | 2017-11-20,343.0,343.0,279.45,279.45,5389,279.45 83 | 2017-11-27,265.5,265.5,194.15,206.45,613081,206.45 84 | 2017-12-04,196.0,227.55,181.0,227.55,615553,227.55 85 | 2017-12-11,238.9,290.25,238.9,290.25,87251,290.25 86 | ''' 87 | data = pr.get_price_data(stock, data_interval_weekly) 88 | # Drop those with NaN 89 | data = data.dropna() 90 | 91 | # Calculate the relative ratio and average avg_lengthW 92 | data = ratio_mean(data, benchmark_data, avg_length) 93 | 94 | # Apply the rising, falling, and sideways functions 95 | data['MA_rising'] = rising(data[ratio_col], trend_length) 96 | data['MA_falling'] = falling(data[ratio_col],trend_length) 97 | data['MA_sideways'] = sideways(data[ratio_col], trend_length) 98 | 99 | # Extract the last analysis_window rows 100 | analysis_data = data[['MA_rising', 'MA_falling', 'MA_sideways']].tail(analysis_window) 101 | 102 | # Create a sequence string from the last 13 rows 103 | sequence = ''.join(['G' if row['MA_rising'] else 'R' if row['MA_falling'] else 'S' for index, row in analysis_data.iterrows()]) 104 | 105 | # Detect reversals, 14 weeks of current trend and 4 weeks of opposite trend, in between we do not care 106 | bullish_reversal = detect_reversal(sequence, 14, 'R', 4, 'GG') 107 | bearish_reversal = detect_reversal(sequence, 14, 'G', 4, 'RR') 108 | 109 | # Determine reversal message 110 | reversal_message = "" 111 | if bullish_reversal: 112 | reversal_message = "Bullish reversal detected." 113 | elif bearish_reversal: 114 | reversal_message = "Bearish reversal detected." 115 | 116 | # Save the results to the DataFrame 117 | row = {'stock': stock, 'Trend Sequence': sequence, 'Reversal Message': reversal_message} 118 | # Append the new row to the DataFrame 119 | result_df.loc[len(result_df)] = row 120 | except Exception as e: 121 | print("Error: " + stock) 122 | print(e) 123 | 124 | # Append current timestamp to the file name 125 | now = datetime.datetime.now() 126 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 127 | file_name = 'weeklyRS_Saucer_' + timestamp + '.csv' 128 | # Export the DataFrame to CSV 129 | result_df.to_csv(output_path + "/" + file_name, index=False) 130 | print('Done') 131 | 132 | if __name__ == "__main__": 133 | main() -------------------------------------------------------------------------------- /py/yf/supply_exhaustion_6m_scan.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import pandas as pd 3 | import os 4 | from datetime import datetime, timedelta 5 | 6 | # Set output folder path 7 | output_path = "output" 8 | 9 | # Read the list of stocks from the CSV file 10 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 11 | 12 | # Set start Date 13 | start_date = '2021-01-24' 14 | 15 | # Set end Date 16 | end_date = '2023-01-25' 17 | 18 | # Interval 19 | data_interval = '1d' 20 | 21 | # lowest close lookback dataset length 22 | lowest_low_lookback = 250 23 | 24 | # minimum days since last lowest close 25 | minimum_low_length = 123 26 | 27 | # mimnum days since last peak after lowest close 28 | minimum_days_since_high = 55 29 | 30 | # determine highest close in the dataset , Priorr to lowest low 31 | def highestClose(stock_data): 32 | highest_close = stock_data["Close"][0] 33 | highest_close_date = stock_data.index[0] 34 | for i in range(1, len(stock_data)): 35 | if stock_data["Close"][i] >= highest_close: 36 | 
highest_close = stock_data["Close"][i] 37 | highest_close_date = stock_data.index[i] 38 | 39 | return [highest_close, highest_close_date] 40 | 41 | 42 | # determine if lowest close was minimum_low_length ago. 43 | def lowestLow(stock_data): 44 | 45 | lowest_close = stock_data["Close"][0] 46 | lowest_close_date = stock_data.index[0] 47 | lowest_close_idx = 0 48 | for i in range(1, len(stock_data)): 49 | if stock_data["Close"][i] <= lowest_close: 50 | lowest_close = stock_data["Close"][i] 51 | lowest_close_date = stock_data.index[i] 52 | lowest_close_idx = i 53 | if len(stock_data) - lowest_close_idx >= minimum_low_length: 54 | return [True, lowest_close, lowest_close_date] 55 | else: 56 | return [False, '', ''] 57 | 58 | def write_dataframe_to_file(df, name): 59 | # Get the current timestamp 60 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S") 61 | 62 | # Create the filename 63 | filename = f'{name}_{timestamp}.csv' 64 | # Save the DataFrame as a CSV file with specific column names as the header 65 | df.to_csv(output_path + "/" + filename, index=False, columns=["Stock", "Lowest Close", "Low Date", "High Prior", "High Prior Date", "23_6 Retrace", \ 66 | "38_2 Retrace", "50_0 Retrace", "Curr/High %"]) 67 | 68 | 69 | def main(): 70 | print("Started...") 71 | # create an empty dataframe to store the results 72 | results_df = pd.DataFrame(columns=["Stock", "Lowest Close", "Low Date", "High Prior", "High Prior Date", "23_6 Retrace", "38_2 Retrace", \ 73 | "50_0 Retrace", "Curr/High %"]) 74 | # Iterate through the list of stocks 75 | for stock in stocks["Ticker"]: 76 | try: 77 | result_lowestLow = [False, '', ''] 78 | below_23_6 = False 79 | below_38_2 = False 80 | below_50 = False 81 | 82 | # Get the stock data 83 | # Get the stock data from yfinance, dont adjust OHLC 84 | stock_data = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval,auto_adjust=False, prepost=False) 85 | # Drop those with NaN 86 | stock_data = stock_data.dropna() 87 | 88 | # Lowest low should be beyond last minimum_low_length months 89 | result_lowestLow = lowestLow(stock_data.tail(lowest_low_lookback)) 90 | lowest_low_condition = result_lowestLow[0] 91 | lowest_low_close = result_lowestLow[1] 92 | lowest_low_date = result_lowestLow[2] 93 | 94 | # if lowest low condition is met, find out max in the data set Priorr to lowest low date 95 | if (lowest_low_condition): 96 | # Get dataset upto lowest_low_date 97 | before_low_data = stock_data.loc[stock_data.index < lowest_low_date] 98 | 99 | # Get highest Priorr to low 100 | result_highestClosePriorr = highestClose(before_low_data) 101 | highest_Priorr_close = result_highestClosePriorr[0] 102 | highest_Priorr_date = result_highestClosePriorr[1] 103 | 104 | # Calcualte difference between close and high 105 | diff = (highest_Priorr_close - lowest_low_close) 106 | # 23.6%, 38.2% and 50% retracement value 107 | level_23_6 = lowest_low_close + (diff * 0.236) 108 | level_38_2 = lowest_low_close + (diff * 0.382) 109 | level_50 = lowest_low_close + (diff * 0.50) 110 | 111 | # Get dataset after lowest_low_date 112 | after_low_data = stock_data.loc[stock_data.index > lowest_low_date] 113 | # Get highest after low 114 | result_highestCloseAfter = highestClose(after_low_data) 115 | highest_after_close = result_highestCloseAfter[0] 116 | highest_after_date = result_highestCloseAfter[1] 117 | 118 | # Check if the highest close, is within the retracement level 119 | if highest_after_close <= level_50: 120 | below_50 = True 121 | if highest_after_close <= 
level_38_2: 122 | below_38_2 = True 123 | if highest_after_close <= level_23_6: 124 | below_23_6 = True 125 | # Calculate distance of current price with respect to the highest value in the retracement 126 | current_close = stock_data["Close"].tail(1).values[-1] 127 | curr_diff = round(((current_close - highest_after_close) / (highest_after_close)) * 100, 2) 128 | 129 | if (below_50 or below_23_6 or below_38_2): 130 | new_row = pd.DataFrame({"Stock": stock, "Lowest Close": lowest_low_close, "Low Date": lowest_low_date, "High Prior": highest_Priorr_close, \ 131 | "High Prior Date": highest_Priorr_date, "23_6 Retrace": below_23_6, "38_2 Retrace": below_38_2, "50_0 Retrace": below_50, \ 132 | "Curr/High %": curr_diff}, index=[0]) 133 | results_df = pd.concat([results_df, new_row]) 134 | 135 | except Exception as e: 136 | print("Error: " + stock) 137 | print(e) 138 | 139 | # print(results_df) 140 | write_dataframe_to_file(results_df, "Supply_Exhaustion_6M_") 141 | print("Done") 142 | 143 | if __name__ == "__main__": 144 | main() 145 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/src/master_file_generator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Master File Generator Module 3 | 4 | This module handles the creation of the consolidated master markdown file from individual markdown files. 5 | """ 6 | 7 | import os 8 | import re 9 | import logging 10 | from pathlib import Path 11 | from typing import List, Optional 12 | import datetime 13 | 14 | # Configure logging 15 | logging.basicConfig( 16 | level=logging.INFO, 17 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 18 | ) 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def generate_master_file( 23 | company_name: str, 24 | markdown_files: List[str], 25 | output_dir: Optional[str] = None 26 | ) -> str: 27 | """Generate a consolidated master markdown file for a company. 
28 | 29 | Args: 30 | company_name: Name of the company 31 | markdown_files: List of paths to markdown files to include 32 | output_dir: Directory to save the master file (defaults to company folder) 33 | 34 | Returns: 35 | Path to the generated master file 36 | """ 37 | logger.info(f"Generating master file for {company_name} from {len(markdown_files)} markdown files") 38 | 39 | # Create timestamp for the master file 40 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 41 | master_filename = f"{company_name}_master_{timestamp}.md" 42 | 43 | # Determine output directory 44 | if output_dir is None: 45 | # Try to infer from the first markdown file 46 | if markdown_files: 47 | first_file = Path(markdown_files[0]) 48 | output_dir = first_file.parent.parent # Go up one level from processed/ 49 | else: 50 | output_dir = os.getcwd() 51 | 52 | output_path = Path(output_dir) / master_filename 53 | 54 | # Prepare master file content 55 | master_content = [ 56 | f"# {company_name.upper()} - Consolidated Analysis", 57 | f"Generated on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 58 | f"Number of source documents: {len(markdown_files)}", 59 | "\n---\n" 60 | ] 61 | 62 | # Table of Contents 63 | toc = ["## Table of Contents"] 64 | 65 | # Track sections for organizing content 66 | sections = { 67 | "Financial Data": [], 68 | "Business Overview": [], 69 | "Management": [], 70 | "Industry Analysis": [], 71 | "News & Media": [], 72 | "Miscellaneous": [] 73 | } 74 | 75 | # Process each markdown file 76 | for idx, md_file in enumerate(markdown_files): 77 | try: 78 | with open(md_file, 'r', encoding='utf-8') as f: 79 | content = f.read() 80 | 81 | # Extract filename for reference 82 | filename = Path(md_file).stem 83 | 84 | # Determine section based on content keywords 85 | section = "Miscellaneous" 86 | content_lower = content.lower() 87 | 88 | if any(kw in content_lower for kw in ["profit", "revenue", "financial", "balance sheet", "income", "statement", "ratio"]): 89 | section = "Financial Data" 90 | elif any(kw in content_lower for kw in ["business", "product", "service", "segment", "overview"]): 91 | section = "Business Overview" 92 | elif any(kw in content_lower for kw in ["ceo", "director", "management", "board"]): 93 | section = "Management" 94 | elif any(kw in content_lower for kw in ["industry", "market", "competitor", "competition"]): 95 | section = "Industry Analysis" 96 | elif any(kw in content_lower for kw in ["news", "press", "announcement", "media"]): 97 | section = "News & Media" 98 | 99 | # Add to appropriate section 100 | sections[section].append((filename, content)) 101 | 102 | # Add to TOC 103 | toc.append(f"- [{filename}](#{filename.lower().replace(' ', '-')})") 104 | 105 | except Exception as e: 106 | logger.error(f"Error processing markdown file {md_file}: {str(e)}") 107 | sections["Miscellaneous"].append(( 108 | f"Error_{idx}", 109 | f"Error processing file {md_file}: {str(e)}" 110 | )) 111 | 112 | # Add TOC to master content 113 | master_content.extend(toc) 114 | master_content.append("\n---\n") 115 | 116 | # Add content by section 117 | for section_name, section_contents in sections.items(): 118 | if section_contents: 119 | master_content.append(f"# {section_name}") 120 | 121 | for filename, content in section_contents: 122 | # Add section anchor 123 | master_content.append(f"") 124 | 125 | # Clean up the content by removing the first heading if it matches the filename 126 | # This avoids duplication with our added heading 127 | content_lines = 
content.split("\n") 128 | if len(content_lines) > 0 and content_lines[0].startswith("# ") and filename in content_lines[0]: 129 | content = "\n".join(content_lines[1:]) 130 | 131 | master_content.append(f"## {filename}") 132 | master_content.append(content) 133 | master_content.append("\n---\n") 134 | 135 | # Add metadata and summary section 136 | master_content.append("# Metadata") 137 | master_content.append("## Document Sources") 138 | 139 | sources_table = ["| Source | Type | Date Included |"] 140 | sources_table.append("| --- | --- | --- |") 141 | 142 | for md_file in markdown_files: 143 | file_path = Path(md_file) 144 | file_type = file_path.suffix 145 | file_date = datetime.datetime.fromtimestamp(os.path.getmtime(md_file)).strftime('%Y-%m-%d') 146 | sources_table.append(f"| {file_path.stem} | {file_type} | {file_date} |") 147 | 148 | master_content.extend(sources_table) 149 | 150 | # Write the master file 151 | try: 152 | with open(output_path, 'w', encoding='utf-8') as f: 153 | f.write("\n\n".join(master_content)) 154 | logger.info(f"Master file generated: {output_path}") 155 | except Exception as e: 156 | logger.error(f"Error writing master file: {str(e)}") 157 | return "" 158 | 159 | return str(output_path) -------------------------------------------------------------------------------- /py/eodhd/gareebman_entry_exit.py: -------------------------------------------------------------------------------- 1 | ''' 2 | We are working here on identifying my favorite point of a company's business 3 | when there is a turn around. This analysis will try to capture from a price 4 | movement perspective. 5 | 6 | We solely rely on technical indicators for shortlisting in this scan. Ideally 7 | we should look for long bases, and then, we see if price is bottoming and 8 | then picking up. 9 | 10 | To keep it simple, we will track only RSI and Volstop. 11 | For favourable entries into watchlist we will look for, (in weekly timeframe) 12 | rsi > threshold (45) and volstop in uptrend. We will check with the previous 13 | weeks to see if we had a "False", and now we have a "True". This means entry. 14 | We dont expect to see too many flip-flops. 15 | We are also defining an exit (from the watchlist), if volstop is in downtrend. 16 | Again the same logic of comparing with previous week will apply. 17 | 18 | We are also keeping a count of the current "entry" or "exit". So, let us say 19 | a "trend" is "entry" and "duration" is 8, it means entry condition satisfied 20 | 8 bars ago and continues to remain "entry" (without "exit" condition triggered) 21 | 22 | So, do not confuse with the normal "entry" - "exit" terminology and method of 23 | trading. "entry" doesnt mean sell your house and take position. It means start 24 | to track it. 
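As a concrete sketch, the weekly toggle described above boils down to the
following checks (the 'rsi' and 'Uptrend' columns are the ones this script
computes further below; shown only to make the entry/exit wording precise):

    data['entry'] = (data['rsi'] > rsi_weekly_threshold) & data['Uptrend']
    data['exit'] = ~data['Uptrend']
    entry = data['entry'].iloc[-1] and not data['entry'].iloc[-2]  # flipped to True this week
    exit = data['exit'].iloc[-1] and not data['exit'].iloc[-2]     # flipped to True this week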
25 | ''' 26 | 27 | import pandas as pd 28 | import numpy as np 29 | import ta 30 | from ta.volatility import AverageTrueRange 31 | import datetime 32 | import pricereader as pr 33 | 34 | # Set output folder path 35 | output_path = "output" 36 | 37 | # Read the list of stocks from the CSV file 38 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 39 | 40 | # Interval 41 | data_interval_weekly = 'w' 42 | 43 | # RSI interval 44 | rsi_length = 14 45 | # RSI weekly threshold 46 | rsi_weekly_threshold = 45 47 | 48 | def rsi(data): 49 | # Calculate the RSI 50 | data['rsi'] = ta.momentum.RSIIndicator(data['Close'], window=rsi_length).rsi() 51 | return data 52 | 53 | def calculate_true_range(df): 54 | high_low = df['High'] - df['Low'] 55 | high_close = np.abs(df['High'] - df['Close'].shift()) 56 | low_close = np.abs(df['Low'] - df['Close'].shift()) 57 | true_ranges = pd.concat([high_low, high_close, low_close], axis=1) 58 | return true_ranges.max(axis=1) 59 | 60 | def calculate_atr(df, atrlen): 61 | df['TR'] = calculate_true_range(df) 62 | return df['TR'].rolling(window=atrlen, min_periods=1).mean() 63 | 64 | def vol_stop(df, atrlen=10, atrfactor=2.0): 65 | df['ATR'] = calculate_atr(df, atrlen) * atrfactor 66 | max_val = df['Close'].iloc[0] 67 | min_val = df['Close'].iloc[0] 68 | uptrend = True 69 | stop = 0.0 70 | 71 | stops = [] 72 | uptrends = [] 73 | 74 | for index, row in df.iterrows(): 75 | max_val = max(max_val, row['Close']) 76 | min_val = min(min_val, row['Close']) 77 | atrM = row['ATR'] 78 | 79 | if uptrend: 80 | stop = max(stop, max_val - atrM) 81 | else: 82 | stop = min(stop, min_val + atrM) 83 | 84 | if row['Close'] - stop >= 0.0: 85 | uptrend = True 86 | else: 87 | uptrend = False 88 | 89 | if uptrend != uptrends[-1] if uptrends else True: 90 | max_val = row['Close'] 91 | min_val = row['Close'] 92 | stop = max_val - atrM if uptrend else min_val + atrM 93 | 94 | stops.append(stop) 95 | uptrends.append(uptrend) 96 | 97 | df['VolStop'] = stops 98 | df['Uptrend'] = uptrends 99 | return df 100 | 101 | def main(): 102 | print("Started...") 103 | # Create the DataFrame 104 | result_df = pd.DataFrame(columns=['stock', 'Close', 'VolStop10_2.0', 'RSI(14)', 'Entry', 'Exit', 'Trend', 'Duration']) 105 | # Iterate through the list of stocks 106 | for stock in stocks["Ticker"]: 107 | try: 108 | # Get the stock data 109 | data = pr.get_price_data(stock, data_interval_weekly) 110 | # Drop those with NaN 111 | data = data.dropna() 112 | 113 | # Get RSI data 114 | data = rsi(data) 115 | 116 | # Get VolStop 117 | data = vol_stop(data) 118 | 119 | # Creating the 'entry' column 120 | data['entry'] = (data['rsi'] > rsi_weekly_threshold) & data['Uptrend'] 121 | 122 | # Creating the 'exit' column 123 | data['exit'] = ~data['Uptrend'] 124 | 125 | # Check entry toggle 126 | entry = data['entry'].iloc[-1] and not data['entry'].iloc[-2] 127 | 128 | # Check exit toggle 129 | exit = data['exit'].iloc[-1] and not data['exit'].iloc[-2] 130 | 131 | # Combine the 'entry' and 'exit' columns into a single column representing the current trend 132 | data['trend'] = np.where(data['entry'], 'entry', 'exit') 133 | 134 | # Identify where the trend changes 135 | trend_changes = data['trend'].ne(data['trend'].shift()).cumsum() 136 | 137 | # Group by these changes and count within each group 138 | data['trend_duration'] = data.groupby(trend_changes).cumcount() + 1 139 | 140 | row = {} 141 | 142 | if (entry or exit): 143 | row = {'stock': stock,'Close': str(round(data['Close'].iloc[-1], 
2)),'VolStop10_2.0':str(round(data['VolStop'].iloc[-1])), \ 144 | 'RSI(14)':str(round(data['rsi'].iloc[-1])), 'Trend': data['trend'].iloc[-1], \ 145 | 'Duration': data['trend_duration'].iloc[-1], 'Entry':entry,'Exit':exit} 146 | else: 147 | row = {'stock': stock,'Close': str(round(data['Close'].iloc[-1], 2)),'VolStop10_2.0':str(round(data['VolStop'].iloc[-1])), \ 148 | 'RSI(14)':str(round(data['rsi'].iloc[-1])), 'Trend': data['trend'].iloc[-1], \ 149 | 'Duration': data['trend_duration'].iloc[-1], 'Entry':'-','Exit':'-'} 150 | 151 | # Append the new row to the DataFrame 152 | result_df.loc[len(result_df)] = row 153 | 154 | except Exception as e: 155 | print("Error: " + stock) 156 | print(e) 157 | 158 | # Append current timestamp to the file name 159 | now = datetime.datetime.now() 160 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 161 | file_name = f'{output_path}/gareebman_report_' + timestamp + '.csv' 162 | # Export the DataFrame to CSV 163 | result_df.to_csv(file_name, index=False) 164 | print('Done') 165 | 166 | if __name__ == "__main__": 167 | main() 168 | -------------------------------------------------------------------------------- /py/ai/turnaround/README.md: -------------------------------------------------------------------------------- 1 | # Business Turnaround Detection System 2 | 3 | An AI-powered financial analysis tool that identifies potential business turnarounds by analyzing companies listed in a CSV file. The system uses advanced AI agents to research financial data and market conditions for each company, generating comprehensive markdown reports with turnaround potential verdicts. 4 | 5 | ## 🎯 Purpose 6 | 7 | This tool is designed to help investors and analysts identify companies that may be experiencing business turnarounds by: 8 | - Fetching latest financial reports and news 9 | - Analyzing financial health indicators 10 | - Determining turnaround potential with AI-driven insights 11 | - Generating structured markdown reports for each company 12 | 13 | ## 📁 Project Structure 14 | 15 | ``` 16 | turnaround/ 17 | ├── main.py # Main execution script 18 | ├── data/ 19 | │ └── financial_data.csv # Input CSV with company data 20 | ├── my_tools/ # Custom tools for the AI agent 21 | │ ├── __init__.py 22 | │ ├── cmd_executor.py # Shell command execution tool 23 | │ ├── fs_reader.py # File system reader tool 24 | │ ├── markdown_report.py # Report generation tool 25 | │ └── web_fetcher.py # Web search tool 26 | ├── output/ # Generated reports directory 27 | └── README.md # This file 28 | ``` 29 | 30 | ## 🔧 Prerequisites 31 | 32 | Before running this project, ensure you have: 33 | 34 | 1. **Python 3.8+** installed 35 | 2. **OpenAI API Key** - Required for the AI agent 36 | 3. **Internet connection** - For web research functionality 37 | 38 | ## 📦 Installation & Setup 39 | 40 | ### 1. Install Required Dependencies 41 | 42 | #### Option A: Using requirements.txt (Recommended) 43 | ```bash 44 | pip install -r requirements.txt 45 | ``` 46 | 47 | #### Option B: Manual Installation 48 | ```bash 49 | pip install smolagents python-dotenv openai litellm pandas numpy requests 50 | ``` 51 | 52 | ### 2. Environment Configuration 53 | 54 | Create a `.env` file in the project root directory: 55 | 56 | ```bash 57 | touch .env 58 | ``` 59 | 60 | Add your OpenAI API key to the `.env` file: 61 | 62 | ``` 63 | OPENAI_API_KEY=your_openai_api_key_here 64 | ``` 65 | 66 | ### 3. 
Prepare Input Data 67 | 68 | Ensure your `data/financial_data.csv` file follows this format: 69 | 70 | ```csv 71 | Name,BSE Code,NSE Code 72 | 63 Moons Tech.,526881,63MOONS 73 | Apex Frozen Food,540692,APEX 74 | Arman Financial,531179,ARMANFIN 75 | ``` 76 | 77 | **Required Columns:** 78 | - `Name`: Company name (required) 79 | - `BSE Code`: Bombay Stock Exchange code (optional) 80 | - `NSE Code`: National Stock Exchange code (optional) 81 | 82 | ### 4. Create Output Directory 83 | 84 | ```bash 85 | mkdir -p output 86 | ``` 87 | 88 | ## 🚀 Usage 89 | 90 | ### Basic Execution 91 | 92 | Run the turnaround analysis: 93 | 94 | ```bash 95 | cd /path/to/turnaround 96 | python main.py 97 | ``` 98 | 99 | ### What Happens During Execution 100 | 101 | 1. **Data Loading**: Reads companies from `data/financial_data.csv` 102 | 2. **AI Analysis**: For each company, the AI agent: 103 | - Searches web for latest financial reports 104 | - Gathers recent news and market data 105 | - Analyzes financial health indicators 106 | - Determines turnaround potential 107 | 3. **Report Generation**: Creates detailed markdown reports in the `output/` directory 108 | 109 | ### Sample Output 110 | 111 | Reports are saved as: `output/{business_name}{timestamp}_report.md` 112 | 113 | Each report includes: 114 | - **Business Name & Codes** 115 | - **Summary of Financial Data** 116 | - **Analysis of Financial Health** 117 | - **Turnaround Potential Verdict**: "Strong Turnaround", "Weak Turnaround", or "No Turnaround" 118 | 119 | ## 🔧 Configuration 120 | 121 | ### Model Configuration 122 | 123 | The system uses OpenAI's GPT-4.1-mini by default. To change the model, modify the `model` variable in `main.py`: 124 | 125 | ```python 126 | model = LiteLLMModel(model_id="openai/gpt-4-turbo", api_key=os.getenv("OPENAI_API_KEY")) 127 | ``` 128 | 129 | ### Analysis Steps 130 | 131 | The AI agent follows these steps: 132 | 1. Company identification and code mapping 133 | 2. Web research for financial data and news 134 | 3. Financial health analysis 135 | 4. Turnaround potential assessment 136 | 5. Report generation and saving 137 | 138 | ## 📊 Best Practices 139 | 140 | ### When to Run 141 | - **Ideal timing**: After quarterly earnings season 142 | - **Frequency**: Quarterly or semi-annually for best results 143 | - **Market conditions**: Consider running during market downturns for maximum turnaround identification 144 | 145 | ### Data Quality 146 | - Ensure company names and stock codes are accurate 147 | - Remove delisted or defunct companies from the CSV 148 | - Update the CSV with new companies of interest 149 | 150 | ## 🛠️ Troubleshooting 151 | 152 | ### Common Issues 153 | 154 | 1. **Missing API Key** 155 | ``` 156 | Error: OpenAI API key not found 157 | Solution: Check your .env file and ensure OPENAI_API_KEY is set 158 | ``` 159 | 160 | 2. **CSV File Not Found** 161 | ``` 162 | Error: The financial data file data/financial_data.csv does not exist 163 | Solution: Ensure the CSV file exists in the data/ directory 164 | ``` 165 | 166 | 3. **Network Issues** 167 | ``` 168 | Error: Web search failed 169 | Solution: Check internet connection and API quotas 170 | ``` 171 | 172 | 4. 
**Permission Errors** 173 | ``` 174 | Error: Cannot write to output directory 175 | Solution: Ensure output/ directory exists and has write permissions 176 | ``` 177 | 178 | ### Debugging 179 | 180 | Enable verbose logging by modifying the agent configuration: 181 | 182 | ```python 183 | response = agent.run(final_instructions, max_steps=20, verbose=True) 184 | ``` 185 | 186 | ## 📈 Output Interpretation 187 | 188 | ### Turnaround Verdicts 189 | 190 | - **Strong Turnaround**: Company shows clear signs of recovery with improving fundamentals 191 | - **Weak Turnaround**: Some positive indicators but recovery uncertain 192 | - **No Turnaround**: No significant improvement indicators found 193 | 194 | ### Report Sections 195 | 196 | Each generated report contains: 197 | - Executive summary with verdict 198 | - Financial metrics analysis 199 | - Market sentiment and news analysis 200 | - Risk factors and considerations 201 | - Timeline for potential recovery 202 | 203 | ## 🤝 Contributing 204 | 205 | To enhance this tool: 206 | 1. Add new analysis tools in the `my_tools/` directory 207 | 2. Extend the financial metrics analysis 208 | 3. Improve web scraping capabilities 209 | 4. Add visualization features 210 | 211 | ## ⚠️ Disclaimer 212 | 213 | This tool is for informational purposes only and should not be considered as financial advice. Always conduct thorough due diligence and consult with financial professionals before making investment decisions. 214 | 215 | ## 📝 License 216 | 217 | This project is part of the BharatTrader stock analysis suite. Please refer to the main project license for usage terms. 218 | -------------------------------------------------------------------------------- /py/yf/ss_result_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Stock Result Analysis Script for Screener Source Data 4 | 5 | This script processes stock data from ss_result_file.csv, which contains stock information 6 | from Screener. For each stock, it downloads historical price data from Yahoo Finance 7 | and calculates various performance metrics relative to benchmark. 8 | 9 | The script: 10 | 1. Reads stock information from a CSV file with 'companyId' format as 'NSE:SYMBOL' or 'BSE:SYMBOL' 11 | 2. Downloads historical price data for each stock using yfinance 12 | 3. Calculates performance metrics (stock change %, benchmark change %, Alpha, ARS) 13 | 4. Saves the enriched data to a new CSV file 14 | 15 | Usage: 16 | python ss_result_parser.py 17 | """ 18 | 19 | # Standard library imports 20 | import datetime 21 | import numpy as np 22 | import pandas as pd 23 | import yfinance as yf 24 | 25 | # Constants 26 | ARS_DATE = "2024-05-10" # ARS (Adaptive Relative Strength) reference date 27 | START_DATE = '2024-01-01' # Beginning of analysis period 28 | END_DATE = (datetime.datetime.now() + datetime.timedelta(days=1)).strftime('%Y-%m-%d') # today + 1 day 29 | 30 | RESULT_FILE = "ss_result_file.csv" 31 | OUTPUT_FILE = "final_ss_result_parser.csv" 32 | 33 | 34 | def main(): 35 | """ 36 | Main function to process stock data and calculate performance metrics. 37 | """ 38 | print('Started... 
with yfinance version:', yf.__version__) 39 | 40 | # Use yfinance to retrieve the benchmark data 41 | benchmark_ticker = yf.Ticker("^NSEI") # NIFTY 50 Index 42 | benchmark_data = benchmark_ticker.history(start=START_DATE, end=END_DATE, interval='1d', auto_adjust=False, prepost=False) 43 | benchmark_data = benchmark_data.dropna() 44 | 45 | # Read the result file 46 | result = pd.read_csv(RESULT_FILE) 47 | result = result.dropna(subset=['companyId']) # Only drop rows with no companyId 48 | 49 | # Process each stock 50 | for index, row in result.iterrows(): 51 | try: 52 | # Extract exchange and symbol from companyId 53 | company_id_parts = row['companyId'].split(':') 54 | exchange = company_id_parts[0] 55 | symbol = company_id_parts[1] 56 | 57 | print(f"Processing {row['Name']}...") 58 | 59 | # Set ticker format based on exchange 60 | if exchange == "NSE": 61 | stk_ticker = symbol + '.NS' 62 | elif exchange == "BSE": 63 | stk_ticker = symbol + '.BO' 64 | else: 65 | print(f"Unknown exchange for {row['companyId']}") 66 | continue 67 | 68 | stk_ticker = yf.Ticker(stk_ticker) 69 | stock_data = stk_ticker.history(start=START_DATE, end=END_DATE, interval='1d', auto_adjust=False, prepost=False) 70 | 71 | if stock_data.empty: 72 | print(f"No data available for {row['companyId']}") 73 | continue 74 | 75 | # Fetch Result Date, and then fetch the price on that date from stock_data. 76 | if pd.isna(row['Last Result Date']): 77 | print(f"No result date for {row['companyId']}") 78 | continue 79 | 80 | result_date = datetime.datetime.strptime(row['Last Result Date'], '%Y-%m-%d').strftime('%Y-%m-%d') 81 | result_price = 0.00 82 | 83 | # Get the last date in the stock data 84 | last_date = stock_data.index[-1].strftime('%Y-%m-%d') 85 | if last_date < result_date: 86 | print(f"Error: {row['companyId']} => Result Date {result_date} is greater than last date in stock data {last_date}") 87 | continue 88 | 89 | # If price not found on result date, try following dates 90 | while result_date <= last_date: 91 | try: 92 | result_price = stock_data.loc[stock_data.index == result_date, "Close"].values[0] 93 | break 94 | except: 95 | result_date = (datetime.datetime.strptime(result_date, '%Y-%m-%d') + datetime.timedelta(days=1)).strftime('%Y-%m-%d') 96 | continue 97 | 98 | # Calculate and add stock performance metrics 99 | add_stock_metrics(result, index, stock_data, result_date, result_price) 100 | 101 | # Calculate and add benchmark performance metrics 102 | add_benchmark_metrics(result, index, benchmark_data, result_date) 103 | 104 | # Calculate alpha and ARS 105 | calculate_comparative_metrics(result, index, stock_data, benchmark_data) 106 | 107 | except Exception as e: 108 | print(f'Error processing {row.get("companyId", "unknown")}: {e}') 109 | continue 110 | 111 | # Save the result file 112 | result.to_csv(OUTPUT_FILE, index=False) 113 | print(f"Processing complete. Results saved to {OUTPUT_FILE}") 114 | 115 | 116 | def add_stock_metrics(result_df, index, stock_data, result_date, result_price): 117 | """ 118 | Calculate and add stock-specific metrics to the result dataframe. 
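    Note: '% Stock change' is measured from the result-date close to the
    latest close, i.e. (last_close - result_price) / result_price * 100.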
119 | 120 | Args: 121 | result_df: The dataframe containing stock information 122 | index: The row index in the dataframe 123 | stock_data: Historical stock data from yfinance 124 | result_date: The date when the result was announced 125 | result_price: The stock price on the result date 126 | """ 127 | result_df.at[index, 'Result Date Price'] = round(result_price, 2) 128 | result_df.at[index, 'Last Close Date'] = stock_data.index[-1].strftime('%Y-%m-%d') 129 | result_df.at[index, 'Last Close Price'] = round(stock_data['Close'].iloc[-1], 2) 130 | result_df.at[index, '% Stock change'] = round((stock_data['Close'].iloc[-1] - result_price) / result_price * 100, 2) 131 | 132 | 133 | def add_benchmark_metrics(result_df, index, benchmark_data, result_date): 134 | """ 135 | Calculate and add benchmark metrics to the result dataframe. 136 | 137 | Args: 138 | result_df: The dataframe containing stock information 139 | index: The row index in the dataframe 140 | benchmark_data: Historical benchmark data from yfinance 141 | result_date: The date when the result was announced 142 | """ 143 | benchmark_result_price = benchmark_data.loc[benchmark_data.index == result_date, "Close"].values[0] 144 | result_df.at[index, 'Result Date Benchmark Price'] = round(benchmark_result_price, 2) 145 | result_df.at[index, 'Last Benchmark Date'] = benchmark_data.index[-1].strftime('%Y-%m-%d') 146 | result_df.at[index, 'Last Benchmark Price'] = round(benchmark_data['Close'].iloc[-1], 2) 147 | result_df.at[index, '% Benchmark change'] = round((benchmark_data['Close'].iloc[-1] - benchmark_result_price) / benchmark_result_price * 100, 2) 148 | 149 | 150 | def calculate_comparative_metrics(result_df, index, stock_data, benchmark_data): 151 | """ 152 | Calculate comparative performance metrics like Alpha and ARS. 
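    Note (restating the formulas used in the body below): Alpha is simply
    '% Stock change' - '% Benchmark change', while ARS compares the stock's
    move since ARS_DATE with the benchmark's move over the same window:

        ARS = (stock_close_latest / stock_close_on_ARS_DATE)
              / (benchmark_close_latest / benchmark_close_on_ARS_DATE) - 1

    A positive ARS therefore means the stock has outperformed the benchmark
    since ARS_DATE.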
153 | 154 | Args: 155 | result_df: The dataframe containing stock information 156 | index: The row index in the dataframe 157 | stock_data: Historical stock data from yfinance 158 | benchmark_data: Historical benchmark data from yfinance 159 | """ 160 | # Calculate alpha (stock performance relative to benchmark) 161 | result_df.at[index, 'Alpha'] = result_df.at[index, '% Stock change'] - result_df.at[index, '% Benchmark change'] 162 | 163 | # Calculate ARS (Adaptive Relative Strength) 164 | try: 165 | result_df.at[index, 'ARS'] = round( 166 | (stock_data['Close'].iloc[-1] / stock_data.loc[stock_data.index == ARS_DATE, "Close"].values[0]) / 167 | (benchmark_data['Close'].iloc[-1] / benchmark_data.loc[benchmark_data.index == ARS_DATE, "Close"].values[0]) - 1, 2) 168 | except: 169 | result_df.at[index, 'ARS'] = 0.00 # Error in calculating ARS, set it to 0.00 170 | 171 | 172 | if __name__ == "__main__": 173 | main() -------------------------------------------------------------------------------- /py/ai/nse_announcements/weekly_nse_announcements_analysis.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import requests 3 | import fitz # PyMuPDF 4 | import os 5 | from openai import OpenAI 6 | from urllib.parse import urlparse 7 | from dotenv import load_dotenv, find_dotenv 8 | from datetime import datetime 9 | import argparse 10 | import logging 11 | 12 | log_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 13 | LOCAL_MODEL = '' #'llama3.1:latest' # keep it blank if, gpt is used 14 | LOCAL_URL = 'http://10.0.0.4:7862/v1' # Update with cloud URL or Local 15 | GPT_MODEL = 'gpt-4o-mini' # if LOCAL_MODEL is blank, GPT will be used 16 | CONTEXT_LEN = 1500 17 | 18 | # Logging configuration 19 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 20 | logger = logging.getLogger() 21 | 22 | # Load environment variables 23 | def set_api(): 24 | load_dotenv(find_dotenv()) 25 | return os.getenv('OPENAI_API_KEY') 26 | 27 | # Get LLM client (GPT or local) 28 | def get_llm_client_model(): 29 | if not LOCAL_MODEL: 30 | gpt_client = OpenAI(api_key=set_api()) 31 | return gpt_client, GPT_MODEL 32 | else: 33 | my_local_client = OpenAI(base_url=LOCAL_URL, api_key="local-llm") 34 | return my_local_client, LOCAL_MODEL 35 | 36 | client, model = get_llm_client_model() 37 | 38 | critical_subjects = [ 39 | "Updates", "Press Release", "Financial Result Updates", "Sale or Disposal-XBRL", 40 | "Acquisition-XBRL", "Record Date", "Investor Presentation", 41 | "Change in Directors/Key Managerial Personnel/Auditor/Compliance Officer/Share Transfer Agent", 42 | "Acquisition", "Scheme of Arrangement", "Resignation", "Appointment", 43 | "Date of Payment of Dividend", "Dividend", "Increase in Authorised Capital", 44 | "Credit Rating", "Rights Issue", "Public Announcement-Open Offer" 45 | ] 46 | 47 | routine_updates_subjects = [ 48 | "Shareholders meeting", "Outcome of Board Meeting", "Copy of Newspaper Publication", 49 | "Analysts/Institutional Investor Meet/Con. 
Call Updates", "Loss/Duplicate-Share Certificate-XBRL", 50 | "Board Meeting Intimation", "Trading Window-XBRL", "Notice Of Shareholders Meetings-XBRL", 51 | "Change in Director(s)", "ESOP/ESOS/ESPS", "Clarification - Financial Results", 52 | "Corporate Insolvency Resolution Process-XBRL", "Limited Review Report", 53 | "Disclosure under SEBI (PIT) Reg 2015" 54 | ] 55 | 56 | # Function to download and extract PDF or XML text 57 | def download_and_extract_pdf(url, local_path): 58 | # Skip download if file already exists 59 | if os.path.exists(local_path): 60 | logger.info(f"File already exists locally: {local_path}") 61 | return extract_pdf_text(local_path) 62 | 63 | try: 64 | # Make the request to download the file 65 | response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}) 66 | response.raise_for_status() # Ensure no bad response 67 | 68 | # Check the Content-Type header to determine file type 69 | content_type = response.headers.get('Content-Type') 70 | file_extension = None 71 | 72 | if 'application/pdf' in content_type: 73 | file_extension = 'pdf' 74 | elif 'application/xml' in content_type: 75 | file_extension = 'xml' 76 | 77 | # Ensure we append the correct file extension to local_path 78 | if file_extension: 79 | local_path += f'.{file_extension}' 80 | else: 81 | logger.warning(f"Unknown content type: {content_type}. Assuming default .pdf") 82 | file_extension = 'pdf' 83 | local_path += '.pdf' 84 | 85 | # Write the file to the local path 86 | with open(local_path, 'wb') as f: 87 | f.write(response.content) 88 | 89 | # Extract text based on file type 90 | if file_extension == 'pdf': 91 | return extract_pdf_text(local_path) 92 | elif file_extension == 'xml': 93 | return extract_xml_text(local_path) 94 | else: 95 | logger.error(f"Unsupported file type: {file_extension}") 96 | return "" 97 | 98 | except requests.RequestException as e: 99 | logger.error(f"Failed to download {url}: {e}") 100 | return "" 101 | 102 | # Function to extract text from PDF 103 | def extract_pdf_text(local_path): 104 | try: 105 | doc = fitz.open(local_path) 106 | text = "".join(page.get_text() for page in doc) 107 | return text 108 | except Exception as e: 109 | logger.error(f"Failed to extract text from PDF {local_path}: {e}") 110 | return "" 111 | 112 | # Function to extract text from XML 113 | def extract_xml_text(local_path): 114 | try: 115 | with open(local_path, 'r') as f: 116 | return f.read() 117 | except Exception as e: 118 | logger.error(f"Failed to extract text from XML file {local_path}: {e}") 119 | return "" 120 | 121 | # Truncate text to context length 122 | def truncate_words(text): 123 | words = text.split() 124 | return ' '.join(words[:CONTEXT_LEN]) if len(words) > CONTEXT_LEN else text 125 | 126 | # Get summary and sentiment using OpenAI API 127 | def get_summary_and_sentiment(text): 128 | truncated_text = truncate_words(text) 129 | try: 130 | response = client.chat.completions.create( 131 | model=model, temperature=1.0, max_tokens=500, 132 | messages=[ 133 | {"role": "user", "content": "Please summarize the company announcement provided."}, 134 | {"role": "user", "content": truncated_text} 135 | ] 136 | ) 137 | summary = response.choices[0].message.content 138 | sentiment_response = client.chat.completions.create( 139 | model=model, temperature=1.0, max_tokens=20, 140 | messages=[ 141 | {"role": "user", "content": f"Provide an investor sentiment analysis score in a scale between 0 (negative sentiment) to 1 (positive sentiment) for the following text. 
The answer should be a single float value, no explanation is required: {summary}"} 142 | ] 143 | ) 144 | sentiment_score = float(sentiment_response.choices[0].message.content.strip()) 145 | return summary, sentiment_score 146 | except Exception as e: 147 | logger.error(f"Error in generating summary/sentiment: {e}") 148 | return "", -1.0 149 | 150 | # Write result to file 151 | def write_to_file(file, data): 152 | with open(file, 'a') as f: 153 | f.write(data) 154 | 155 | # Main processing function 156 | def process_announcement(index, row, stock): 157 | pdf_url = row['ATTACHMENT'] 158 | filename = os.path.basename(urlparse(pdf_url).path) 159 | pdf_local_path = os.path.join('notifications', filename) 160 | pdf_text = download_and_extract_pdf(pdf_url, pdf_local_path) 161 | summary, sentiment_score = get_summary_and_sentiment(pdf_text) 162 | return { 163 | 'Stock': stock, 'Company': row['COMPANY NAME'], 'Subject': row['SUBJECT'], 164 | 'Summary': summary, 'Score': sentiment_score, 'Link': row['ATTACHMENT'] 165 | } 166 | 167 | # Main function 168 | def main(): 169 | # Parse command line arguments 170 | parser = argparse.ArgumentParser(description='Analyze announcements') 171 | parser.add_argument('--file', type=str, help='Input file path') 172 | parser.add_argument('--start', type=str, help='Stock to start from in stocks.csv') 173 | args = parser.parse_args() 174 | 175 | try: 176 | stocks = pd.read_csv("stocks.csv", usecols=["Ticker"]) 177 | df = pd.read_csv(args.file) 178 | df = df[~df['SUBJECT'].isin(routine_updates_subjects) & df['SUBJECT'].isin(critical_subjects)] 179 | logger.info(f"Analyzing {len(df)} announcements") 180 | 181 | result_df = pd.DataFrame(columns=['Stock', 'Company', 'Subject', 'Summary', 'Score', 'Link']) 182 | 183 | for stock in stocks["Ticker"]: 184 | for index, row in df[df['SYMBOL'] == stock].iterrows(): 185 | try: 186 | result = process_announcement(index, row, stock) 187 | # Append the new row to the DataFrame 188 | result_df.loc[len(result_df)] = result 189 | except Exception as e: 190 | logger.error(f"Error processing {stock}: {e}") 191 | 192 | file_name = f'output/{args.file}_report_{log_timestamp}.csv' 193 | result_df.to_csv(file_name, index=False) 194 | logger.info(f"Results saved to {file_name}") 195 | 196 | except Exception as e: 197 | logger.error(f"Error during processing: {e}") 198 | 199 | if __name__ == "__main__": 200 | main() 201 | -------------------------------------------------------------------------------- /py/eodhd/mip12_scanner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Momentum Investing Scanner (MIP‑12) (Modified from Prashanth Sir’s book) 3 | 4 | This module implements a momentum‑based stock scanner following the “MIP‑12” strategy 5 | from Prashanth Sir’s recent book. It filters and ranks Nifty 500 stocks by multiple 6 | technical criteria and outputs a CSV report. The original algorithm has been modified 7 | to include a ranking metric based on the Sharpe ratio, rather than Volar as that is proprietary. 8 | 9 | --- Overview --- 10 | 1. Market Trend Filter: 11 | Checks if the benchmark index (e.g., Nifty 500) is above its 20‑day EMA. 12 | 2. Entry Filters (applied only when market is bullish): 13 | • 52‑Week High Retracement: stock must be within 50% of its 52‑week high. 14 | • 200‑Day EMA: stock’s latest close must exceed its 200‑day EMA. 15 | 3. Ranking Metric: 16 | Computes a simple Sharpe ratio (mean daily return ÷ standard deviation of daily returns). 17 | 4. 
Final Selection: 18 | • If market is bullish: all stocks passing entry filters are ranked by Sharpe ratio. 19 | • If market is bearish: no new entries are considered, but ranking is still performed. 20 | 5. Output: 21 | • `mip12_scan_report.csv` with columns: 22 | Ticker, Rank#, Price, 52W_High, 200D_EMA, Sharpe_Ratio 23 | • `mip12_scan_errors.csv` capturing any per‑symbol exceptions. 24 | 25 | --- Functions --- 26 | market_trend_filter(benchmark_df, ema_period=20) → bool 27 | get_52w_high(stock_df, period=252) → float 28 | get_200d_ema(stock_df, period=200) → float 29 | compute_sharpe_ratio(stock_df) → float 30 | 31 | --- Main Flow --- 32 | 1. Load benchmark data. 33 | 2. Determine `is_bullish` flag based on the market trend filter. 34 | 3. Loop over each symbol: 35 | a. Load its price series. 36 | b. If bullish, enforce entry filters (52W High Retracement and 200D EMA). 37 | c. Compute Sharpe ratio for ranking. 38 | d. Append record (Ticker, Price, 52W_High, 200D_EMA, Sharpe_Ratio). 39 | e. Catch and log any exceptions per symbol. 40 | 4. Build a DataFrame, sort by Sharpe ratio, and insert Rank#. 41 | 5. Export the report and any errors to CSV. 42 | 43 | --- Logging & Error Handling --- 44 | - Uses Python’s `logging` module to record INFO and ERROR messages. 45 | - Errors for individual symbols are collected and saved to `mip12_scan_errors.csv`. 46 | 47 | Usage: 48 | python mip12_scanner.py 49 | 50 | """ 51 | import pricereader as pr 52 | import pandas as pd 53 | import numpy as np 54 | import logging 55 | 56 | # Configure logging 57 | logging.basicConfig( 58 | level=logging.INFO, 59 | format='%(asctime)s %(levelname)s: %(message)s', 60 | datefmt='%Y-%m-%d %H:%M:%S' 61 | ) 62 | 63 | # Interval 64 | data_interval = 'd' 65 | 66 | # Benchmark symbol 67 | benchmark = "CRSLDX" # Nifty 500 Index 68 | 69 | # Read the list of stocks from the CSV file 70 | stocks = pd.read_csv("nifty500.csv", header=0, usecols=["Ticker"]) 71 | 72 | # --- Helper functions --- 73 | 74 | def market_trend_filter(benchmark_df: pd.DataFrame, 75 | ema_period: int = 20, 76 | price_col: str = 'Close') -> bool: 77 | """Return True if latest benchmark Close > its EMA.""" 78 | ema = benchmark_df[price_col].ewm(span=ema_period, adjust=False).mean() 79 | return benchmark_df[price_col].iloc[-1] > ema.iloc[-1] 80 | 81 | def get_52w_high(stock_df: pd.DataFrame, 82 | period: int = 252, 83 | price_col: str = 'Close') -> float: 84 | """Return the 52‑week high price, or NaN if insufficient data.""" 85 | closes = stock_df[price_col].dropna() 86 | if len(closes) < period: 87 | return float('nan') 88 | return closes.iloc[-period:].max() 89 | 90 | def get_200d_ema(stock_df: pd.DataFrame, 91 | period: int = 200, 92 | price_col: str = 'Close') -> float: 93 | """Return the most recent 200‑day EMA, or NaN if insufficient data.""" 94 | closes = stock_df[price_col].dropna() 95 | if len(closes) < period: 96 | return float('nan') 97 | ema = closes.ewm(span=period, adjust=False).mean() 98 | return ema.iloc[-1] 99 | 100 | def passes_ratio_200d_ema(stock_df: pd.DataFrame, 101 | benchmark_df: pd.DataFrame, 102 | period: int = 200, 103 | price_col: str = 'Close') -> bool: 104 | """ 105 | Return True if the latest ratio of stock/benchmark Close is above 106 | its 200‑day EMA on the ratio series. 
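    In other words, this is a relative-strength filter on the stock/benchmark
    ratio (the "RS line"): the stock only qualifies while its RS line trades
    above its own 200-day EMA, i.e. it has been outperforming the benchmark.
    A minimal sketch of the same check, mirroring the body below:

        rs = (stock_df[price_col] / benchmark_df[price_col]).dropna()
        passes = rs.iloc[-1] > rs.ewm(span=period, adjust=False).mean().iloc[-1]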
107 | """ 108 | # align on common dates 109 | ratio = (stock_df[price_col] / benchmark_df[price_col]).dropna() 110 | if len(ratio) < period: 111 | return False 112 | ema = ratio.ewm(span=period, adjust=False).mean() 113 | return ratio.iloc[-1] > ema.iloc[-1] 114 | 115 | def compute_sharpe_ratio(stock_df: pd.DataFrame, 116 | price_col: str = 'Close', 117 | period: int = 252) -> float: 118 | """ 119 | Compute the Sharpe ratio as mean(daily returns) / std(daily returns) for the last `period` days. 120 | Returns 0.0 if there is insufficient data or if the annualized volatility is zero. 121 | """ 122 | df_1y = stock_df.tail(period).copy() 123 | 124 | # Calculate 12M ROC 125 | current_price = df_1y['Close'].iloc[-1] 126 | price_1y_ago = df_1y['Close'].iloc[0] 127 | roc_12m = (current_price / price_1y_ago) - 1 128 | 129 | # Daily returns & volatility 130 | df_1y['daily_return'] = df_1y['Close'].pct_change() 131 | daily_vol = df_1y['daily_return'].std() 132 | annualized_vol = daily_vol * np.sqrt(period) 133 | 134 | return 0.0 if annualized_vol == 0 else roc_12m / annualized_vol 135 | 136 | 137 | # --- Main scanning function --- 138 | 139 | def main(): 140 | 141 | logging.info("Scan started.") 142 | 143 | # 1. Load & trim benchmark data 144 | benchmark_data = pr.get_price_data(benchmark, data_interval) 145 | 146 | # 2. Check market trend 147 | is_bullish = market_trend_filter(benchmark_data) 148 | if is_bullish: 149 | logging.info("Market is bullish → full entry filters apply.") 150 | else: 151 | logging.info("Market is NOT bullish → only ranking/exits, no new entries.") 152 | print("Market is NOT bullish → only ranking/exits, no new entries.") 153 | 154 | # 3. Prepare lists 155 | candidates = stocks["Ticker"].tolist() 156 | records = [] 157 | errors = [] 158 | 159 | # 4. Per‐stock processing 160 | for symbol in candidates: 161 | try: 162 | print(f"Processing {symbol}...") 163 | df = pr.get_price_data(symbol, data_interval) 164 | if df.empty: 165 | continue # no data in date range 166 | 167 | # Entry filters if bullish 168 | high_52w = get_52w_high(df) 169 | ema_200 = get_200d_ema(df) 170 | 171 | 172 | price = df['Close'].iloc[-1] 173 | if pd.isna(high_52w) or price < 0.5 * high_52w: 174 | logging.info("Skipping %s: 52W high retracement not met.", symbol) 175 | continue 176 | if pd.isna(ema_200) or price <= ema_200: 177 | logging.info("Skipping %s: 200D EMA not met.", symbol) 178 | continue 179 | 180 | if not passes_ratio_200d_ema(df, benchmark_data): 181 | logging.info("Skipping %s: ratio chart condition not met.", symbol) 182 | continue 183 | 184 | # Compute ranking metric 185 | sharpe = compute_sharpe_ratio(df) 186 | 187 | # Record all required fields 188 | records.append({ 189 | "Ticker": symbol, 190 | "Price": df['Close'].iloc[-1], 191 | "52W_High": high_52w, 192 | "200D_EMA": ema_200, 193 | "Sharpe_Ratio": sharpe 194 | }) 195 | 196 | except Exception as e: 197 | logging.error(f"Error processing {symbol}: {e}") 198 | errors.append({"Ticker": symbol, "Error": str(e)}) 199 | 200 | # 5. Build final report DataFrame 201 | report_df = pd.DataFrame(records) 202 | report_df = report_df.dropna(subset=["Sharpe_Ratio"]) 203 | report_df = report_df.sort_values("Sharpe_Ratio", ascending=False) 204 | report_df.insert(1, "Rank#", range(1, len(report_df) + 1)) 205 | 206 | # 6. Export results 207 | report_df.to_csv("mip12_scan_report.csv", index=False) 208 | logging.info("Report saved to mip12_scan_report.csv.") 209 | 210 | # 7. 
Optionally export errors 211 | if errors: 212 | err_df = pd.DataFrame(errors) 213 | err_df.to_csv("mip12_scan_errors.csv", index=False) 214 | logging.info("Errors saved to mip12_scan_errors.csv.") 215 | 216 | return report_df 217 | 218 | # If this script is run directly, invoke main(): 219 | if __name__ == "__main__": 220 | main() 221 | -------------------------------------------------------------------------------- /py/yf/limevolume.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Volume is where the whole story begins. So it is important to determine volume expansions. 3 | On charts, one can look for volume expansions, when they breach daily/weekly averages by huge margins. (LimeVolume day) 4 | This indicate institutional demand. 5 | Expansion of volume and presense of demand at different life cycle stages of a stock can mean different things. 6 | For example, a limevolume day observed in Stage 1 for the first time, may be the first signal of demand, but not good 7 | to initiate a long trade just yet, because instituion will absorb the supply gradually. 8 | If the base is instead formed well, and we start to see limevolume with higher lows on price chart, it might indicate 9 | begining of stage 2. 10 | If a scrip is already in an established up trend, (Stage 2), then limevolume days on a sideways (resting) trend, indicates 11 | renewed demand either by the same institution or a new player interested in the company. Maybe suitable for top-up. 12 | ''' 13 | 14 | import yfinance as yf 15 | import pandas as pd 16 | import numpy as np 17 | import math 18 | import csv 19 | import datetime 20 | 21 | # Read the list of stocks from the CSV file 22 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 23 | # Exchange ".BO" for BSE, ".NS" for Nifty 24 | exchg = ".NS" 25 | 26 | # Set start Date 27 | start_date = '2022-07-25' # Should be a date that is start of the week date, so that daily and weekly data can match 28 | 29 | # Set end Date 30 | end_date = '2023-07-29' 31 | # Folder location 32 | output = 'output' 33 | 34 | # Interval 35 | data_interval_wkeely = '1wk' 36 | data_interval_daily = '1d' 37 | 38 | # Weekly volume average length 39 | weekly_volume_length = 10 40 | # Daily volume average length 41 | daily_volume_length = 100 42 | 43 | # Number of days to check for limevolume 44 | lookback_length = 55 #3-months daily 45 | 46 | # Read up sector/industry information from text data 47 | stock_industry_map = pd.read_csv("stock_sector_industry_map.csv", header=0, usecols=["NSE Code","Industry","Market Cap", "Sector"]) 48 | 49 | # Crore 50 | One_Cr = 10000000 51 | 52 | def fetch_industry_mcap(nse_code): 53 | 54 | industry = '' 55 | mcap = '' 56 | sector = '' 57 | 58 | try: 59 | # We try to get from local file first 60 | sector = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Sector'].iloc[0] 61 | industry = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Industry'].iloc[0] 62 | mcap = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Market Cap'].iloc[0] 63 | except Exception as err: 64 | pass 65 | 66 | if industry == '' or mcap == '': 67 | try: 68 | # Try yf 69 | ticker = yf.Ticker(nse_code+".NS") 70 | if ticker.info: 71 | if industry == '': 72 | industry = ticker.info['industry'] 73 | if mcap == '': 74 | mcap = round(ticker.info['marketCap'] / One_Cr, 0) 75 | if sector == '': 76 | sector = ticker.info['sector'] 77 | except Exception as err: 78 | pass 79 | 80 | return [sector, industry, 
mcap] 81 | 82 | def main(): 83 | print("Started... " + start_date + " - " + end_date) 84 | 85 | # Create the DataFrame 86 | df = pd.DataFrame(columns=['stock', 'mcap', 'blueVolCount', 'limeVolToday', 'limeVolCount', 'latestLimeVolDate', 'earliestLimeVolDate', 'tealVolCount', 'latestTealVolDate', \ 87 | 'earliestTealVolDate', 'priceChng', 'sector' , 'industry']) 88 | # Iterate through the list of stocks 89 | for stock in stocks["Ticker"]: 90 | try: 91 | print(f'Analyzing {stock}...') 92 | # Get the stock data 93 | stk_ticker = yf.Ticker(stock+exchg) 94 | # Get the stock data from yfinance, dont adjust OHLC 95 | stock_data_daily = stk_ticker.history(start=start_date, end=end_date,interval=data_interval_daily,auto_adjust=False, prepost=False) 96 | # Drop those with NaN 97 | stock_data_daily = stock_data_daily.dropna() 98 | 99 | stock_data_weekly = stk_ticker.history(start=start_date, end=end_date,interval=data_interval_wkeely,auto_adjust=False, prepost=False) 100 | # Drop those with NaN 101 | stock_data_weekly = stock_data_weekly.dropna() 102 | 103 | #10wk avg volume 104 | weekly_vol_avg_col = f'Weekly_Volume_Avg{weekly_volume_length}' 105 | stock_data_weekly[weekly_vol_avg_col] = stock_data_weekly['Volume'].rolling(window=weekly_volume_length, min_periods=1).mean().fillna(0) 106 | 107 | #100d avg volule 108 | daily_vol_avg_col = f'Daily_Volume_Avg{daily_volume_length}' 109 | stock_data_daily[daily_vol_avg_col] = stock_data_daily['Volume'].rolling(window=daily_volume_length, min_periods=1).mean().fillna(0) 110 | 111 | # Create a new column in the daily data to store the corresponding weekly volume 112 | stock_data_daily[weekly_vol_avg_col] = 0 113 | 114 | # Loop through each row in the daily data 115 | mismatch_ctr = 0 116 | never_matched = True 117 | for i, row in stock_data_daily.iterrows(): 118 | # Extract the date from the current row 119 | date = row.name.date() 120 | 121 | # Look up the corresponding row in the weekly data 122 | weekly_row = stock_data_weekly.loc[stock_data_weekly.index.date == date] 123 | 124 | # If there is no corresponding weekly data for the current date, propagate the last known weekly volume forward 125 | if len(weekly_row) == 0: 126 | if never_matched and mismatch_ctr < 7: 127 | mismatch_ctr = mismatch_ctr + 1 128 | continue # Try to match up data for next week 129 | stock_data_daily.at[i, weekly_vol_avg_col] = stock_data_daily[weekly_vol_avg_col].shift(1)[i] 130 | # If there is corresponding weekly data for the current date, fetch the volume and set it in the daily data 131 | else: 132 | never_matched = False 133 | weekly_avg_volume = weekly_row[weekly_vol_avg_col].iloc[0] 134 | stock_data_daily.at[i, weekly_vol_avg_col] = weekly_avg_volume 135 | 136 | isTodayLimeVolume = False 137 | cntLimeCount = 0 138 | cntTealCount = 0 139 | pctChange = 0 140 | earliestLimeVolDate = '' 141 | latestLimeVolDate = '' 142 | earliestTealVolDate = '' 143 | latestTealVolDate = '' 144 | # reverse 145 | stock_data_daily = stock_data_daily.iloc[::-1] 146 | 147 | if len(stock_data_daily) > lookback_length: 148 | for i in range(0, lookback_length): 149 | if stock_data_daily['Close'][i] > stock_data_daily['Close'][i+1]: # Up Day 150 | weekly_avg_to_compare = stock_data_daily[weekly_vol_avg_col][i] 151 | for j in range(i+1, i+7): # Find the previous week volume average, by checking previous unmatched value 152 | _weekly_avg = stock_data_daily[weekly_vol_avg_col][j] 153 | if _weekly_avg != weekly_avg_to_compare: 154 | weekly_avg_to_compare = _weekly_avg 155 | break 156 | if 
stock_data_daily['Volume'][i] > weekly_avg_to_compare: # Now compare if this day's volume is greater than weekly average volume 157 | cntLimeCount = cntLimeCount + 1 158 | earliestLimeVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y") 159 | if cntLimeCount == 1: 160 | latestLimeVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y") 161 | pctChange = round(((stock_data_daily['Close'][i] / stock_data_daily['Close'][i+1]) - 1 ) * 100, 2) 162 | if i == 0: 163 | isTodayLimeVolume = True 164 | # Teal Volume 165 | if stock_data_daily['Volume'][i] > stock_data_daily[daily_vol_avg_col][i]: # Now compare if this day's volume is greater than daily average volume 166 | cntTealCount = cntTealCount + 1 167 | earliestTealVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y") 168 | if cntTealCount == 1: 169 | latestTealVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y") 170 | 171 | # Fetch industy and mcap 172 | [sector, industry, marketCap] = fetch_industry_mcap(stock) 173 | 174 | blueVolCnt = cntLimeCount + cntTealCount 175 | row = {'stock': stock, 'blueVolCount': str(blueVolCnt), 'limeVolToday' : str(isTodayLimeVolume), 'limeVolCount': str(cntLimeCount), \ 176 | 'latestLimeVolDate' : str(latestLimeVolDate), 'earliestLimeVolDate' : str(earliestLimeVolDate), \ 177 | 'tealVolCount': str(cntTealCount), 'latestTealVolDate' : str(latestTealVolDate), 'earliestTealVolDate' : str(earliestTealVolDate), \ 178 | 'mcap' : marketCap, 'priceChng': str(pctChange), 'sector' : sector, 'industry' : industry} 179 | # Append the new row to the DataFrame 180 | df.loc[len(df)] = row 181 | 182 | except Exception as e: 183 | print(f'Error: {stock} => {e}') 184 | # Append current timestamp to the file name 185 | now = datetime.datetime.now() 186 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 187 | file_name = f'{output}/limevolume_{timestamp}.csv' 188 | # Export the DataFrame to CSV 189 | df.to_csv(file_name, index=False) 190 | print('Done') 191 | 192 | if __name__ == "__main__": 193 | main() 194 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/src/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | FinInsightGPT - AI-Powered Investment Analysis Application 4 | 5 | This application processes company data files, converts them to markdown, 6 | creates consolidated master files, and generates equity research reports. 
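Typical invocations (a sketch based on the sub-commands defined in
setup_argparse below; <company_folder> and <master_file> are placeholders):

    python main.py list                                # list available company folders
    python main.py process <company_folder>            # convert source documents to markdown
    python main.py master <company_folder>             # build the consolidated master file
    python main.py report <master_file> --model gpt-4-turbo
    python main.py all <company_folder>                # process + master + report in one go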
7 | """ 8 | 9 | import os 10 | import sys 11 | import argparse 12 | import logging 13 | from pathlib import Path 14 | from typing import List, Optional 15 | 16 | # Load environment variables from .env file 17 | try: 18 | from dotenv import load_dotenv 19 | load_dotenv() # Load variables from .env file 20 | ENV_LOADED = True 21 | except ImportError: 22 | ENV_LOADED = False 23 | logging.warning("dotenv not found, environment variables must be set manually") 24 | 25 | # Configure logging 26 | logging.basicConfig( 27 | level=logging.INFO, 28 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 29 | ) 30 | logger = logging.getLogger(__name__) 31 | 32 | # Import local modules 33 | from document_processor import process_company_folder 34 | from master_file_generator import generate_master_file 35 | from report_generator import generate_report 36 | 37 | 38 | def setup_argparse() -> argparse.ArgumentParser: 39 | """Set up command-line arguments.""" 40 | parser = argparse.ArgumentParser( 41 | description="FinInsightGPT - AI-Powered Investment Analysis Application" 42 | ) 43 | 44 | subparsers = parser.add_subparsers(dest='command', help='Command to run') 45 | 46 | # Process command 47 | process_parser = subparsers.add_parser('process', help='Process files in a company folder') 48 | process_parser.add_argument('company_folder', help='Path to the company folder') 49 | 50 | # Master file command 51 | master_parser = subparsers.add_parser('master', help='Generate master file from processed files') 52 | master_parser.add_argument('company_folder', help='Path to the company folder') 53 | master_parser.add_argument('--output-dir', help='Directory to save the master file (defaults to company folder)') 54 | 55 | # Report command 56 | report_parser = subparsers.add_parser('report', help='Generate report from master file') 57 | report_parser.add_argument('master_file', help='Path to the master markdown file') 58 | report_parser.add_argument('--template', help='Path to the report template (default: prompt_master/Equity_Research_Report_Template.md)') 59 | report_parser.add_argument('--output-dir', help='Directory to save the report (defaults to master file directory)') 60 | report_parser.add_argument('--model', help='LLM model to use (default: gpt-4-turbo)') 61 | 62 | # All-in-one command 63 | all_parser = subparsers.add_parser('all', help='Process everything end-to-end') 64 | all_parser.add_argument('company_folder', help='Path to the company folder') 65 | all_parser.add_argument('--template', help='Path to the report template (default: prompt_master/Equity_Research_Report_Template.md)') 66 | all_parser.add_argument('--model', default='gpt-4-turbo', help='LLM model to use (default: gpt-4-turbo)') 67 | 68 | # List companies command 69 | subparsers.add_parser('list', help='List all available company folders') 70 | 71 | return parser 72 | 73 | 74 | def list_companies(base_path: str = "../company_data") -> List[str]: 75 | """List all company folders in the company_data directory.""" 76 | base_path = Path(base_path) 77 | 78 | if not base_path.exists() or not base_path.is_dir(): 79 | logger.error(f"Company data directory not found: {base_path}") 80 | return [] 81 | 82 | companies = [] 83 | 84 | for item in base_path.iterdir(): 85 | if item.is_dir() and not item.name.startswith('.'): 86 | companies.append(item.name) 87 | 88 | return companies 89 | 90 | 91 | def run_process_command(args: argparse.Namespace) -> None: 92 | """Process files in a company folder.""" 93 | company_folder = args.company_folder 94 | 
95 | # Ensure path is absolute 96 | if not os.path.isabs(company_folder): 97 | script_dir = Path(__file__).parent.absolute() 98 | company_data_dir = script_dir.parent / "company_data" 99 | company_folder = os.path.join(company_data_dir, company_folder) 100 | 101 | logger.info(f"Processing files in: {company_folder}") 102 | processed_files = process_company_folder(company_folder) 103 | 104 | if not processed_files: 105 | logger.warning("No files were processed.") 106 | else: 107 | logger.info(f"Successfully processed {len(processed_files)} files.") 108 | 109 | 110 | def run_master_command(args: argparse.Namespace) -> Optional[str]: 111 | """Generate master file from processed files.""" 112 | company_folder = args.company_folder 113 | output_dir = args.output_dir 114 | 115 | # Ensure path is absolute 116 | if not os.path.isabs(company_folder): 117 | script_dir = Path(__file__).parent.absolute() 118 | company_data_dir = script_dir.parent / "company_data" 119 | company_folder = os.path.join(company_data_dir, company_folder) 120 | 121 | # Get company name from folder path 122 | company_name = Path(company_folder).name 123 | 124 | # Find processed markdown files 125 | processed_folder = Path(company_folder) / "processed" 126 | 127 | if not processed_folder.exists() or not processed_folder.is_dir(): 128 | logger.error(f"Processed folder not found: {processed_folder}") 129 | return None 130 | 131 | markdown_files = [] 132 | for file in processed_folder.glob("*.md"): 133 | if file.is_file(): 134 | markdown_files.append(str(file)) 135 | 136 | if not markdown_files: 137 | logger.error("No processed markdown files found.") 138 | return None 139 | 140 | logger.info(f"Found {len(markdown_files)} processed files.") 141 | 142 | # Generate master file 143 | master_file_path = generate_master_file( 144 | company_name=company_name, 145 | markdown_files=markdown_files, 146 | output_dir=output_dir 147 | ) 148 | 149 | if master_file_path: 150 | logger.info(f"Successfully generated master file: {master_file_path}") 151 | else: 152 | logger.error("Failed to generate master file.") 153 | 154 | return master_file_path 155 | 156 | 157 | def run_report_command(args: argparse.Namespace) -> Optional[str]: 158 | """Generate report from master file.""" 159 | master_file = args.master_file 160 | template_path = args.template 161 | output_dir = args.output_dir 162 | model = args.model 163 | 164 | # If model not specified in args, use the environment variable 165 | if model is None: 166 | model = os.environ.get("OPENAI_TEXT_MODEL", "gpt-4-turbo") 167 | 168 | # Ensure master file path is absolute 169 | if not os.path.isabs(master_file): 170 | script_dir = Path(__file__).parent.absolute() 171 | company_data_dir = script_dir.parent / "company_data" 172 | master_file = os.path.join(company_data_dir, master_file) 173 | 174 | # Ensure template path is set 175 | if template_path is None: 176 | script_dir = Path(__file__).parent.absolute() 177 | template_path = script_dir.parent / "prompt_master" / "Equity_Research_Report_Template.md" 178 | elif not os.path.isabs(template_path): 179 | script_dir = Path(__file__).parent.absolute() 180 | template_path = script_dir.parent / template_path 181 | 182 | # Check if files exist 183 | if not os.path.exists(master_file): 184 | logger.error(f"Master file not found: {master_file}") 185 | return None 186 | 187 | if not os.path.exists(template_path): 188 | logger.error(f"Template file not found: {template_path}") 189 | return None 190 | 191 | # Generate report 192 | report_file_path = 
generate_report( 193 | master_file_path=master_file, 194 | template_path=str(template_path), 195 | output_dir=output_dir, 196 | model=model 197 | ) 198 | 199 | if report_file_path: 200 | logger.info(f"Successfully generated report: {report_file_path}") 201 | else: 202 | logger.error("Failed to generate report.") 203 | 204 | return report_file_path 205 | 206 | 207 | def run_all_command(args: argparse.Namespace) -> None: 208 | """Process everything end-to-end: process files, generate master file, and generate report.""" 209 | company_folder = args.company_folder 210 | template_path = args.template 211 | model = args.model 212 | 213 | # Process files 214 | process_args = argparse.Namespace(company_folder=company_folder) 215 | run_process_command(process_args) 216 | 217 | # Generate master file 218 | master_args = argparse.Namespace(company_folder=company_folder, output_dir=None) 219 | master_file_path = run_master_command(master_args) 220 | 221 | if not master_file_path: 222 | logger.error("Cannot continue without a master file.") 223 | return 224 | 225 | # Generate report 226 | report_args = argparse.Namespace( 227 | master_file=master_file_path, 228 | template=template_path, 229 | output_dir=None, 230 | model=model 231 | ) 232 | report_file_path = run_report_command(report_args) 233 | 234 | if report_file_path: 235 | logger.info("End-to-end processing completed successfully.") 236 | else: 237 | logger.error("End-to-end processing failed during report generation.") 238 | 239 | 240 | def check_environment(): 241 | """Check if required environment variables are set.""" 242 | if not os.environ.get("OPENAI_API_KEY"): 243 | logger.warning("OPENAI_API_KEY environment variable is not set. Set it in your .env file or export it in your shell.") 244 | return False 245 | return True 246 | 247 | 248 | def main() -> None: 249 | """Main entry point of the application.""" 250 | parser = setup_argparse() 251 | args = parser.parse_args() 252 | 253 | if args.command is None: 254 | parser.print_help() 255 | sys.exit(1) 256 | 257 | elif args.command == 'process': 258 | run_process_command(args) 259 | 260 | elif args.command == 'master': 261 | run_master_command(args) 262 | 263 | elif args.command == 'report': 264 | run_report_command(args) 265 | 266 | elif args.command == 'all': 267 | run_all_command(args) 268 | 269 | elif args.command == 'list': 270 | script_dir = Path(__file__).parent.absolute() 271 | company_data_dir = script_dir.parent / "company_data" 272 | companies = list_companies(str(company_data_dir)) 273 | 274 | if companies: 275 | print("Available company folders:") 276 | for company in companies: 277 | print(f"- {company}") 278 | else: 279 | print("No company folders found.") 280 | 281 | else: 282 | parser.print_help() 283 | sys.exit(1) 284 | 285 | 286 | if __name__ == "__main__": 287 | main() -------------------------------------------------------------------------------- /py/yf/stock_sector_strength.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A comparative analysis of the stock market by sector (or any grouping), measured from a significant past date/event as reflected on the benchmark. 3 | The idea is to calculate the gains not only of the individual stocks but of the entire group, with respect to that event. 4 | Interesting analysis can be done when the entire group is considered: we can see that the leader stocks move much in advance of their peers and 5 | start outperforming the benchmark and their sector.
We can also see how the sector as a group is performing with respect to the benchmark. 6 | ''' 7 | import pandas as pd 8 | import os 9 | from datetime import datetime, timedelta 10 | import csv 11 | import yfinance as yf 12 | 13 | 14 | # Read up sector/industry information from text data 15 | stock_industry_map = pd.read_csv("stock_sector_industry_map.csv", header=0, usecols=["NSE Code","Industry","Market Cap", "Sector"]) 16 | 17 | # Reference date for comparison, preferably <= 200 trading days before the run date 18 | reference_date = '2022-12-01' 19 | 20 | # Run date, must be greater than reference date 21 | run_date = '2023-08-05' 22 | 23 | # Minimum number of trading days to consider for index 24 | min_trading_days = 200 25 | 26 | # Maximum number of stocks to include in a sector group 27 | max_stocks_per_sector = 10 28 | 29 | # Limit on marketcap 30 | min_cap = 500 # Crores 31 | 32 | # Calculate gain percentages for different time periods 33 | periods = [5, 21, 55, 123] 34 | 35 | # Specify the benchmark symbol 36 | benchmark = "^NSEI" 37 | 38 | # Folder location 39 | output = 'output' 40 | 41 | def has_min_days_data(nse_code): 42 | # Calculate the start date as one year before the run_date 43 | start_date = (datetime.strptime(run_date, '%Y-%m-%d') - timedelta(days=365)).strftime('%Y-%m-%d') 44 | 45 | # Get the daily data for the specified period 46 | ticker = yf.Ticker(nse_code+'.NS') 47 | stock_data = ticker.history(start=start_date, end=run_date, interval='1d',auto_adjust=False, prepost=False) 48 | 49 | # Check if the stock has at least min_trading_days days of trading data 50 | if len(stock_data) >= min_trading_days: 51 | return True 52 | else: 53 | return False 54 | 55 | def prepare_custom_indexes(df): 56 | # Group the stocks by their sectors into a dictionary 57 | custom_indices = {} 58 | 59 | # Iterate through each row in the DataFrame 60 | for index, row in df.iterrows(): 61 | sector = row['Sector'] 62 | stock_info = { 63 | 'NSE Code': row['NSE Code'], 64 | 'Industry': row['Industry'], 65 | 'Market Cap': row['Market Cap'] 66 | } 67 | nse_code = row['NSE Code'] 68 | 69 | # Check if the stock has at least min_trading_days days of trading data 70 | if has_min_days_data(nse_code): 71 | # Check if the sector already exists in the dictionary 72 | if sector in custom_indices: 73 | custom_indices[sector].append(stock_info) 74 | else: 75 | custom_indices[sector] = [stock_info] 76 | 77 | # Sort the stocks within each sector by decreasing market cap 78 | for sector in custom_indices: 79 | stocks_in_sector = custom_indices[sector] 80 | stocks_sorted_by_market_cap = sorted(stocks_in_sector, key=lambda x: x['Market Cap'], reverse=True) 81 | custom_indices[sector] = stocks_sorted_by_market_cap[:max_stocks_per_sector] 82 | 83 | # print(custom_indices) 84 | return custom_indices 85 | 86 | def generate_watchlist_with_headers(custom_indices): 87 | watchlist_string_withheaders = "" 88 | watchlist_string = "" 89 | 90 | sector_index_mapper = {} 91 | 92 | for sector, stocks in custom_indices.items(): 93 | # Calculate the number of stocks in the sector 94 | num_stocks = len(stocks) 95 | index_expr = '' 96 | str_header = f'###{sector},' 97 | for stock in stocks: 98 | nse_code = 'NSE:' + stock['NSE Code'] 99 | index_expr += nse_code.replace('-','_').replace('&','_') + "+" 100 | 101 | index_expr = index_expr.rsplit('+', 1)[0].strip() 102 | index_expr = f'( {index_expr} )/{num_stocks}' + ',' 103 | watchlist_string += index_expr 104 | sector_index_mapper[sector.upper()] = index_expr 105 | watchlist_string_withheaders = watchlist_string_withheaders + str_header.upper() + index_expr 106 | 107 | # Write the watchlist
to the txt file 108 | with open('custom_indices_without_headers.txt', 'w') as file: 109 | file.write(watchlist_string) 110 | 111 | # Write the watchlist to the txt file 112 | with open('custom_indices_with_headers.txt', 'w') as file: 113 | file.write(watchlist_string_withheaders) 114 | 115 | return sector_index_mapper 116 | 117 | def calculate_gain_percentages(data_df, reference_date, run_date): 118 | # Filter the data from the reference date to the run date 119 | filtered_data = data_df.loc[reference_date:run_date] 120 | 121 | # Calculate the gain percentage for the original period 122 | start_price = filtered_data.iloc[0]['Close'] 123 | end_price = filtered_data.iloc[-1]['Close'] 124 | gain_percentage = ((end_price - start_price) / start_price) * 100 125 | 126 | gain_percentages = [gain_percentage] 127 | 128 | for period in periods: 129 | if len(filtered_data) < period: 130 | gain_percentages.append(None) # Append None if there's insufficient data for the period 131 | else: 132 | start_price_period = filtered_data.iloc[-period]['Close'] 133 | gain_percentage_period = ((end_price - start_price_period) / start_price_period) * 100 134 | gain_percentages.append(round(gain_percentage_period, 2)) 135 | 136 | return gain_percentages 137 | 138 | def calculate_sector_gains(custom_indices, reference_date, run_date): 139 | sector_gains = {} 140 | 141 | for sector, stocks in custom_indices.items(): 142 | total_close_start = 0.0 143 | total_close_end = 0.0 144 | 145 | for stock in stocks: 146 | nse_code = stock['NSE Code'] 147 | ticker = yf.Ticker(nse_code+'.NS') 148 | stock_data = ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False) 149 | if not stock_data.empty: 150 | # Get the closing price on the reference_date and run_date 151 | close_start = stock_data.iloc[0]['Close'] 152 | close_end = stock_data.iloc[-1]['Close'] 153 | total_close_start += close_start 154 | total_close_end += close_end 155 | 156 | # Calculate the gain percentage for the sector from reference_date to run_date 157 | sector_gain = round(((total_close_end - total_close_start) / total_close_start) * 100, 2) 158 | sector_gains[sector] = sector_gain 159 | 160 | return sector_gains 161 | 162 | def main(): 163 | print("Started...") 164 | # Prepare working dataset We only take NSE Codes and Market Cap > min_cap Crores 165 | df = stock_industry_map[(stock_industry_map['NSE Code'].notna()) & (stock_industry_map['Market Cap'] >= min_cap)] 166 | print(f'{len(df)} NSE stocks with mcap > {min_cap} Cr') 167 | # print(df.tail(10)) 168 | # Prepare custom index 169 | ### df = df.tail(30) ### FOR TESTS ONLY#################### 170 | print("Preparing custom indices...") 171 | custom_indices = prepare_custom_indexes(df) 172 | sector_index_mapper = generate_watchlist_with_headers(custom_indices) 173 | 174 | print("Calculating benchmark gain...") 175 | # Calculate gains of benchmark from reference date to run date 176 | benchmark_ticker = yf.Ticker(benchmark) 177 | benchmark_data = benchmark_ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False) 178 | benchmark_gain = calculate_gain_percentages(benchmark_data, reference_date, run_date)[0] 179 | 180 | print("Calculating sector gains...") 181 | sector_gains = calculate_sector_gains(custom_indices, reference_date, run_date) 182 | 183 | # Convert the date strings to datetime objects 184 | date1 = datetime.strptime(run_date, '%Y-%m-%d') 185 | date2 = datetime.strptime(reference_date, '%Y-%m-%d') 186 | 187 | # 
Calculate the difference in days between the two dates 188 | days_difference = (date1 - date2).days 189 | 190 | # Now we run for all stocks and create a big list and report 191 | result_df = pd.DataFrame(columns=['symbol', 'start','end','days', 'mcap', 'sector', 'industry', 'gain_stock_sector', 'gain_stock_benchmrk', 'gain_sector_benchmrk', \ 192 | 'gain_stock_refdate', 'gain_sector_refdate', 'gain_benchmrk_refdate', 'gain_stock_5d', 'gain_stock_21d', 'gain_stock_55d', 'gain_stock_123d',\ 193 | 'sector_index']) 194 | 195 | print("Calculating stock performances...") 196 | # Iterate through each row in the DataFrame 197 | for index, row in df.iterrows(): 198 | nse_code = row['NSE Code'] 199 | ticker = yf.Ticker(nse_code+'.NS') 200 | try: 201 | stock_data = ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False) 202 | if (len(stock_data) <= 2): 203 | print(f'Skipping... {nse_code}') 204 | continue 205 | stock_gains = calculate_gain_percentages(stock_data,reference_date, run_date) 206 | stock_gain_from_refdate = stock_gains[0] 207 | sector = row['Sector'] 208 | industry = row['Industry'] 209 | mcap = row['Market Cap'] 210 | gain_stock_sector = stock_gain_from_refdate - sector_gains[sector] 211 | gain_stock_benchmrk = stock_gain_from_refdate - benchmark_gain 212 | gain_sector_benchmrk = sector_gains[sector] - benchmark_gain 213 | gain_sector_refdate = sector_gains[sector] 214 | sector_index = sector_index_mapper[sector.upper()] 215 | 216 | row = {'symbol': nse_code, 'start': reference_date, 'end' : run_date, 'days' : days_difference, 'mcap': str(mcap), 'sector' : sector.upper(), 'industry' : industry.upper(), \ 217 | 'gain_stock_sector' : str(gain_stock_sector), 'gain_stock_benchmrk' : str(gain_stock_benchmrk), 'gain_sector_benchmrk' : str(gain_sector_benchmrk), \ 218 | 'gain_stock_refdate' : str(stock_gain_from_refdate), 'gain_sector_refdate' : str(gain_sector_refdate), 'gain_benchmrk_refdate' : str(benchmark_gain), \ 219 | 'gain_stock_5d' : str(stock_gains[1]), 'gain_stock_21d' : str(stock_gains[2]), 'gain_stock_55d' : str(stock_gains[3]), 'gain_stock_123d' : str(stock_gains[4]),\ 220 | 'sector_index' : sector_index} 221 | 222 | # Append the new row to the DataFrame 223 | result_df.loc[len(result_df)] = row 224 | except Exception as e: 225 | print(f'Error: {nse_code} => {e}') 226 | 227 | # Append current timestamp to the file name 228 | now = datetime.now() 229 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 230 | file_name = f'{output}/stock_sector_benchmark_{reference_date}_{run_date}_{timestamp}.csv' 231 | # Export the DataFrame to CSV 232 | result_df.to_csv(file_name, index=False) 233 | # print(sector_index_mapper) 234 | print("Done") 235 | 236 | if __name__ == "__main__": 237 | main() 238 | -------------------------------------------------------------------------------- /py/eodhd/stocks.csv: -------------------------------------------------------------------------------- 1 | Ticker 2 | RELIANCE 3 | HDFCBANK 4 | TCS 5 | ICICIBANK 6 | HINDUNILVR 7 | ITC 8 | INFY 9 | SBIN 10 | BHARTIARTL 11 | HDFC 12 | BAJFINANCE 13 | LICI 14 | KOTAKBANK 15 | LT 16 | ASIANPAINT 17 | HCLTECH 18 | AXISBANK 19 | MARUTI 20 | ADANIENT 21 | TITAN 22 | SUNPHARMA 23 | BAJAJFINSV 24 | DMART 25 | ULTRACEMCO 26 | TATAMOTORS 27 | WIPRO 28 | NESTLEIND 29 | ONGC 30 | JSWSTEEL 31 | M&M 32 | NTPC 33 | POWERGRID 34 | ADANIGREEN 35 | ADANIPORTS 36 | LTIM 37 | TATASTEEL 38 | COALINDIA 39 | IOC 40 | HDFCLIFE 41 | BAJAJ-AUTO 42 | PIDILITIND 43 | HINDZINC 44 | SBILIFE 45 | HAL 46 |
SIEMENS 47 | DLF 48 | BRITANNIA 49 | GRASIM 50 | TECHM 51 | INDUSINDBK 52 | GODREJCP 53 | VBL 54 | VEDL 55 | INDIGO 56 | BANKBARODA 57 | DABUR 58 | DIVISLAB 59 | HINDALCO 60 | CHOLAFIN 61 | ADANIPOWER 62 | BEL 63 | EICHERMOT 64 | ABB 65 | DRREDDY 66 | ADANITRANS 67 | BPCL 68 | CIPLA 69 | SHREECEM 70 | AMBUJACEM 71 | BAJAJHLDNG 72 | HAVELLS 73 | SBICARD 74 | ICICIPRULI 75 | TATACONSUM 76 | MANKIND 77 | MCDOWELL-N 78 | APOLLOHOSP 79 | GAIL 80 | ATGL 81 | MARICO 82 | TATAPOWER 83 | ICICIGI 84 | PNB 85 | ZOMATO 86 | POLYCAB 87 | SHRIRAMFIN 88 | LODHA 89 | BERGEPAINT 90 | MOTHERSON 91 | TORNTPHARM 92 | SRF 93 | JINDALSTEL 94 | TVSMOTOR 95 | CGPOWER 96 | TIINDIA 97 | ZYDUSLIFE 98 | HEROMOTOCO 99 | IDBI 100 | UNIONBANK 101 | CANBK 102 | TRENT 103 | NAUKRI 104 | PFC 105 | MAXHEALTH 106 | INDHOTEL 107 | BOSCHLTD 108 | PIIND 109 | IDFCFIRSTB 110 | PAYTM 111 | ASHOKLEY 112 | HDFCAMC 113 | CUMMINSIND 114 | AWL 115 | YESBANK 116 | MUTHOOTFIN 117 | ASTRAL 118 | AUBANK 119 | PGHH 120 | IOB 121 | COLPAL 122 | IRCTC 123 | ABBOTINDIA 124 | SCHAEFFLER 125 | ABCAPITAL 126 | PATANJALI 127 | UPL 128 | JSWENERGY 129 | NHPC 130 | BALKRISIND 131 | AUROPHARMA 132 | IRFC 133 | INDUSTOWER 134 | TATAELXSI 135 | TATACOMM 136 | GODREJPROP 137 | SUPREMEIND 138 | ALKEM 139 | MPHASIS 140 | MRF 141 | HINDPETRO 142 | LTTS 143 | LUPIN 144 | RECLTD 145 | NYKAA 146 | CONCOR 147 | INDIANB 148 | PAGEIND 149 | UBL 150 | BHARATFORG 151 | APLAPOLLO 152 | LINDEINDIA 153 | M&MFIN 154 | OBEROIRLTY 155 | IDEA 156 | HONAUT 157 | MAZDOCK 158 | STARHEALTH 159 | SAIL 160 | PERSISTENT 161 | DALBHARAT 162 | BANDHANBNK 163 | UCOBANK 164 | BANKINDIA 165 | IGL 166 | SOLARINDS 167 | GICRE 168 | POLICYBZR 169 | PETRONET 170 | SONACOMS 171 | OFSS 172 | ACC 173 | AIAENG 174 | BHEL 175 | UNOMINDA 176 | NMDC 177 | GUJGASLTD 178 | L&TFH 179 | BIOCON 180 | 3MINDIA 181 | FACT 182 | SYNGENE 183 | MANYAVAR 184 | ESCORTS 185 | JUBLFOOD 186 | JSL 187 | FLUOROCHEM 188 | DELHIVERY 189 | METROBRAND 190 | TORNTPOWER 191 | THERMAX 192 | PHOENIXLTD 193 | EMBASSY 194 | SUNDARMFIN 195 | COROMANDEL 196 | POONAWALLA 197 | CRISIL 198 | RVNL 199 | FEDERALBNK 200 | COFORGE 201 | OIL 202 | MFSL 203 | KPITTECH 204 | CENTRALBK 205 | DEEPAKNTR 206 | GMRINFRA 207 | APOLLOTYRE 208 | KANSAINER 209 | SKFINDIA 210 | SUNDRMFAST 211 | MSUMI 212 | FORTIS 213 | VOLTAS 214 | TATACHEM 215 | DIXON 216 | JKCEMENT 217 | TIMKEN 218 | GRINDWELL 219 | SUZLON 220 | DEVYANI 221 | ENDURANCE 222 | PEL 223 | HATSUN 224 | GLAXO 225 | ZFCVINDIA 226 | KEI 227 | MAHABANK 228 | RELAXO 229 | PSB 230 | KAJARIACER 231 | CARBORUNIV 232 | KPRMILL 233 | NAVINFLUOR 234 | PRESTIGE 235 | BATAINDIA 236 | IIFL 237 | BDL 238 | EXIDEIND 239 | GLENMARK 240 | LICHSGFIN 241 | ZEEL 242 | NH 243 | RAMCOCEM 244 | SUNTV 245 | BAYERCROP 246 | ATUL 247 | SUMICHEM 248 | CREDITACC 249 | ISEC 250 | GLAND 251 | ABFRL 252 | IPCALAB 253 | SJVN 254 | NIACL 255 | NAM-INDIA 256 | JBCHEPHARM 257 | INDIAMART 258 | LALPATHLAB 259 | MEDANTA 260 | FIVESTAR 261 | LAURUSLABS 262 | RADICO 263 | VINATIORGA 264 | CIEINDIA 265 | CROMPTON 266 | EMAMILTD 267 | 360ONE 268 | WHIRLPOOL 269 | RATNAMANI 270 | GILLETTE 271 | IDFC 272 | MINDSPACE 273 | AJANTPHARM 274 | KALYANKJIL 275 | TATAMTRDVR 276 | POWERINDIA 277 | ELGIEQUIP 278 | PFIZER 279 | NXST 280 | CHOLAHLDNG 281 | BLUEDART 282 | AARTIIND 283 | TANLA 284 | TRIDENT 285 | NATIONALUM 286 | PNBHOUSING 287 | JBMA 288 | CGCL 289 | NLCINDIA 290 | CYIENT 291 | TTML 292 | GODREJIND 293 | GSPL 294 | KEC 295 | SANOFI 296 | IRB 297 | FINCABLES 298 | BLUESTARCO 299 | ASTERDM 300 | RAJESHEXPO 301 | MRPL 
302 | KIMS 303 | CENTURYPLY 304 | LAXMIMACH 305 | PVRINOX 306 | SONATSOFTW 307 | BAJAJELEC 308 | FINEORG 309 | TEJASNET 310 | HAPPSTMNDS 311 | APARINDS 312 | REDINGTON 313 | DCMSHRIRAM 314 | NATCOPHARM 315 | CLEAN 316 | AFFLE 317 | WESTLIFE 318 | EIHOTEL 319 | ANGELONE 320 | ASAHIINDIA 321 | APLLTD 322 | APTUS 323 | CASTROLIND 324 | RBLBANK 325 | AETHER 326 | BRIGADE 327 | NSLNISP 328 | TRITURBINE 329 | NUVOCO 330 | AEGISCHEM 331 | GRINFRA 332 | PPLPHARMA 333 | AAVAS 334 | RHIM 335 | ALKYLAMINE 336 | CDSL 337 | SUVENPHAR 338 | VGUARD 339 | AKZOINDIA 340 | JINDALSAW 341 | HUDCO 342 | RAYMOND 343 | TATAINVEST 344 | SFL 345 | FINPIPE 346 | KIOCL 347 | HINDCOPPER 348 | BIKAJI 349 | DATAPATTNS 350 | BASF 351 | CAMS 352 | MEDPLUS 353 | RAINBOW 354 | ABSLAMC 355 | CHAMBLFERT 356 | CANFINHOME 357 | IEX 358 | MOTILALOFS 359 | ZENSARTECH 360 | RITES 361 | MANAPPURAM 362 | GESHIP 363 | TTKPRESTIG 364 | POLYMED 365 | EQUITASBNK 366 | CENTURYTEX 367 | AMARAJABAT 368 | BSOFT 369 | VTL 370 | ANURAS 371 | MGL 372 | OLECTRA 373 | KAYNES 374 | ITI 375 | KARURVYSYA 376 | UTIAMC 377 | ERIS 378 | WELSPUNIND 379 | BSE 380 | SUNCLAYLTD 381 | USHAMART 382 | RENUKA 383 | CESC 384 | CERA 385 | SHYAMMETL 386 | CEATLTD 387 | FSL 388 | CUB 389 | CRAFTSMAN 390 | GALAXYSURF 391 | ASTRAZEN 392 | CAMPUS 393 | CHALET 394 | ZYDUSWELL 395 | GODREJAGRO 396 | ROUTE 397 | BIRLACORPN 398 | GNFC 399 | KPIL 400 | SAPPHIRE 401 | PNCINFRA 402 | HFCL 403 | JYOTHYLAB 404 | BLS 405 | BIRET 406 | NCC 407 | COCHINSHIP 408 | IRCON 409 | INGERRAND 410 | KRBL 411 | ECLERX 412 | INTELLECT 413 | SHOPERSTOP 414 | PGHL 415 | SAREGAMA 416 | GODFRYPHLP 417 | VIPIND 418 | SPLPETRO 419 | WELCORP 420 | UJJIVANSFB 421 | CCL 422 | EIDPARRY 423 | SYRMA 424 | ELECON 425 | MCX 426 | RKFORGE 427 | GRAPHITE 428 | BALRAMCHIN 429 | IONEXCHANG 430 | LATENTVIEW 431 | MAPMYINDIA 432 | GLS 433 | JKLAKSHMI 434 | GPIL 435 | GRANULES 436 | BBTC 437 | PRAJIND 438 | KSB 439 | ENGINERSIN 440 | JWL 441 | ALOKINDS 442 | AMBER 443 | DEEPAKFERT 444 | MAHLIFE 445 | SPARC 446 | NBCC 447 | ALLCARGO 448 | TITAGARH 449 | EASEMYTRIP 450 | ACE 451 | MHRIL 452 | LEMONTREE 453 | SAFARI 454 | MINDACORP 455 | J&KBANK 456 | HOMEFIRST 457 | INDIGOPNTS 458 | EPL 459 | METROPOLIS 460 | BALAMINES 461 | ESABINDIA 462 | JMFINANCIL 463 | TEGA 464 | BEML 465 | PRINCEPIPE 466 | TV18BRDCST 467 | SWSOLAR 468 | GRSE 469 | CHEMPLASTS 470 | KNRCON 471 | KIRLFER 472 | TMB 473 | SCHNEIDER 474 | JUSTDIAL 475 | RUSTOMJEE 476 | LXCHEM 477 | GSFC 478 | TRIVENI 479 | CHENNPETRO 480 | MASTEK 481 | GMMPFAUDLR 482 | MAHSCOOTER 483 | BORORENEW 484 | ACI 485 | GET&D 486 | KTKBANK 487 | HNDFDS 488 | MTARTECH 489 | VRLLOG 490 | JUBLINGREA 491 | CAPLIPOINT 492 | KFINTECH 493 | INDIACEM 494 | JINDWORLD 495 | QUESS 496 | MAHSEAMLES 497 | ANANTRAJ 498 | GARFIBRES 499 | RCF 500 | HEG 501 | SARDAEN 502 | FUSION 503 | GOCOLORS 504 | HSCL 505 | SIS 506 | NETWORK18 507 | PRSMJOHNSN 508 | SYMPHONY 509 | HGINFRA 510 | ROLEXRINGS 511 | STLTECH 512 | JKTYRE 513 | GREENLAM 514 | SWANENERGY 515 | KIRLOSENG 516 | JUBLPHARMA 517 | PCBL 518 | SUPRAJIT 519 | GAEL 520 | GPPL 521 | RPOWER 522 | CMSINFO 523 | TCI 524 | GMDCLTD 525 | NEWGEN 526 | STARCEMENT 527 | POWERMECH 528 | TCIEXP 529 | MIDHANI 530 | RELINFRA 531 | IBULHSGFIN 532 | DAAWAT 533 | KENNAMET 534 | VSTIND 535 | VAIBHAVGBL 536 | HGS 537 | VESUVIUS 538 | FDC 539 | RBA 540 | RAIN 541 | SUNTECK 542 | RTNINDIA 543 | KIRLOSBROS 544 | AVANTIFEED 545 | JKPAPER 546 | INOXWIND 547 | RELIGARE 548 | BCG 549 | RSYSTEMS 550 | SOBHA 551 | ICRA 552 | UJJIVAN 553 | ISGEC 554 | PTCIL 
555 | ZENTEC 556 | SPANDANA 557 | PARADEEP 558 | LAOPALA 559 | VARROC 560 | RESPONIND 561 | MMTC 562 | CSBBANK 563 | DELTACORP 564 | TECHNOE 565 | ORIENTELEC 566 | JSWHL 567 | GHCL 568 | RAILTEL 569 | MARKSANS 570 | BECTORFOOD 571 | BOROLTD 572 | GUJALKALI 573 | SHRIPISTON 574 | SANSERA 575 | IDEAFORGE 576 | GENUSPOWER 577 | NAVA 578 | ROSSARI 579 | RATEGAIN 580 | AARTIDRUGS 581 | VOLTAMP 582 | PRUDENT 583 | HBLPOWER 584 | SHARDACROP 585 | TATACOFFEE 586 | VIJAYA 587 | SCI 588 | AHLUCONT 589 | DODLA 590 | EDELWEISS 591 | PDSL 592 | GRAVITA 593 | NESCO 594 | HCG 595 | HLEGLAS 596 | LUXIND 597 | VMART 598 | ARVINDFASN 599 | ANANDRATHI 600 | JAMNAAUTO 601 | NAZARA 602 | SURYAROSNI 603 | SOUTHBANK 604 | PRIVISCL 605 | GREENPANEL 606 | MANINFRA 607 | AMIORG 608 | AGI 609 | RALLIS 610 | NEULANDLAB 611 | KKCL 612 | TEAMLEASE 613 | MASFIN 614 | AVALON 615 | HINDWAREAP 616 | EMIL 617 | KIRLPNU 618 | ICIL 619 | IRBINVIT 620 | DBCORP 621 | DREAMFOLKS 622 | JPPOWER 623 | SULA 624 | SBCL 625 | POLYPLEX 626 | SHAREINDIA 627 | HARSHA 628 | MFL 629 | INFIBEAM 630 | TIIL 631 | STAR 632 | THOMASCOOK 633 | TDPOWERSYS 634 | CYIENTDLM 635 | HEIDELBERG 636 | NEOGEN 637 | RAJRATAN 638 | BHARATRAS 639 | DCBBANK 640 | EMUDHRA 641 | MOIL 642 | SUNFLAG 643 | TIPSINDLTD 644 | JTEKTINDIA 645 | HIKAL 646 | GANESHHOUC 647 | GATEWAY 648 | LGBBROSLTD 649 | TINPLATE 650 | NILKAMAL 651 | TATVA 652 | IBREALEST 653 | SSWL 654 | PATELENG 655 | DISHTV 656 | ARVIND 657 | SHANTIGEAR 658 | DBL 659 | NOCIL 660 | DHANUKA 661 | ASTRAMICRO 662 | WOCKPHARMA 663 | CHOICEIN 664 | PFOCUS 665 | NFL 666 | ETHOSLTD 667 | WELENT 668 | MOLDTKPAC 669 | TASTYBITE 670 | GLOBUSSPR 671 | BANARISUG 672 | FORCEMOT 673 | IFBIND 674 | ADVENZYMES 675 | PGEL 676 | ELECTCAST 677 | SAKSOFT 678 | PRICOLLTD 679 | SUDARSCHEM 680 | AUTOAXLES 681 | DATAMATICS 682 | PTC 683 | TI 684 | NAVNETEDUL 685 | JAICORPLTD 686 | GOKEX 687 | MAITHANALL 688 | TATASTLLP 689 | BBOX 690 | WABAG 691 | KSCL 692 | KIRLOSIND 693 | GOODYEAR 694 | WSTCSTPAPR 695 | IKIO 696 | GREAVESCOT 697 | WONDERLA 698 | TARSONS 699 | UFLEX 700 | BSHSL 701 | FCL 702 | JTLIND 703 | DALMIASUG 704 | SOMANYCERA 705 | TIMETECHNO 706 | THYROCARE 707 | GABRIEL 708 | BAJAJCON 709 | INDOCO 710 | AARTIPHARM 711 | ITDCEM 712 | APOLLOPIPE 713 | HEMIPROP 714 | KPIGREEN 715 | KOVAI 716 | LANDMARK 717 | MAHLOG 718 | HCC 719 | NUCLEUS 720 | RAMKY 721 | ORIENTCEM 722 | JAYNECOIND 723 | UNIPARTS 724 | RAJRILTD 725 | MAXVIL 726 | MSTCLTD 727 | HINDOILEXP 728 | APCOTEXIND 729 | ITDC 730 | SUBROS 731 | ORCHPHARMA 732 | KOLTEPATIL 733 | JCHAC 734 | STYLAMIND 735 | IFCI 736 | JINDALPOLY 737 | TEXRAIL 738 | SHILPAMED 739 | DIVGIITTS 740 | MBAPL 741 | HATHWAY 742 | SAGCEM 743 | IWEL 744 | VENKEYS 745 | DYNAMATECH 746 | UNICHEMLAB 747 | TATAMETALI 748 | DBREALTY 749 | RTNPOWER 750 | PARAS 751 | PSPPROJECT 752 | TCNSBRANDS 753 | BARBEQUE 754 | BESTAGRO 755 | SIYSIL 756 | ASHOKA 757 | VSTTILLERS 758 | DCXINDIA 759 | JISLJALEQS 760 | SDBL 761 | IPL 762 | JKIL 763 | ASTEC 764 | FIEMIND 765 | VINDHYATEL 766 | ISMTLTD 767 | HERITGFOOD 768 | LUMAXTECH 769 | SANGHVIMOV 770 | GRAUWEIL 771 | SHARDAMOTR 772 | EXPLEOSOL 773 | EVEREADY 774 | CAMLINFINE 775 | DCAL 776 | SWARAJENG 777 | VENUSPIPES 778 | GULFOILLUB 779 | BAJAJHIND 780 | FINOPB 781 | UGROCAP 782 | CARTRADE 783 | TVSSRICHAK 784 | BOMDYEING 785 | ADFFOODS 786 | THANGAMAYL 787 | JAGRAN 788 | BANCOINDIA 789 | PRECAM 790 | GUFICBIO 791 | PURVA 792 | ORISSAMINE 793 | KINGFA 794 | SANGHIIND 795 | IMAGICAA 796 | BALMLAWRIE 797 | GANECOS 798 | PAISALO 799 | INDOSTAR 800 | CIGNITITEC 
801 | AURIONPRO 802 | KESORAMIND 803 | NRBBEARING 804 | PILANIINVS 805 | BEPL 806 | MAYURUNIQ 807 | MMFL 808 | CARERATING 809 | HIL 810 | SUNDARMHLD 811 | HONDAPOWER 812 | IOLCP 813 | SEQUENT 814 | CONFIPET 815 | TARC 816 | GREENPLY 817 | ASHIANA 818 | ACCELYA 819 | RUPA 820 | BBL 821 | DHANI 822 | BUTTERFLY 823 | VADILALIND 824 | MOL 825 | VISHNU 826 | SANDHAR 827 | SOTL 828 | FMGOETZE 829 | STYRENIX 830 | GRWRHITECH 831 | DOLLAR 832 | SHALBY 833 | ATFL 834 | WENDT 835 | MANORAMA 836 | GNA 837 | HUHTAMAKI 838 | MPSLTD 839 | ALEMBICLTD 840 | IIFLSEC 841 | AMRUTANJAN 842 | LUMAXIND 843 | JPASSOCIAT 844 | VIDHIING 845 | APTECHT 846 | SIRCA 847 | TIRUMALCHM 848 | DIAMONDYD 849 | NOVARTIND 850 | ANUP 851 | SUPRIYA 852 | REPCOHOME 853 | DHAMPURSUG 854 | SURYODAY 855 | GATI 856 | INDIAGLYCO 857 | CARYSIL 858 | SESHAPAPER 859 | TIDEWATER 860 | NELCO 861 | GOCLCORP 862 | IMFA 863 | SEAMECLTD 864 | OPTIEMUS 865 | WHEELS 866 | KDDL 867 | MUKANDLTD 868 | SUBEXLTD 869 | CENTUM 870 | TTKHLTCARE 871 | AXISCADES 872 | JAIBALAJI 873 | RAMASTEEL 874 | ARMANFIN 875 | SPICEJET 876 | SJS 877 | PARAGMILK 878 | PANAMAPET 879 | DWARKESH 880 | COSMOFIRST 881 | INOXGREEN 882 | GALLANTT 883 | ARTEMISMED 884 | NACLIND 885 | SKIPPER 886 | MONTECARLO 887 | SERVOTECH 888 | DEN 889 | FOSECOIND 890 | JSWISPL 891 | XPROINDIA 892 | HARIOMPIPE 893 | SHANKARA 894 | ADORWELD 895 | PRECWIRE 896 | ANDHRAPAP 897 | SALASAR 898 | VAKRANGEE 899 | PIXTRANS 900 | FILATEX 901 | KSL 902 | KUANTUM 903 | TAJGVK 904 | RPGLIFE 905 | CAPACITE 906 | CANTABIL 907 | GIPCL 908 | SEPC 909 | RANEHOLDIN 910 | ROSSELLIND 911 | SATIN 912 | ORIENTHOT 913 | AHL 914 | GOKULAGRO 915 | SHK 916 | EIHAHOTELS 917 | UNIVCABLES 918 | FAIRCHEMOR 919 | SMLISUZU 920 | PRAKASH 921 | VSSL 922 | GTPL 923 | ARVSMART 924 | ANDHRSUGAR 925 | SANGAMIND 926 | STOVEKRAFT 927 | IGARASHI 928 | RAMCOIND 929 | HESTERBIO 930 | MOREPENLAB 931 | KABRAEXTRU 932 | NDTV 933 | MARATHON 934 | REFEX 935 | TCPLPACK 936 | KCP 937 | HARDWYN 938 | SASKEN 939 | JASH 940 | STEELXIND 941 | RIIL 942 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/src/document_processor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Document Processor Module 3 | 4 | This module handles the conversion of various file formats to markdown text. 
5 | Supported formats: txt, pdf, docx, pptx, xlsx, images 6 | """ 7 | 8 | import os 9 | import re 10 | import logging 11 | import base64 12 | import json 13 | from pathlib import Path 14 | from typing import Dict, List, Optional, Tuple, Any 15 | import datetime 16 | 17 | # Load environment variables from .env file 18 | try: 19 | from dotenv import load_dotenv 20 | load_dotenv() # Load variables from .env 21 | ENV_LOADED = True 22 | except ImportError: 23 | ENV_LOADED = False 24 | logging.warning("dotenv not found, environment variables must be set manually") 25 | 26 | # Configure logging 27 | logging.basicConfig( 28 | level=logging.INFO, 29 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 30 | ) 31 | logger = logging.getLogger(__name__) 32 | 33 | # Get model IDs and config from environment variables 34 | OPENAI_VISION_MODEL = os.environ.get("OPENAI_VISION_MODEL", "gpt-4-vision-preview") 35 | OPENAI_TEXT_MODEL = os.environ.get("OPENAI_TEXT_MODEL", "gpt-4-turbo") 36 | # Flag to enable/disable LLM prompt logging (default: enabled) 37 | ENABLE_LOGGING = os.environ.get("ENABLE_LLM_LOGGING", "true").lower() == "true" 38 | 39 | # Try to import optional dependencies, with graceful fallbacks 40 | try: 41 | import fitz # PyMuPDF 42 | PDF_EXTRACTOR = "pymupdf" 43 | except ImportError: 44 | PDF_EXTRACTOR = None 45 | logger.warning("PyMuPDF not found. PDF extraction will be limited.") 46 | 47 | try: 48 | import docx 49 | DOCX_AVAILABLE = True 50 | except ImportError: 51 | DOCX_AVAILABLE = False 52 | logger.warning("python-docx not found. DOCX extraction will be unavailable.") 53 | 54 | try: 55 | from pptx import Presentation 56 | PPTX_AVAILABLE = True 57 | except ImportError: 58 | PPTX_AVAILABLE = False 59 | logger.warning("python-pptx not found. PPTX extraction will be unavailable.") 60 | 61 | try: 62 | import pandas as pd 63 | PANDAS_AVAILABLE = True 64 | except ImportError: 65 | PANDAS_AVAILABLE = False 66 | logger.warning("pandas not found. XLSX extraction will be unavailable.") 67 | 68 | try: 69 | from PIL import Image 70 | import pytesseract 71 | OCR_AVAILABLE = True 72 | except ImportError: 73 | OCR_AVAILABLE = False 74 | logger.warning("PIL or pytesseract not found. OCR will be unavailable.") 75 | 76 | try: 77 | import openai 78 | OPENAI_AVAILABLE = True 79 | except ImportError: 80 | OPENAI_AVAILABLE = False 81 | logger.warning("OpenAI library not found. Advanced image analysis will be unavailable.") 82 | 83 | 84 | def extract_from_txt(file_path: str) -> str: 85 | """Extract text from a plain text file. 86 | 87 | Args: 88 | file_path: Path to the text file 89 | 90 | Returns: 91 | Extracted text content 92 | """ 93 | try: 94 | with open(file_path, 'r', encoding='utf-8') as f: 95 | return f.read() 96 | except UnicodeDecodeError: 97 | # Try with different encodings if utf-8 fails 98 | try: 99 | with open(file_path, 'r', encoding='latin-1') as f: 100 | return f.read() 101 | except Exception as e: 102 | logger.error(f"Error reading text file {file_path}: {str(e)}") 103 | return f"ERROR: Could not read {file_path} due to encoding issues." 104 | 105 | 106 | def extract_from_pdf(file_path: str) -> str: 107 | """Extract text from a PDF file. 
108 | 109 | Args: 110 | file_path: Path to the PDF file 111 | 112 | Returns: 113 | Extracted text content 114 | """ 115 | if PDF_EXTRACTOR == "pymupdf": 116 | try: 117 | text_content = [] 118 | with fitz.open(file_path) as doc: 119 | for page_num, page in enumerate(doc): 120 | text = page.get_text() 121 | text_content.append(f"# Page {page_num + 1}\n\n{text}\n\n") 122 | return "\n".join(text_content) 123 | except Exception as e: 124 | logger.error(f"Error extracting text from PDF {file_path}: {str(e)}") 125 | return f"ERROR: Could not extract text from {file_path}." 126 | else: 127 | logger.error("No PDF extraction library available") 128 | return "ERROR: PDF extraction requires PyMuPDF. Please install with: pip install pymupdf" 129 | 130 | 131 | def extract_from_docx(file_path: str) -> str: 132 | """Extract text from a DOCX file. 133 | 134 | Args: 135 | file_path: Path to the DOCX file 136 | 137 | Returns: 138 | Extracted text content 139 | """ 140 | if not DOCX_AVAILABLE: 141 | return "ERROR: DOCX extraction requires python-docx. Please install with: pip install python-docx" 142 | 143 | try: 144 | doc = docx.Document(file_path) 145 | full_text = [] 146 | 147 | for para in doc.paragraphs: 148 | full_text.append(para.text) 149 | 150 | # Add tables 151 | for table in doc.tables: 152 | for row in table.rows: 153 | row_text = " | ".join([cell.text for cell in row.cells]) 154 | full_text.append(f"| {row_text} |") 155 | 156 | return "\n\n".join(full_text) 157 | except Exception as e: 158 | logger.error(f"Error extracting text from DOCX {file_path}: {str(e)}") 159 | return f"ERROR: Could not extract text from {file_path}." 160 | 161 | 162 | def extract_from_pptx(file_path: str) -> str: 163 | """Extract text from a PPTX file. 164 | 165 | Args: 166 | file_path: Path to the PPTX file 167 | 168 | Returns: 169 | Extracted text content 170 | """ 171 | if not PPTX_AVAILABLE: 172 | return "ERROR: PPTX extraction requires python-pptx. Please install with: pip install python-pptx" 173 | 174 | try: 175 | presentation = Presentation(file_path) 176 | text_content = [] 177 | 178 | for slide_num, slide in enumerate(presentation.slides): 179 | slide_text = [] 180 | slide_text.append(f"# Slide {slide_num + 1}") 181 | 182 | for shape in slide.shapes: 183 | if hasattr(shape, "text") and shape.text.strip(): 184 | slide_text.append(shape.text) 185 | 186 | text_content.append("\n\n".join(slide_text)) 187 | 188 | return "\n\n---\n\n".join(text_content) 189 | except Exception as e: 190 | logger.error(f"Error extracting text from PPTX {file_path}: {str(e)}") 191 | return f"ERROR: Could not extract text from {file_path}." 192 | 193 | 194 | def extract_from_xlsx(file_path: str) -> str: 195 | """Extract data from an Excel file. 196 | 197 | Args: 198 | file_path: Path to the Excel file 199 | 200 | Returns: 201 | Extracted data as markdown tables 202 | """ 203 | if not PANDAS_AVAILABLE: 204 | return "ERROR: Excel extraction requires pandas. 
Please install with: pip install pandas openpyxl" 205 | 206 | try: 207 | result = [] 208 | # Read all sheets 209 | excel_file = pd.ExcelFile(file_path) 210 | 211 | for sheet_name in excel_file.sheet_names: 212 | df = pd.read_excel(file_path, sheet_name=sheet_name) 213 | 214 | # Convert to markdown table 215 | md_table = f"## Sheet: {sheet_name}\n\n" 216 | md_table += df.to_markdown(index=False) 217 | result.append(md_table) 218 | 219 | return "\n\n---\n\n".join(result) 220 | except Exception as e: 221 | logger.error(f"Error extracting data from Excel {file_path}: {str(e)}") 222 | return f"ERROR: Could not extract data from {file_path}." 223 | 224 | 225 | def log_llm_prompt( 226 | company_name: str, 227 | phase: str, 228 | section: str, 229 | messages: List[Dict[str, Any]], 230 | model: str, 231 | temperature: float, 232 | max_tokens: int, 233 | run_timestamp: Optional[str] = None 234 | ) -> None: 235 | """Log the prompt sent to the LLM. 236 | 237 | Args: 238 | company_name: Name of the company 239 | phase: Phase of processing (e.g., 'document_processing') 240 | section: Section being generated (e.g., 'image_analysis') 241 | messages: Messages sent to the LLM 242 | model: Model name 243 | temperature: Temperature setting 244 | max_tokens: Max tokens setting 245 | run_timestamp: Optional timestamp to use for the log filename. If provided, 246 | appends to an existing log file with this timestamp. 247 | """ 248 | if not ENABLE_LOGGING: 249 | logger.info("LLM logging is disabled. Skipping log entry.") 250 | return 251 | 252 | # Create logs directory 253 | company_logs_dir = Path(f"company_data/{company_name}/logs") 254 | company_logs_dir.mkdir(exist_ok=True, parents=True) 255 | 256 | # Generate timestamp for the log file or use provided one 257 | timestamp = run_timestamp if run_timestamp else datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 258 | log_filename = f"{company_name}_{phase}_{timestamp}.log" 259 | log_path = company_logs_dir / log_filename 260 | 261 | # Prepare log entry 262 | log_entry = { 263 | "timestamp": datetime.datetime.now().isoformat(), 264 | "company": company_name, 265 | "phase": phase, 266 | "section": section, 267 | "model": model, 268 | "temperature": temperature, 269 | "max_tokens": max_tokens, 270 | "messages": messages 271 | } 272 | 273 | # Append to log file 274 | try: 275 | # Create file if it doesn't exist 276 | if not log_path.exists(): 277 | with open(log_path, 'w', encoding='utf-8') as f: 278 | f.write(f"# LLM Interaction Log for {company_name}\n") 279 | f.write(f"# Phase: {phase}\n") 280 | f.write(f"# Created: {timestamp}\n\n") 281 | 282 | # Append log entry 283 | with open(log_path, 'a', encoding='utf-8') as f: 284 | f.write(f"\n## {section} - {datetime.datetime.now().isoformat()}\n") 285 | f.write(json.dumps(log_entry, indent=2)) 286 | f.write("\n\n---\n\n") 287 | 288 | logger.info(f"Logged LLM prompt for {company_name}/{phase}/{section} to {log_path}") 289 | except Exception as e: 290 | logger.error(f"Failed to log LLM prompt: {str(e)}") 291 | 292 | 293 | def extract_from_image(file_path: str, run_timestamp: Optional[str] = None) -> str: 294 | """Extract text from an image using OCR. 295 | 296 | Args: 297 | file_path: Path to the image file 298 | run_timestamp: Optional timestamp for consistent log file naming 299 | 300 | Returns: 301 | Extracted text content 302 | """ 303 | if not OCR_AVAILABLE: 304 | return "ERROR: Image extraction requires Pillow and pytesseract. 
Please install with: pip install Pillow pytesseract" 305 | 306 | try: 307 | image = Image.open(file_path) 308 | text = pytesseract.image_to_string(image) 309 | 310 | # Use OpenAI for better image understanding if available 311 | if OPENAI_AVAILABLE and os.environ.get("OPENAI_API_KEY"): 312 | try: 313 | # Get company name from file path 314 | file_path_obj = Path(file_path) 315 | company_name = file_path_obj.parent.name 316 | 317 | # Create a timestamp for this processing run if not provided 318 | if run_timestamp is None: 319 | run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 320 | 321 | # Try to get a better description using OpenAI's vision capabilities 322 | client = openai.Client(api_key=os.environ["OPENAI_API_KEY"]) 323 | with open(file_path, "rb") as image_file: 324 | base_image = image_file.read() 325 | 326 | # Prepare messages 327 | messages = [ 328 | { 329 | "role": "user", 330 | "content": [ 331 | {"type": "text", "text": "Describe this image in detail, focusing on any financial data, charts, or business information visible."}, 332 | {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64.b64encode(base_image).decode('utf-8')}"}} 333 | ] 334 | } 335 | ] 336 | 337 | # Log the prompt 338 | log_llm_prompt( 339 | company_name=company_name, 340 | phase="document_processing", 341 | section=f"image_analysis_{file_path_obj.stem}", 342 | messages=messages, 343 | model=OPENAI_VISION_MODEL, # Use global variable 344 | temperature=0.3, 345 | max_tokens=300, 346 | run_timestamp=run_timestamp 347 | ) 348 | 349 | response = client.chat.completions.create( 350 | model=OPENAI_VISION_MODEL, # Use global variable 351 | messages=messages, 352 | temperature=0.3, 353 | max_tokens=300 354 | ) 355 | vision_description = response.choices[0].message.content 356 | return f"## OCR Text:\n\n{text}\n\n## Image Analysis:\n\n{vision_description}" 357 | except Exception as e: 358 | logger.warning(f"OpenAI vision processing failed: {str(e)}") 359 | return f"## OCR Text:\n\n{text}" 360 | else: 361 | return f"## OCR Text:\n\n{text}" 362 | except Exception as e: 363 | logger.error(f"Error extracting text from image {file_path}: {str(e)}") 364 | return f"ERROR: Could not extract text from {file_path}." 365 | 366 | 367 | def convert_to_markdown(file_path: str, run_timestamp: Optional[str] = None) -> Tuple[str, str]: 368 | """Convert various file formats to markdown text. 
369 | 370 | Args: 371 | file_path: Path to the file 372 | run_timestamp: Optional timestamp for consistent log naming across a run 373 | 374 | Returns: 375 | Tuple of (markdown_content, file_name) 376 | """ 377 | file_path = Path(file_path) 378 | file_ext = file_path.suffix.lower()[1:] # Remove the dot 379 | file_name = file_path.stem 380 | 381 | content = f"# {file_name}\n\n" 382 | content += f"Source: {file_path}\n" 383 | content += f"Processed on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n---\n\n" 384 | 385 | try: 386 | if file_ext == "txt": 387 | extracted = extract_from_txt(str(file_path)) 388 | elif file_ext == "pdf": 389 | extracted = extract_from_pdf(str(file_path)) 390 | elif file_ext == "docx": 391 | extracted = extract_from_docx(str(file_path)) 392 | elif file_ext == "pptx": 393 | extracted = extract_from_pptx(str(file_path)) 394 | elif file_ext in ["xlsx", "xls"]: 395 | extracted = extract_from_xlsx(str(file_path)) 396 | elif file_ext in ["jpg", "jpeg", "png", "gif", "bmp"]: 397 | extracted = extract_from_image(str(file_path), run_timestamp) 398 | else: 399 | extracted = f"Unsupported file format: {file_ext}" 400 | logger.warning(f"Unsupported file format: {file_ext}") 401 | 402 | content += extracted 403 | 404 | except Exception as e: 405 | logger.error(f"Error processing {file_path}: {str(e)}") 406 | content += f"ERROR: Failed to process file {file_path}. Exception: {str(e)}" 407 | 408 | return content, f"{file_name}.md" 409 | 410 | 411 | def process_company_folder(company_folder: str) -> List[Tuple[str, str]]: 412 | """Process all files in a company folder. 413 | 414 | Args: 415 | company_folder: Path to the company folder 416 | 417 | Returns: 418 | List of tuples (markdown_content, markdown_file_path) 419 | """ 420 | logger.info(f"Processing company folder: {company_folder}") 421 | company_path = Path(company_folder) 422 | 423 | if not company_path.exists() or not company_path.is_dir(): 424 | logger.error(f"Company folder does not exist: {company_folder}") 425 | return [] 426 | 427 | # Get company name from folder name 428 | company_name = company_path.name 429 | 430 | # Create output folders proactively 431 | output_folder = company_path / "processed" 432 | output_folder.mkdir(exist_ok=True) 433 | 434 | # Create logs directory proactively 435 | logs_folder = company_path / "logs" 436 | logs_folder.mkdir(exist_ok=True) 437 | logger.info(f"Ensured logs directory exists: {logs_folder}") 438 | 439 | # Create a single timestamp for this processing run 440 | run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 441 | 442 | results = [] 443 | 444 | # Process all files in the folder 445 | for file_path in company_path.glob("*"): 446 | if file_path.is_file() and not file_path.name.startswith('.') and not file_path.name.endswith('.md'): 447 | logger.info(f"Processing file: {file_path}") 448 | 449 | # Convert the file to markdown using the common run timestamp 450 | markdown_content, markdown_name = convert_to_markdown(str(file_path), run_timestamp) 451 | 452 | # Save the markdown file 453 | markdown_path = output_folder / markdown_name 454 | with open(markdown_path, 'w', encoding='utf-8') as f: 455 | f.write(markdown_content) 456 | 457 | results.append((markdown_content, str(markdown_path))) 458 | 459 | logger.info(f"Processed {len(results)} files for company: {company_name}") 460 | return results --------------------------------------------------------------------------------
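
Note on programmatic use: the functions above can also be driven directly from Python instead of through main.py's CLI. The following is a minimal sketch that relies only on the interfaces shown in this listing (process_company_folder, and the generate_master_file call signature used by main.py); the folder name "MyCompany" is a placeholder, and master_file_generator.py itself is not included here, so treat this as an illustrative sketch rather than a supported entry point.

from pathlib import Path

from document_processor import process_company_folder
from master_file_generator import generate_master_file

# Hypothetical company folder; replace with a real folder under company_data/
company_folder = Path("company_data") / "MyCompany"

# Step 1: convert every supported document into markdown under <company>/processed
processed = process_company_folder(str(company_folder))
markdown_files = [md_path for _content, md_path in processed]

# Step 2: merge the processed markdown files into a single master file
# (output_dir=None mirrors how main.py calls generate_master_file)
if markdown_files:
    master_path = generate_master_file(
        company_name=company_folder.name,
        markdown_files=markdown_files,
        output_dir=None,
    )
    print(f"Master file: {master_path}")
else:
    print("Nothing was processed; check the company folder path.")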