├── py ├── ai │ ├── fininsightgpt │ │ ├── src │ │ │ ├── __init__.py │ │ │ ├── requirements.txt │ │ │ ├── master_file_generator.py │ │ │ ├── main.py │ │ │ └── document_processor.py │ │ ├── company_data │ │ │ └── Description.txt │ │ ├── README.md │ │ └── prompt_master │ │ │ └── Equity_Research_Report_Template.md │ ├── market_analyzer │ │ ├── .env │ │ ├── output │ │ │ ├── Description.txt │ │ │ └── Avanti feeds_chat_log_20250323_211534.pdf │ │ ├── stock_chat.py │ │ ├── requirements.txt │ │ └── analysis_utils.py │ ├── turnaround │ │ ├── output │ │ │ └── description.txt │ │ ├── my_tools │ │ │ ├── __init__.py │ │ │ ├── web_fetcher.py │ │ │ ├── markdown_report.py │ │ │ ├── fs_reader.py │ │ │ └── cmd_executor.py │ │ ├── requirements.txt │ │ ├── data │ │ │ └── financial_data.csv │ │ ├── main.py │ │ └── README.md │ ├── nse_announcements │ │ ├── requirements.txt │ │ └── weekly_nse_announcements_analysis.py │ └── newsarranger │ │ ├── requirements.txt │ │ └── get_news_arrange.py ├── eodhd │ ├── price_data │ │ ├── RELIANCE_M.csv │ │ ├── RELIANCE_W.csv │ │ └── RELIANCE_D.csv │ ├── pricereader.py │ ├── ath_scan.py │ ├── my_rsi.py │ ├── how_many_weeks_high.py │ ├── saucer_crs.py │ ├── gareebman_entry_exit.py │ ├── mip12_scanner.py │ └── stocks.csv ├── beta │ ├── chatgpt │ │ ├── model.py │ │ └── generate_report_for_company.py │ └── concall_transcript_summarize.py └── yf │ ├── daily_rs_55_bo.py │ ├── glb_scan.py │ ├── ars_srs_scan.py │ ├── weeklyRSIVolStopBO.py │ ├── newHighMonthly.py │ ├── multimonthBO.py │ ├── box_scan.py │ ├── green_dot.py │ ├── trendreversal_ha.py │ ├── supply_exhaustion_6m_scan.py │ ├── ss_result_parser.py │ ├── limevolume.py │ └── stock_sector_strength.py ├── .gitignore └── README.md /py/ai/fininsightgpt/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/.env: -------------------------------------------------------------------------------- 1 | GOOGLE_API_KEY='YOUR API KEY' 2 | -------------------------------------------------------------------------------- /py/ai/turnaround/output/description.txt: -------------------------------------------------------------------------------- 1 | Output reports will be here 2 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/output/Description.txt: -------------------------------------------------------------------------------- 1 | Your outputs are saved here 2 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/company_data/Description.txt: -------------------------------------------------------------------------------- 1 | Create folder for each business (company) you wish to analyse and place all documents here. 
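For example, using the same placeholder layout shown in the project README, the folder could look like this (supported formats include PDF, DOCX, PPTX, TXT, XLSX and images):

company_data/
├── company1/
│   ├── file1.pdf
│   ├── file2.txt
│   └── image1.jpg
└── company2/
    ├── presentation.pptx
    └── financials.xlsx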
-------------------------------------------------------------------------------- /py/ai/market_analyzer/output/Avanti feeds_chat_log_20250323_211534.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QueryType/bharattrader/HEAD/py/ai/market_analyzer/output/Avanti feeds_chat_log_20250323_211534.pdf -------------------------------------------------------------------------------- /py/ai/fininsightgpt/src/requirements.txt: -------------------------------------------------------------------------------- 1 | pymupdf>=1.22.5 2 | python-docx>=0.8.11 3 | python-pptx>=0.6.21 4 | pandas>=2.0.0 5 | openpyxl>=3.1.2 6 | Pillow>=10.0.0 7 | pytesseract>=0.3.10 8 | openai>=1.3.0 9 | tiktoken>=0.5.0 10 | python-dotenv>=1.0.0 -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tools submodule for turnaround. 3 | 4 | Contains all the individual tool implementations. 5 | """ 6 | 7 | from .fs_reader import fs_reader 8 | from .cmd_executor import cmd_executor 9 | from .web_fetcher import search_web 10 | from .markdown_report import save_report 11 | 12 | __all__ = [ 13 | "fs_reader", 14 | "cmd_executor", 15 | "search_web", 16 | "save_report" 17 | ] 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | py/ai/fininsightgpt/.env 3 | 4 | # Python 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.so 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # Virtual Environment 27 | venv/ 28 | ENV/ 29 | env/ 30 | 31 | # IDE specific files 32 | .idea/ 33 | .vscode/ 34 | *.swp 35 | *.swo 36 | 37 | # OS specific files 38 | .DS_Store 39 | .DS_Store? 40 | ._* 41 | .Spotlight-V100 42 | .Trashes 43 | ehthumbs.db 44 | Thumbs.db 45 | 46 | # Jupyter Notebook 47 | .ipynb_checkpoints -------------------------------------------------------------------------------- /py/eodhd/price_data/RELIANCE_M.csv: -------------------------------------------------------------------------------- 1 | Date,Open,High,Low,Close,Volume,Adj Close 2 | 1994-11-03,375.0,400.0,356.75,380.25,879250,380.25 3 | 1994-12-01,378.5,382.5,329.0,341.2,851600,341.2 4 | 1995-01-02,341.0,343.0,235.15,272.45,4167200,272.45 5 | 1995-02-01,275.0,287.0,237.25,271.75,6171750,271.75 6 | 1995-03-01,275.0,297.0,250.0,265.45,12290250,265.45 7 | 1995-04-03,267.0,288.05,246.5,249.9,6403250,249.9 8 | 1995-05-02,249.5,280.0,225.55,271.85,21273350,271.85 9 | 1995-06-01,274.95,284.0,260.4,264.0,27161750,264.0 10 | 1995-07-03,262.25,303.0,226.35,267.4,26989650,267.4 11 | 1995-08-01,266.0,280.0,256.5,261.85,32501950,261.85 12 | 1995-09-01,261.0,278.0,257.0,275.0,19358500,275.0 13 | 1995-10-04,278.0,290.0,234.25,244.0,44468050,244.0 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bharattrader 2 | Utilities for trading , scanning and other things. 3 | 4 | I am not going to pretend that I am a coder wiz. I am just a normal software guy, who has written these codes for my own personal use. 
I know there are intelligent and smart people out there, who can not only write better code but also develop better algos. In case some people find something valuable here, you can pick up and honour the license under which the code is released. 5 | 6 | This repo is strictly as-is. Please do not come to me for changes and modifications. The source is licensed under GNU GPL, so please go ahead. 7 | Also, I am not responsible for any trading/investing/financial loss that you encounter after using these codes. Please do your own analysis. 8 | -------------------------------------------------------------------------------- /py/ai/turnaround/requirements.txt: -------------------------------------------------------------------------------- 1 | # AI Agent Framework 2 | smolagents>=0.3.0 3 | 4 | # OpenAI API Integration 5 | openai>=1.0.0 6 | 7 | # LLM Model Support 8 | litellm>=1.0.0 9 | 10 | # Environment Variables Management 11 | python-dotenv>=1.0.0 12 | 13 | # Data Processing 14 | pandas>=2.0.0 15 | numpy>=1.24.0 16 | 17 | # HTTP Requests (for web fetching) 18 | requests>=2.31.0 19 | 20 | # JSON Processing (built-in, but some tools might need enhanced support) 21 | jsonschema>=4.17.0 22 | 23 | # File I/O and CSV Processing (built-in, but for completeness) 24 | # csv - built-in 25 | # os - built-in 26 | # datetime - built-in 27 | 28 | # Optional: For enhanced web scraping capabilities 29 | beautifulsoup4>=4.12.0 30 | selenium>=4.15.0 31 | 32 | # Optional: For better logging and debugging 33 | loguru>=0.7.0 34 | 35 | # Optional: For data validation 36 | pydantic>=2.0.0 37 | -------------------------------------------------------------------------------- /py/ai/turnaround/data/financial_data.csv: -------------------------------------------------------------------------------- 1 | Name,BSE Code,NSE Code 2 | 63 Moons Tech.,526881,63MOONS 3 | Apex Frozen Food,540692,APEX 4 | Arman Financial,531179,ARMANFIN 5 | Ashima,514286,ASHIMASYN 6 | Bajaj Hindusthan,500032,BAJAJHIND 7 | Concord Enviro,544315,CEWATER 8 | Elpro Internatio,504000, 9 | Embassy Develop,532832,EMBDL 10 | Graphite India,509488,GRAPHITE 11 | Gujarat Alkalies,530001,GUJALKALI 12 | IFCI,500106,IFCI 13 | Meghmani Organi.,543331,MOL 14 | Munjal Auto Inds,520059,MUNJALAU 15 | Nuvoco Vistas,543334,NUVOCO 16 | PNB Gilts,532366,PNBGILTS 17 | Precision Camshf,539636,PRECAM 18 | Prince Pipes,542907,PRINCEPIPE 19 | Reliance Infra.,500390,RELINFRA 20 | RSWM Ltd,500350,RSWM 21 | S Chand & Compan,540497,SCHAND 22 | Saurashtra Cem.,502175,SAURASHCEM 23 | Snowman Logistic,538635,SNOWMAN 24 | T N Newsprint,531426,TNPL 25 | Texmaco Infrast.,505400,TEXINFRA 26 | Utkarsh Small F.,543942,UTKARSHBNK 27 | Visaka Industrie,509055,VISAKAIND 28 | -------------------------------------------------------------------------------- /py/ai/nse_announcements/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.2.0 3 | arxiv==2.1.0 4 | autogenstudio==0.0.25a0 5 | boto3==1.34.145 6 | botocore==1.34.145 7 | certifi==2023.11.17 8 | charset-normalizer==3.3.2 9 | click==8.1.7 10 | diskcache==5.6.3 11 | distro==1.9.0 12 | exceptiongroup==1.2.0 13 | fastapi==0.109.0 14 | feedparser==6.0.10 15 | FLAML==2.1.1 16 | h11==0.14.0 17 | httpcore==1.0.2 18 | httpx==0.26.0 19 | idna==3.6 20 | jiter==0.5.0 21 | jmespath==1.0.1 22 | numpy==1.26.3 23 | openai==1.42.0 24 | packaging==24.1 25 | pandas==2.2.2 26 | pillow==10.4.0 27 | plotly==5.22.0 28 | pyautogen==0.2.6 29 | pydantic==2.5.3 30 | 
pydantic_core==2.14.6 31 | PyMuPDF==1.24.9 32 | PyMuPDFb==1.24.9 33 | python-dateutil==2.9.0.post0 34 | python-dotenv==1.0.0 35 | pytz==2024.1 36 | regex==2023.12.25 37 | requests==2.31.0 38 | s3transfer==0.10.2 39 | sgmllib3k==1.0.0 40 | six==1.16.0 41 | sniffio==1.3.0 42 | starlette==0.35.1 43 | tenacity==8.5.0 44 | termcolor==2.4.0 45 | tiktoken==0.5.2 46 | tqdm==4.66.1 47 | typer==0.9.0 48 | typing_extensions==4.12.2 49 | tzdata==2024.1 50 | urllib3==2.1.0 51 | uvicorn==0.25.0 52 | -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/web_fetcher.py: -------------------------------------------------------------------------------- 1 | from smolagents import tool 2 | import os 3 | from openai import OpenAI 4 | import json 5 | 6 | model="gpt-4.1-mini" 7 | client = OpenAI() 8 | 9 | @tool 10 | def search_web(query: str) -> str: 11 | """ 12 | This tool searches the web for the given query and returns the results. 13 | It is useful for gathering information from the web to assist in decision-making or analysis. 14 | Args: 15 | query (str): The search query to use. Be as specific as possible to get relevant results. 16 | Returns: 17 | str: The search results or an error message if the search fails. It is json formatted string. 18 | """ 19 | # check if the file exists on the filesystem 20 | if not query: 21 | return "No file path provided." 22 | 23 | response = client.responses.create( 24 | model=model, # or another supported model 25 | input=query, 26 | tools=[ 27 | { 28 | "type": "web_search" 29 | } 30 | ] 31 | ) 32 | return json.dumps(response.output, default=lambda o: o.__dict__, indent=2) -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/markdown_report.py: -------------------------------------------------------------------------------- 1 | from smolagents import tool 2 | import datetime 3 | 4 | instructions = """You are simple file writer tool that dumps the input text into a file.""" 5 | 6 | @tool 7 | def save_report(md_report: str, business_name: str) -> None: 8 | """ 9 | This tool saves a markdown formatted report to a file. 10 | Args: 11 | md_report (str): The markdown report content to save. 12 | business_name (str): The name of the business for which the report is generated. 13 | Returns: 14 | None: The function does not return anything, but saves the report to a file. 15 | """ 16 | # check if the file exists on the filesystem 17 | if not md_report: 18 | return "No file path provided." 19 | 20 | output_file = f"output/{business_name}" + datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + "_report.md" 21 | 22 | # Save the output to a file 23 | try: 24 | with open(output_file, "w", encoding="utf-8") as file: 25 | file.write(md_report) 26 | except Exception as e: 27 | return f"An error occurred while saving the report: {str(e)}" -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/fs_reader.py: -------------------------------------------------------------------------------- 1 | from smolagents import tool 2 | import os 3 | from huggingface_hub import list_models 4 | 5 | @tool 6 | def fs_reader(task: str) -> str: 7 | """ 8 | This tool reads a file from the filesystem and returns its content. 9 | This can read on plain text files, markdown files, source code files, etc. 10 | It is useful for reading files that are part of the project or for reading 11 | files that are provided as input to the agent. 
12 | Args: 13 | task (str): The path to the file to read. 14 | Returns: 15 | str: The content of the file or an error message if the file cannot be read. 16 | """ 17 | # check if the file exists on the filesystem 18 | if not task: 19 | return "No file path provided." 20 | 21 | # Expand user path (handle ~ symbol) 22 | expanded_path = os.path.expanduser(task) 23 | 24 | try: 25 | with open(expanded_path, "r", encoding="utf-8") as file: 26 | content = file.read() 27 | return content 28 | except FileNotFoundError: 29 | return f"File not found: {expanded_path} (original path: {task})" 30 | except Exception as e: 31 | return f"An error occurred while reading the file: {str(e)}" -------------------------------------------------------------------------------- /py/eodhd/price_data/RELIANCE_W.csv: -------------------------------------------------------------------------------- 1 | Date,Open,High,Low,Close,Volume,Adj Close 2 | 1994-11-03,375.0,400.0,375.0,396.0,42650,396.0 3 | 1994-11-07,396.0,399.0,369.5,372.75,297500,372.75 4 | 1994-11-14,373.0,390.0,356.75,387.5,259000,387.5 5 | 1994-11-21,385.0,387.0,367.0,379.5,174750,379.5 6 | 1994-11-28,377.5,387.0,372.5,374.25,158850,374.25 7 | 1994-12-05,373.5,382.5,358.5,363.0,223050,363.0 8 | 1994-12-12,355.0,355.5,329.0,344.0,367400,344.0 9 | 1994-12-19,345.0,345.5,337.0,344.75,137600,344.75 10 | 1994-12-26,339.0,344.75,336.35,341.2,70050,341.2 11 | 1995-01-02,341.0,343.0,315.0,316.75,235400,316.75 12 | 1995-01-09,319.25,319.25,277.5,286.3,1330100,286.3 13 | 1995-01-16,288.0,294.0,271.0,272.5,724000,272.5 14 | 1995-01-23,274.5,274.5,235.15,256.2,1151100,256.2 15 | 1995-01-30,250.0,282.55,248.0,279.05,1962650,279.05 16 | 1995-02-06,280.0,281.0,262.0,269.85,1348800,269.85 17 | 1995-02-13,270.0,274.7,250.0,255.45,1114200,255.45 18 | 1995-02-20,255.0,282.0,237.25,266.7,1911100,266.7 19 | 1995-02-28,263.5,291.5,258.0,285.5,2423250,285.5 20 | 1995-03-06,286.0,297.0,271.0,286.45,2130300,286.45 21 | 1995-03-13,282.0,290.0,250.0,266.95,4470750,266.95 22 | 1995-03-20,255.0,267.25,250.0,259.75,2033150,259.75 23 | 1995-03-27,264.75,274.45,258.0,265.45,1794400,265.45 24 | 1995-04-03,267.0,288.05,262.0,282.75,1939300,282.75 25 | 1995-04-10,282.0,282.25,275.0,280.7,841750,280.7 26 | 1995-04-17,284.0,286.0,274.25,279.55,1512550,279.55 27 | 1995-04-24,280.0,280.2,246.5,249.9,2109650,249.9 28 | -------------------------------------------------------------------------------- /py/eodhd/pricereader.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | base_path = 'price_data' 3 | 4 | def get_price_data(stockname, period): 5 | """ 6 | Fetches stock price data from CSV files for the given stock name and period. 7 | Sets the 'Date' column as a DatetimeIndex. 
8 | 9 | :param stockname: Name of the stock (str) 10 | :param period: List of periods for which to fetch data ['d', 'w', 'm'] 11 | :return: Dictionary of DataFrames with keys as the period 12 | """ 13 | 14 | df = pd.DataFrame() 15 | 16 | # Mapping of period to file suffix 17 | period_suffix = {'d': '_D.csv', 'w': '_W.csv', 'm': '_M.csv'} 18 | 19 | 20 | # Construct file path based on stock name and period 21 | file_path = f"{base_path}/{stockname}{period_suffix[period]}" 22 | try: 23 | # Read the data from the file and set the 'Date' column as the index 24 | df = pd.read_csv(file_path, parse_dates=['Date']) 25 | df.set_index('Date', inplace=True) 26 | except FileNotFoundError: 27 | print(f"No data available for {stockname} for period: {period}") 28 | 29 | return df 30 | 31 | ''' 32 | This requires to pass df, after selection of the timeframe 33 | ''' 34 | def get_price_daterange(df, start_date, end_date): 35 | # Ensure the dates are in the correct format 36 | start_date = pd.to_datetime(start_date) 37 | end_date = pd.to_datetime(end_date) 38 | 39 | # Filter the dataframe 40 | filtered_df = df[(df.index >= start_date) & (df.index <= end_date)] 41 | 42 | return filtered_df 43 | -------------------------------------------------------------------------------- /py/ai/newsarranger/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.8.0 3 | beautifulsoup4==4.12.3 4 | Brotli==1.1.0 5 | cachetools==5.5.0 6 | certifi==2024.12.14 7 | cffi==1.17.1 8 | charset-normalizer==3.4.1 9 | cobble==0.1.4 10 | cryptography==44.0.0 11 | cssselect2==0.7.0 12 | defusedxml==0.7.1 13 | distro==1.9.0 14 | docopt==0.6.2 15 | et_xmlfile==2.0.0 16 | fonttools==4.55.3 17 | google-auth==2.37.0 18 | google-genai==0.4.0 19 | h11==0.14.0 20 | httpcore==1.0.7 21 | httpx==0.28.1 22 | idna==3.10 23 | jiter==0.8.2 24 | lxml==5.3.0 25 | mammoth==1.9.0 26 | markdown2==2.5.2 27 | Markdown2PDF==0.1.4 28 | markdownify==0.14.1 29 | -e git+https://github.com/microsoft/markitdown.git@f58a864951da6c720d3e10987371133c67db296a#egg=markitdown 30 | md2pdf==1.0.1 31 | numpy==2.2.1 32 | olefile==0.47 33 | openai==1.59.6 34 | openpyxl==3.1.5 35 | pandas==2.2.3 36 | pathvalidate==3.2.3 37 | pdfkit==1.0.0 38 | pdfminer.six==20240706 39 | pillow==11.1.0 40 | puremagic==1.28 41 | pyasn1==0.6.1 42 | pyasn1_modules==0.4.1 43 | pycparser==2.22 44 | pydantic==2.10.5 45 | pydantic_core==2.27.2 46 | pydub==0.25.1 47 | pydyf==0.11.0 48 | pyphen==0.17.0 49 | python-dateutil==2.9.0.post0 50 | python-dotenv==1.0.1 51 | python-pptx==1.0.2 52 | pytz==2024.2 53 | requests==2.32.3 54 | rsa==4.9 55 | setuptools==75.1.0 56 | six==1.17.0 57 | sniffio==1.3.1 58 | soupsieve==2.6 59 | SpeechRecognition==3.13.0 60 | tinycss2==1.4.0 61 | tinyhtml5==2.0.0 62 | tqdm==4.67.1 63 | typing_extensions==4.12.2 64 | tzdata==2024.2 65 | urllib3==2.3.0 66 | weasyprint==63.1 67 | webencodings==0.5.1 68 | websockets==14.1 69 | wheel==0.44.0 70 | xlrd==2.0.1 71 | XlsxWriter==3.2.0 72 | youtube-transcript-api==0.6.3 73 | zopfli==0.2.3.post1 74 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/stock_chat.py: -------------------------------------------------------------------------------- 1 | from analysis_utils import initialize_client, show_parts, log_message, log_message_r, start_log_file, end_log_file 2 | from datetime import datetime 3 | import os 4 | 5 | chat_output_folder = "output" 6 | my_model = 'gemini-2.0-flash' 7 | client = 
initialize_client('GOOGLE_API_KEY') 8 | 9 | def main(): 10 | search_tool = {'google_search': {}} 11 | stock_chat = client.chats.create(model=my_model, config={'tools': [search_tool]}) 12 | 13 | while True: 14 | stock = input('Enter stock or company to chat on (or type bye to leave): ') 15 | if stock == 'bye': 16 | break 17 | 18 | timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') 19 | log_file = f"{chat_output_folder}/{stock}_chat_log_{timestamp}.html" 20 | start_log_file(log_file) 21 | log_message(log_file, f"User selected stock/company: {stock}", "info") 22 | 23 | date_now = datetime.now().strftime('%Y-%m-%d') 24 | stock_prompt_prefix = f'Date today is: {date_now}. Answer following in context of the company/stock_code {stock}, listed in India.\n' 25 | 26 | while True: 27 | input_txt = input('Ask >> : ') 28 | if input_txt == 'exit': 29 | break 30 | log_message(log_file, f"User input: {input_txt}", "user") 31 | print("-" * 80) 32 | response = stock_chat.send_message(f"{stock_prompt_prefix}{input_txt}") 33 | show_parts(response) 34 | log_message_r(log_file, response, "model") 35 | print(f'Working on: {stock}\n Type exit to work on new stock/company.') 36 | 37 | end_log_file(log_file) 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /py/beta/chatgpt/model.py: -------------------------------------------------------------------------------- 1 | import openai 2 | import os 3 | from dotenv import load_dotenv, find_dotenv 4 | 5 | model_name = 'gpt-4' #gpt-3.5-turbo 6 | 7 | def get_completion(prompt, model=model_name): 8 | messages = [{"role": "user", "content": prompt}] 9 | response = openai.ChatCompletion.create( 10 | model=model, 11 | messages=messages, 12 | temperature=0, # this is the degree of randomness of the model's output 13 | ) 14 | return response.choices[0].message["content"] 15 | 16 | def get_completion_large(messages, 17 | model=model_name, 18 | temperature=0, 19 | max_tokens=1000): 20 | continuation_token = None 21 | 22 | while True: 23 | response = openai.Completion.create( 24 | model=model, 25 | messages=messages, 26 | temperature=temperature, 27 | max_tokens=max_tokens, 28 | continuation_token=continuation_token 29 | ) 30 | 31 | chunk = response.choices[0].message['content'] 32 | messages.append({'role': 'system', 'content': chunk}) 33 | 34 | continuation_token = response['choices'][0]['finish_reason'] 35 | 36 | if continuation_token == 'stop': 37 | break 38 | 39 | return response.choices[0].message["content"] 40 | 41 | 42 | def get_completion_from_messages(messages, 43 | model=model_name, 44 | temperature=0, 45 | max_tokens=500): 46 | response = openai.ChatCompletion.create( 47 | model=model, 48 | messages=messages, 49 | temperature=temperature, 50 | max_tokens=max_tokens, 51 | ) 52 | return response.choices[0].message["content"] 53 | 54 | def set_api(): 55 | _ = load_dotenv(find_dotenv()) # read local .env file 56 | openai.api_key = os.environ['OPENAI_API_KEY'] 57 | -------------------------------------------------------------------------------- /py/eodhd/ath_scan.py: -------------------------------------------------------------------------------- 1 | import pricereader as pr 2 | import pandas as pd 3 | import time 4 | 5 | # Read the list of stocks from the CSV file 6 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 7 | 8 | # Set the bar time frame 9 | data_interval = 'm' 10 | 11 | # Initialize a list to store the results 12 | results = [] 13 | 14 | # Iterate through the 
list of stocks 15 | for stock in stocks["Ticker"]: 16 | try: 17 | # Get the stock data 18 | data = pr.get_price_data(stock, data_interval) 19 | # Drop those with NaN 20 | data = data.dropna() 21 | # Drop last row, if 2nd last is already of the month 22 | if data.index[-1].month == data.index[-2].month: 23 | # Replace the values in the second-to-last row with the values in the last row 24 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 25 | # Delete the last row 26 | data = data.drop(data.index[-1]) 27 | 28 | # print(data) 29 | # data = data.iloc[:-1 , :] // If previous month ATH stocks are desired 30 | 31 | # Initialize the ATH to the first close price and the ATH date to the first date 32 | ath = data.at[data.index[0], 'High'] 33 | ath_date = data.index[0] 34 | 35 | data_iter = data.iloc[:-1] 36 | 37 | # Loop through each row of the dataframe 38 | for index, row in data_iter.iterrows(): 39 | # Update the ATH and ATH date if the current close price is higher 40 | if row['High'] > ath: 41 | ath = row['High'] 42 | ath_date = index 43 | 44 | # print(stock + " green line: " + str(green_line) + " green line date: " + str(green_line_date)) 45 | last_close = data.at[data.index[-1], 'Close'] 46 | 47 | if last_close > ath: 48 | # print(stock +" close: " + str(last_close) + " ath: " + str(ath) + " ath date: " + str(ath_date)) 49 | results.append(stock) 50 | 51 | except Exception as e: 52 | print("Error for ticker: " + stock) 53 | print(e) 54 | 55 | # Print the results 56 | print(results) 57 | print("Done") -------------------------------------------------------------------------------- /py/yf/daily_rs_55_bo.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Detect breakout of CRS from 55 day average 3 | Daily timeframe 4 | ''' 5 | 6 | import yfinance as yf 7 | import pandas as pd 8 | 9 | # Set the bar time frame 10 | data_interval = '1d' 11 | 12 | # Set the time frame to max 13 | time_frame = '1y' 14 | 15 | # Set CRS average length 16 | average_length = 55 17 | 18 | # Specify the benchmark symbol 19 | benchmark = "^NSEI" 20 | 21 | # Read the list of stocks from the CSV file 22 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 23 | 24 | def main(): 25 | print('Started') 26 | 27 | # Use yfinance to retrieve the benchmark data 28 | benchmark_ticker = yf.Ticker(benchmark) 29 | benchmark_data = benchmark_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 30 | benchmark_data = benchmark_data.dropna() 31 | 32 | # Iterate through the list of stocks 33 | for stock in stocks["Ticker"]: 34 | try: 35 | ticker = yf.Ticker(stock+".NS") 36 | stock_history = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 37 | stock_history = stock_history.dropna() 38 | 39 | # Create a new column in the stock dataframe for relative strength 40 | rs_column = 'Relative_Strength' 41 | stock_history[rs_column] = stock_history['Close'] / benchmark_data['Close'] 42 | 43 | # Calculate the average_length-day moving average of the 'Relative_Strength' column 44 | crs_average_column = f'{average_length}_RS_MA' 45 | stock_history[crs_average_column] = stock_history[rs_column].rolling(window=average_length).mean() 46 | 47 | # Check if there is a cross over of crs 48 | isCrossOver = stock_history.iloc[-2][rs_column] <= stock_history.iloc[-2][crs_average_column] and \ 49 | stock_history.iloc[-1][rs_column] > stock_history.iloc[-1][crs_average_column] 50 | if (isCrossOver): 51 | print(stock) 52 | 53 | except Exception as e: 54 | 
print(f"Error: {stock} ==> {e}") 55 | 56 | if __name__ == "__main__": 57 | main() 58 | -------------------------------------------------------------------------------- /py/beta/concall_transcript_summarize.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PyPDF2 import PdfReader 3 | from transformers import PegasusTokenizer, PegasusForConditionalGeneration 4 | 5 | #path of the folder where your pdfs are located 6 | folder_path = "concallpdfs" 7 | 8 | # Max token size 9 | max_seq_length = 512 10 | 11 | # Max token for pegasus financial summarization 12 | max_length_pegasus_fin_summ = 32 13 | 14 | # Pick model 15 | # model_name = "google/pegasus-xsum" -- used for testing 16 | model_name = "human-centered-summarization/financial-summarization-pegasus" 17 | 18 | # Load pretrained tokenizer 19 | pegasus_tokenizer = PegasusTokenizer.from_pretrained(model_name) 20 | 21 | # Make model from pre-trained model 22 | model = PegasusForConditionalGeneration.from_pretrained(model_name) 23 | 24 | for filename in os.listdir(folder_path): 25 | if filename.endswith(".pdf"): 26 | pdf_path = os.path.join(folder_path, filename) 27 | with open(pdf_path, "rb") as file: 28 | print(f'Summarizing {filename}') 29 | reader = PdfReader(file) 30 | page_summaries = [] 31 | count = 0 32 | for page in reader.pages: # summarize page by page 33 | page_text = page.extract_text() 34 | # Generate input tokens 35 | input_ids = pegasus_tokenizer(page_text, max_length=max_seq_length, truncation=True, return_tensors="pt").input_ids 36 | # Generate Summary 37 | summary_ids = model.generate(input_ids, max_length=max_length_pegasus_fin_summ, num_beams=5, early_stopping=True) 38 | tgt_texts = pegasus_tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False) 39 | page_summaries.append(tgt_texts[0]) 40 | count = count + 1 41 | # print(f'{count} page(s) done') 42 | # Merge all page summaries 43 | merged_summary = "\n".join(page_summaries) 44 | # Write the merged summary to a file 45 | with open(f'{folder_path}/{filename}_summary.txt', 'w') as f: 46 | f.write(merged_summary) 47 | print(f'{filename} done') 48 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | annotated-types==0.6.0 3 | anthropic==0.34.2 4 | anyio==4.3.0 5 | attrs==24.2.0 6 | black==24.10.0 7 | cachetools==5.3.2 8 | cattrs==23.2.3 9 | certifi==2023.11.17 10 | charset-normalizer==3.3.2 11 | click==8.1.7 12 | colorama==0.4.6 13 | dill==0.3.9 14 | distro==1.9.0 15 | dnspython==2.7.0 16 | ell-ai==0.0.14 17 | email_validator==2.2.0 18 | exceptiongroup==1.2.0 19 | fastapi==0.115.6 20 | fastapi-cli==0.0.5 21 | ffmpy==0.5.0 22 | filelock==3.16.1 23 | fsspec==2024.10.0 24 | google-ai-generativelanguage==0.6.10 25 | google-api-core==2.15.0 26 | google-api-python-client==2.125.0 27 | google-auth==2.25.2 28 | google-auth-httplib2==0.2.0 29 | google-genai==0.1.0 30 | google-generativeai==0.8.3 31 | googleapis-common-protos==1.62.0 32 | gradio==5.9.1 33 | gradio_client==1.5.2 34 | groq==0.11.0 35 | grpcio==1.60.0 36 | grpcio-status==1.60.0 37 | h11==0.14.0 38 | httpcore==1.0.5 39 | httplib2==0.22.0 40 | httptools==0.6.4 41 | httpx==0.27.0 42 | huggingface-hub==0.26.2 43 | idna==3.6 44 | Jinja2==3.1.4 45 | jiter==0.7.0 46 | markdown-it-py==3.0.0 47 | markdown2==2.5.2 48 | MarkupSafe==2.1.5 49 | mdurl==0.1.2 50 | 
mypy-extensions==1.0.0 51 | numpy==2.1.3 52 | openai==1.54.3 53 | orjson==3.10.12 54 | packaging==24.2 55 | pandas==2.2.3 56 | pathspec==0.12.1 57 | pillow==10.4.0 58 | platformdirs==4.3.6 59 | proto-plus==1.23.0 60 | protobuf==4.25.1 61 | psutil==5.9.8 62 | pyasn1==0.5.1 63 | pyasn1-modules==0.3.0 64 | pydantic==2.7.0 65 | pydantic_core==2.18.1 66 | pydub==0.25.1 67 | Pygments==2.18.0 68 | pyparsing==3.1.2 69 | python-dateutil==2.9.0.post0 70 | python-dotenv==1.0.0 71 | python-multipart==0.0.20 72 | pytz==2024.2 73 | PyYAML==6.0.2 74 | requests==2.32.3 75 | rich==13.9.4 76 | rsa==4.9 77 | ruff==0.8.4 78 | safehttpx==0.1.6 79 | semantic-version==2.10.0 80 | shellingham==1.5.4 81 | six==1.17.0 82 | sniffio==1.3.1 83 | SQLAlchemy==2.0.36 84 | sqlmodel==0.0.21 85 | starlette==0.41.3 86 | tokenizers==0.20.3 87 | tomli==2.0.2 88 | tomlkit==0.13.2 89 | tqdm==4.66.1 90 | typer==0.13.0 91 | typing_extensions==4.12.2 92 | tzdata==2024.2 93 | uritemplate==4.1.1 94 | urllib3==2.1.0 95 | uvicorn==0.30.6 96 | uvloop==0.21.0 97 | watchfiles==0.24.0 98 | websockets==14.0 99 | -------------------------------------------------------------------------------- /py/ai/market_analyzer/analysis_utils.py: -------------------------------------------------------------------------------- 1 | # analysis_utils.py 2 | 3 | import os 4 | import json 5 | from datetime import datetime 6 | from rich.console import Console 7 | from rich.markdown import Markdown 8 | import markdown2 9 | from dotenv import load_dotenv, find_dotenv 10 | from google import genai 11 | 12 | console = Console() 13 | 14 | def initialize_client(api_key_env_var): 15 | load_dotenv(find_dotenv()) 16 | api_key = os.getenv(api_key_env_var) 17 | if not api_key: 18 | raise ValueError(f"API key not found in environment variable {api_key_env_var}") 19 | return genai.Client(api_key=api_key) 20 | 21 | def show_json(obj): 22 | print(json.dumps(obj.model_dump(exclude_none=True), indent=2)) 23 | 24 | def show_parts(response): 25 | parts = response.candidates[0].content.parts 26 | if parts is None: 27 | print(f'finish_reason={response.candidates[0].finish_reason}') 28 | return 29 | for part in parts: 30 | if part.text: 31 | console.print(Markdown(part.text, hyperlinks=True)) 32 | grounding_metadata = response.candidates[0].grounding_metadata 33 | if grounding_metadata and grounding_metadata.search_entry_point: 34 | console.print(grounding_metadata.search_entry_point.rendered_content) 35 | 36 | def log_message(log_file, message, message_type="info"): 37 | timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 38 | log_entry = f"

<div class='{message_type}'>{timestamp} - {message}</div>
\n" 39 | with open(log_file, 'a', encoding='utf-8') as file: 40 | file.write(log_entry) 41 | 42 | def log_message_r(log_file, response, message_type="model"): 43 | parts = response.candidates[0].content.parts 44 | log_message_content = "Response: " 45 | if parts is None: 46 | log_message_content += f"\n{response.candidates[0].finish_reason}" 47 | else: 48 | log_message_content += "".join(part.text for part in parts if part.text) 49 | log_message_content = markdown2.markdown(log_message_content) 50 | grounding_metadata = response.candidates[0].grounding_metadata 51 | if grounding_metadata and grounding_metadata.search_entry_point: 52 | log_message_content += grounding_metadata.search_entry_point.rendered_content 53 | log_message(log_file, log_message_content, message_type) 54 | 55 | def start_log_file(log_file): 56 | with open(log_file, 'w') as file: 57 | file.write("\n") 58 | 59 | def end_log_file(log_file): 60 | with open(log_file, 'a') as file: 61 | file.write("") 62 | -------------------------------------------------------------------------------- /py/ai/newsarranger/get_news_arrange.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | from markitdown import MarkItDown 4 | from dotenv import load_dotenv, find_dotenv 5 | from google import genai 6 | import datetime 7 | 8 | """ 9 | Retrieve the news content from a location https://example.xxxstockxxxnews.com 10 | Then convert it to markdown format using markitdown library. 11 | Then pass on the markdown content to Google Gemini API to arrange and group the news feed provided based on the order of importance for an investor in the markets. 12 | """ 13 | 14 | news_url = 'https://example.xxxstockxxxnews.com' 15 | 16 | # Initialize the client, using Google Gemini API key 17 | def initialize_client(api_key_env_var): 18 | load_dotenv(find_dotenv()) 19 | api_key = os.getenv(api_key_env_var) 20 | if not api_key: 21 | raise ValueError(f"API key not found in environment variable {api_key_env_var}") 22 | return genai.Client(api_key=api_key) 23 | 24 | my_model = 'gemini-2.0-flash' 25 | client = initialize_client('GOOGLE_API_KEY') 26 | 27 | # Main function 28 | if __name__ == '__main__': 29 | 30 | # Current time is, dd-mm-YYYY HH:MM:SS 31 | timenow = datetime.datetime.now().strftime("%d-%m-%Y %H:%M:%S") 32 | 33 | # get the news file 34 | print(f"Start getting the news file at {timenow}...") 35 | response = requests.get(news_url) 36 | html_content = response.text 37 | # Save it to a file 38 | with open('output.html', 'w') as file: 39 | file.write(html_content) 40 | 41 | print(f"Start converting the news file to markdown format at {timenow}...") 42 | md = MarkItDown() 43 | result = md.convert("output.html") 44 | #print(result.text_content) 45 | # Save the markdown content to a file 46 | with open('output.md', 'w') as file: 47 | file.write(result.text_content) 48 | 49 | print(f"Start arranging the news file at {timenow}...") 50 | analyzer = client.chats.create(model=my_model) 51 | response = analyzer.send_message(f"Arrange and group the news feed provided based on the order of importance for an investor in the markets. Include whatever data related to the news is available in the input, such as short summaries, hyperlinks etc. If available include time of report of the news. The time now is: {timenow}. The input is in markdown. 
Input: {result.text_content}") 52 | output = "" 53 | parts = response.candidates[0].content.parts 54 | if parts is None: 55 | print(f'finish_reason={response.candidates[0].finish_reason}') 56 | for part in parts: 57 | if part.text: 58 | #print(part.text) 59 | # join the text parts 60 | output += part.text 61 | 62 | # Save the output to a file 63 | with open('output_arranged.md', 'w') as file: 64 | file.write(output) 65 | -------------------------------------------------------------------------------- /py/yf/glb_scan.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import pandas as pd 3 | import time 4 | 5 | # Read the list of stocks from the CSV file 6 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 7 | # Exchange, ".BO, .NS" 8 | exchange = ".NS" 9 | 10 | # Set the time frame to max 11 | time_frame = 'max' 12 | 13 | # Set the bar time frame 14 | data_interval = '1mo' 15 | 16 | # Set the green line to the all-time high of the stock 17 | green_line = 0.0 18 | 19 | # Set the minimum number of months since the ath/green line was breached 20 | min_months = 2 21 | 22 | # Initialize a list to store the results 23 | results = [] 24 | 25 | # Iterate through the list of stocks 26 | for stock in stocks["Ticker"]: 27 | try: 28 | # Get the stock data from yfinance, dont adjust OHLC 29 | ticker = yf.Ticker(f'{stock}{exchange}') 30 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 31 | # Drop those with NaN 32 | data = data.dropna() 33 | # Drop last row, if 2nd last is already of the month 34 | if data.index[-1].month == data.index[-2].month: 35 | # Replace the values in the second-to-last row with the values in the last row 36 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 37 | # Delete the last row 38 | data = data.drop(data.index[-1]) 39 | 40 | # print(data) 41 | 42 | # Initialize the ATH to the first close price and the ATH date to the first date 43 | ath = data.at[data.index[0], 'High'] 44 | ath_date = data.index[0] 45 | green_line = ath 46 | green_line_date = ath_date 47 | 48 | # Loop through each row of the dataframe 49 | for index, row in data.iterrows(): 50 | # Update the ATH and ATH date if the current close price is higher 51 | if row['High'] > ath: 52 | ath = row['High'] 53 | ath_date = index 54 | # Update Greenline if condition of minimum months is met 55 | if data.index.get_loc(index) - data.index.get_loc(ath_date) >= min_months: 56 | green_line = ath 57 | green_line_date = ath_date 58 | 59 | # print(stock + " green line: " + str(green_line) + " green line date: " + str(green_line_date)) 60 | last_close = data.at[data.index[-1], 'Close'] 61 | second_last_close = data.at[data.index[-2], 'Close'] 62 | if second_last_close < green_line and last_close > green_line: 63 | # print(stock +" close: " + str(last_close) + " second last close: " + str(second_last_close) + " green line: " + str(green_line) + " green line date: " + str(green_line_date)) 64 | results.append(stock) 65 | 66 | except Exception as e: 67 | print("Error for ticker: " + stock) 68 | print(e) 69 | 70 | # Print the results 71 | print(results) 72 | ex = 'NSE' if exchange == '.NS' else 'BSE' 73 | for stk in results: 74 | print(f'{ex}:{stk},') 75 | print("Done") 76 | -------------------------------------------------------------------------------- /py/eodhd/price_data/RELIANCE_D.csv: -------------------------------------------------------------------------------- 1 | Date,Open,High,Low,Close,Volume,Adj Close 2 | 
1994-11-03,375.0,400.0,375.0,396.0,42650,396.0 3 | 1994-11-07,396.0,398.25,393.0,395.5,58700,395.5 4 | 1994-11-08,398.0,398.75,393.0,396.0,49050,396.0 5 | 1994-11-09,399.0,399.0,385.75,387.0,57500,387.0 6 | 1994-11-10,387.0,390.0,380.0,380.5,67250,380.5 7 | 1994-11-11,371.0,378.5,369.5,372.75,65000,372.75 8 | 1994-11-14,373.0,373.0,358.0,361.5,30450,361.5 9 | 1994-11-15,360.0,364.0,356.75,361.25,66900,361.25 10 | 1994-11-16,364.25,377.0,363.0,373.75,72800,373.75 11 | 1994-11-17,377.0,390.0,373.75,387.5,88850,387.5 12 | 1994-11-21,385.0,387.0,377.5,378.25,30350,378.25 13 | 1994-11-22,381.0,381.25,370.0,371.0,27700,371.0 14 | 1994-11-23,375.0,375.0,367.0,367.25,25300,367.25 15 | 1994-11-24,370.0,379.0,369.0,378.0,54500,378.0 16 | 1994-11-25,381.0,384.0,378.0,379.5,36900,379.5 17 | 1994-11-28,377.5,385.0,374.0,383.0,6000,383.0 18 | 1994-11-29,385.0,387.0,380.0,380.5,77400,380.5 19 | 1994-11-30,382.0,382.0,379.0,380.25,21950,380.25 20 | 1994-12-01,378.5,381.75,378.0,379.0,42800,379.0 21 | 1994-12-02,376.75,377.0,372.5,374.25,10700,374.25 22 | 1994-12-05,373.5,382.5,369.5,380.25,55800,380.25 23 | 1994-12-06,378.0,380.0,371.0,372.5,47800,372.5 24 | 1994-12-07,373.0,373.0,364.0,366.25,40950,366.25 25 | 1994-12-08,363.5,363.5,359.0,361.0,29900,361.0 26 | 1994-12-09,360.0,365.0,358.5,363.0,48600,363.0 27 | 1994-12-12,355.0,355.5,344.0,347.0,54750,347.0 28 | 1994-12-13,345.0,346.0,329.0,332.75,55800,332.75 29 | 1994-12-14,333.0,342.5,332.25,336.5,107100,336.5 30 | 1994-12-15,336.0,347.0,336.0,344.75,84000,344.75 31 | 1994-12-16,345.0,350.0,342.75,344.0,65750,344.0 32 | 1994-12-19,345.0,345.0,338.0,338.5,26200,338.5 33 | 1994-12-20,342.0,342.0,337.5,339.5,26450,339.5 34 | 1994-12-21,337.0,344.0,337.0,343.0,34100,343.0 35 | 1994-12-22,342.0,342.0,339.0,340.0,15700,340.0 36 | 1994-12-23,339.0,345.5,338.5,344.75,35150,344.75 37 | 1994-12-26,339.0,344.75,338.55,339.0,37050,339.0 38 | 1994-12-27,340.0,340.0,336.35,337.75,7450,337.75 39 | 1994-12-28,340.5,340.5,339.25,339.25,7650,339.25 40 | 1994-12-29,339.5,339.75,338.75,339.2,3150,339.2 41 | 1994-12-30,339.0,341.5,339.0,341.2,14750,341.2 42 | 1995-01-02,341.0,343.0,340.5,341.2,13600,341.2 43 | 1995-01-03,342.5,342.5,336.1,336.2,11450,336.2 44 | 1995-01-04,339.95,340.5,330.3,332.0,62600,332.0 45 | 1995-01-05,332.0,333.0,320.25,321.3,59200,321.3 46 | 1995-01-06,323.0,323.5,315.0,316.75,88550,316.75 47 | 1995-01-09,319.25,319.25,299.0,302.35,264800,302.35 48 | 1995-01-10,303.0,303.0,283.0,287.35,351750,287.35 49 | 1995-01-11,290.0,300.0,280.0,295.25,168000,295.25 50 | 1995-01-12,294.0,297.5,277.5,278.25,375850,278.25 51 | 1995-01-13,280.0,288.5,279.0,286.3,169700,286.3 52 | 1995-01-16,288.0,294.0,282.0,284.1,149800,284.1 53 | 1995-01-17,285.0,286.0,275.05,277.3,255950,277.3 54 | 1995-01-18,279.25,284.7,278.0,283.35,88500,283.35 55 | 1995-01-19,282.0,287.5,282.0,283.95,64350,283.95 56 | 1995-01-20,284.0,285.45,271.0,272.5,165400,272.5 57 | 1995-01-23,274.5,274.5,250.3,251.7,175050,251.7 58 | 1995-01-24,250.0,251.75,235.15,246.4,287400,246.4 59 | 1995-01-25,245.0,250.0,238.0,242.5,372400,242.5 60 | 1995-01-27,245.0,261.0,242.0,256.2,316250,256.2 61 | 1995-01-30,250.0,269.0,248.0,267.95,425600,267.95 62 | 1995-01-31,265.0,276.65,264.0,272.45,301000,272.45 63 | -------------------------------------------------------------------------------- /py/yf/ars_srs_scan.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import pandas as pd 3 | import time 4 | import datetime 5 | 6 | def cleanUp_data(data): 7 
| # Drop those with NaN 8 | data = data.dropna() 9 | return data 10 | 11 | # set the file name of stocks 12 | stock_filename = "stocks.csv" 13 | 14 | # Set the time frame to max 15 | time_frame = '2y' 16 | 17 | # Set the bar time frame 18 | data_interval = '1d' 19 | 20 | # Specify the benchmark symbol 21 | benchmark = "^NSEI" 22 | 23 | 24 | # Specify the reference date 25 | reference_date = "2022-06-03" 26 | 27 | # Specify the number of rows to look back for the Static RS calculation 28 | srs_length = 123 29 | 30 | # Read the list of stocks from the CSV file 31 | stocks = pd.read_csv(stock_filename, header=0, usecols=["Ticker"]) 32 | 33 | # Use yfinance to retrieve the benchmark data 34 | benchmark_ticker = yf.Ticker(benchmark) 35 | benchmark_data = benchmark_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 36 | benchmark_data = cleanUp_data(benchmark_data) 37 | 38 | # Create an empty list to store the stock data 39 | stock_data_list = [] 40 | 41 | # Iterate through the list of stocks 42 | for stock in stocks["Ticker"]: 43 | try: 44 | ticker = yf.Ticker(stock+".NS") 45 | 46 | # Use yfinance to retrieve the stock data 47 | stock_data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 48 | stock_data = cleanUp_data(stock_data) 49 | 50 | # Calculate the Adaptive relative strength (ARS) using the formula you provided 51 | stock_data["Adaptive RS"] = (stock_data["Close"] / stock_data.loc[stock_data.index == reference_date, "Close"].values[0]) / (benchmark_data["Close"] / benchmark_data.loc[benchmark_data.index == reference_date, "Close"].values[0]) - 1 52 | 53 | # Calculate the Static relative strength (SRS) using the formula you provided and the specified number of rows to look back 54 | stock_close_123 = stock_data.at[stock_data.index[-123], 'Close'] 55 | benchmark_close_123 = benchmark_data.at[benchmark_data.index[-123], 'Close'] 56 | stock_data["Static RS"] = (stock_data["Close"] /stock_close_123) / (benchmark_data["Close"] / benchmark_close_123) - 1 57 | 58 | # Get the last row of the stock data 59 | last_row = stock_data.tail(1) 60 | 61 | # Extract the ARS and SRS values from the last row 62 | ars = round(last_row["Adaptive RS"].values[0], 2) 63 | srs = round(last_row["Static RS"].values[0], 2) 64 | 65 | # Create a dictionary with the stock name, ARS, and SRS values 66 | stock_data_dict = {"Stock": stock, "Adaptive RS": ars, "Static RS": srs} 67 | 68 | # Add the dictionary to the list 69 | stock_data_list.append(stock_data_dict) 70 | except Exception as e: 71 | print("Error " + stock) 72 | print(e) 73 | 74 | # print(stock_data_list) 75 | 76 | # Get the current timestamp 77 | timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") 78 | 79 | # Construct the file name using the timestamp 80 | filename = "rs_stock_data_" + timestamp + ".csv" 81 | 82 | # Convert the list of dictionaries to a dataframe 83 | stock_data_df = pd.DataFrame(stock_data_list) 84 | 85 | # Write the dataframe to the CSV file 86 | stock_data_df.to_csv(filename, index=False) 87 | -------------------------------------------------------------------------------- /py/eodhd/my_rsi.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script calculates the Combined Relative Strength Index (RSI) for a list of stocks. 3 | The Combined RSI is a technical indicator used in the analysis of financial markets. 
4 | It is intended to chart the current and historical strength or weakness of a stock or market based on the closing 5 | prices of a recent trading period. The Combined RSI is calculated by combining the traditional RSI with the volume. 6 | """ 7 | 8 | import pricereader as pr 9 | import pandas as pd 10 | import numpy as np 11 | import datetime 12 | 13 | # Set output folder path 14 | output_path = "output" 15 | 16 | # Read the list of stocks from the CSV file 17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 18 | 19 | def calculate_combined_rsi(df, period=14): 20 | """ 21 | Calculate the Combined Relative Strength Index (RSI) for a given DataFrame. 22 | 23 | Parameters: 24 | - df (pandas.DataFrame): DataFrame containing the stock data. 25 | - period (int): Number of periods to consider for calculating the RSI. Default is 14. 26 | 27 | Returns: 28 | - combined_rsi (pandas.Series): Series containing the Combined RSI values. 29 | """ 30 | # Calculate daily price change 31 | df['Price Change'] = df['Close'].diff() 32 | 33 | # Calculate volume ratio and volatility 34 | avg_volume = df['Volume'].rolling(window=period).mean() 35 | df['Volume Ratio'] = df['Volume'] / avg_volume 36 | volatility = df['Price Change'].rolling(window=period).std() 37 | 38 | # Combine volume and volatility adjustments 39 | df['Combined Gain'] = np.where(df['Price Change'] > 0, (df['Price Change'] * df['Volume Ratio']) / volatility, 0) 40 | df['Combined Loss'] = np.where(df['Price Change'] < 0, -(df['Price Change'] * df['Volume Ratio']) / volatility, 0) 41 | 42 | # Compute average combined gain and loss 43 | avg_combined_gain = df['Combined Gain'].rolling(window=period).mean() 44 | avg_combined_loss = df['Combined Loss'].rolling(window=period).mean() 45 | 46 | # Calculate Combined RS and RSI 47 | combined_rs = avg_combined_gain / avg_combined_loss 48 | combined_rsi = 100 - (100 / (1 + combined_rs)) 49 | 50 | return combined_rsi 51 | 52 | 53 | def main(): 54 | """ 55 | Main function that calculates the Combined RSI for a list of stocks and saves the results to a CSV file. 
56 | """ 57 | print("Started...") 58 | # Create the DataFrame 59 | result_df = pd.DataFrame(columns=['stock', 'my_rsi']) 60 | # Iterate through the list of stocks 61 | for stock in stocks["Ticker"]: 62 | try: 63 | # Get the daily stock data 64 | stock_data = pr.get_price_data(stock, 'd') 65 | # Drop those with NaN 66 | stock_data = stock_data.dropna() 67 | 68 | # Calculate combined RSI 69 | stock_data['Combined_RSI'] = calculate_combined_rsi(stock_data) 70 | # print(stock_data.tail()) 71 | last_row_idx = stock_data.index[-1] 72 | row = {'stock': stock, 'my_rsi': str(round(stock_data.loc[last_row_idx, 'Combined_RSI'], 2))} 73 | # Append the new row to the DataFrame 74 | result_df.loc[len(result_df)] = row 75 | 76 | except Exception as e: 77 | print("Error: " + stock) 78 | print(e) 79 | 80 | # Append current timestamp to the file name 81 | now = datetime.datetime.now() 82 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 83 | file_name = 'my_rsi_' + timestamp + '.csv' 84 | # Export the DataFrame to CSV 85 | result_df.to_csv(output_path + "/" + file_name, index=False) 86 | print(f'Saved file {file_name}') 87 | 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /py/ai/turnaround/my_tools/cmd_executor.py: -------------------------------------------------------------------------------- 1 | from smolagents import tool 2 | import subprocess 3 | import shlex 4 | 5 | @tool 6 | def cmd_executor(command: str, confirmed: bool = False) -> str: 7 | """ 8 | This tool executes readonly shell commands in a Linux/macOS environment. 9 | It is restricted to safe, readonly commands that do not modify the filesystem 10 | or system state. Useful for exploring directory structures, searching files, 11 | and gathering information about the system. 12 | 13 | LIMITATION: Only readonly commands are allowed for security. Commands that 14 | modify files, install software, or change system state are blocked. 15 | 16 | Allowed commands include: 17 | - ls, find, locate, which, whereis 18 | - grep, egrep, fgrep, zgrep 19 | - cat, head, tail, less, more 20 | - wc, sort, uniq, cut, awk, sed (readonly operations) 21 | - ps, top, htop, df, du, free 22 | - pwd, whoami, id, uname, date 23 | - file, stat, lsof 24 | 25 | Args: 26 | command (str): The shell command to execute (must be readonly). 27 | confirmed (bool): Must be set to True to confirm command execution. 28 | Defaults to False for safety. 29 | Returns: 30 | str: The output of the command or an error message. 31 | """ 32 | # check if operation is confirmed 33 | if not confirmed: 34 | return "Error: Command execution not confirmed. Set confirmed=True to proceed with running the command." 35 | 36 | if not command.strip(): 37 | return "No command provided." 38 | 39 | # List of allowed readonly commands 40 | allowed_commands = { 41 | 'ls', 'find', 'locate', 'which', 'whereis', 42 | 'grep', 'egrep', 'fgrep', 'zgrep', 'rg', 'ag', 43 | 'cat', 'head', 'tail', 'less', 'more', 44 | 'wc', 'sort', 'uniq', 'cut', 'awk', 'sed', 45 | 'ps', 'top', 'htop', 'df', 'du', 'free', 46 | 'pwd', 'whoami', 'id', 'uname', 'date', 47 | 'file', 'stat', 'lsof', 'tree' 48 | } 49 | 50 | # Parse the command to get the base command 51 | try: 52 | parsed_command = shlex.split(command) 53 | base_command = parsed_command[0] if parsed_command else "" 54 | except ValueError: 55 | return "Error: Invalid command syntax." 
56 | 57 | # Check if the base command is allowed 58 | if base_command not in allowed_commands: 59 | return f"Error: Command '{base_command}' is not allowed. Only readonly commands are permitted." 60 | 61 | # Additional safety checks for potentially dangerous flags 62 | dangerous_patterns = ['rm', 'mv', 'cp', 'chmod', 'chown', 'sudo', '>', '>>', '|', '&&', '||', ';'] 63 | for pattern in dangerous_patterns: 64 | if pattern in command: 65 | return f"Error: Command contains potentially dangerous pattern '{pattern}'. Only readonly operations are allowed." 66 | 67 | try: 68 | # Execute the command with timeout for safety 69 | result = subprocess.run( 70 | command, 71 | shell=True, 72 | capture_output=True, 73 | text=True, 74 | timeout=30, # 30 second timeout 75 | cwd=None # Use current working directory 76 | ) 77 | 78 | if result.returncode == 0: 79 | return result.stdout if result.stdout else "Command executed successfully (no output)." 80 | else: 81 | return f"Command failed with return code {result.returncode}:\n{result.stderr}" 82 | 83 | except subprocess.TimeoutExpired: 84 | return "Error: Command timed out after 30 seconds." 85 | except Exception as e: 86 | return f"An error occurred while executing the command: {str(e)}" -------------------------------------------------------------------------------- /py/eodhd/how_many_weeks_high.py: -------------------------------------------------------------------------------- 1 | """ 2 | This scrip will fetch the current high price of a stock and calculate how many weeks it 3 | has been since the stock was at that price. 4 | """ 5 | import pricereader as pr 6 | import pandas as pd 7 | import time 8 | import datetime 9 | 10 | # Read the list of stocks from the CSV file 11 | stocks = pd.read_csv("stocks5.csv", header=0, usecols=["Ticker"]) 12 | 13 | # Set output folder path 14 | output_path = "output" 15 | 16 | # Function to get the number of bars to reach the high that t 17 | # stock_data: DataFrame containing the stock data 18 | # Date,Open,High,Low,Close,Volume,Adj Close 19 | # 2002-07-01,283.25,331.0,283.25,317.8,11803,317.8 20 | # 2002-07-08,303.6,327.0,300.0,300.45,10390,300.45 21 | # 2002-07-15,296.2,305.0,290.3,300.0,4744,300.0 22 | # 2002-07-22,286.0,315.0,280.0,304.4,21643,304.4 23 | def get_previous_index_prce_for_last_high(stock_data): 24 | """ 25 | This function will first fetch the high price of the latest date (latest week) 26 | Then for each row before this, it will check if this high price was reached or crossed 27 | If it was, it will return the number of weeks it took to reach this price 28 | If it was not, it will return -1, indicating that the stock is ATH (All time high) 29 | stock_data: DataFrame containing the stock data, in acsending order of date 30 | """ 31 | # Get the high price of the latest date 32 | latest_high = stock_data['High'].iloc[-1] 33 | 34 | # Iterate through the rows in reverse order 35 | for index in reversed(stock_data.index[:-1]): 36 | # Check if the high price was reached or crossed 37 | if stock_data.loc[index, 'High'] >= latest_high: 38 | # Return the index of the row where this price was reached 39 | return index, stock_data.loc[index, 'High'] 40 | 41 | # Return last index if the high price was not reached or crossed 42 | return stock_data.index[-1], latest_high 43 | 44 | 45 | def main(): 46 | print("Started...") 47 | # Create the DataFrame 48 | result_df = pd.DataFrame(columns=['stock', 'High of latest week', 'Last such week high', \ 49 | 'Days passed', 'High of that week', 'diff%']) 50 | # Iterate 
through the list of stocks 51 | for stock in stocks["Ticker"]: 52 | try: 53 | # Get the daily stock data 54 | stock_data = pr.get_price_data(stock, 'w') 55 | # Drop those with NaN 56 | stock_data = stock_data.dropna() 57 | 58 | # Get the index and high price of the week when the stock was at its high 59 | index, high = get_previous_index_prce_for_last_high(stock_data) 60 | 61 | # Get the high price of the latest date 62 | latest_high = stock_data['High'].iloc[-1] 63 | # Current / last date 64 | latest_date = stock_data.index[-1] 65 | diff = round((latest_high - high) / high * 100,2) 66 | days_diff = (latest_date - index).days 67 | latest_high = round(latest_high,2) 68 | high = round(high,2) 69 | # Append the result to the DataFrame 70 | row = {'stock': stock, 'High of latest week': latest_high, 'Last such week high':index, \ 71 | 'Days passed': f'{days_diff}', 'High of that week': high, 'diff%': f'{diff}%'} 72 | result_df.loc[len(result_df)] = row 73 | print(f"Processed: {stock}") 74 | 75 | except Exception as e: 76 | print("Error: " + stock) 77 | print(e) 78 | 79 | # Append current timestamp to the file name 80 | now = datetime.datetime.now() 81 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 82 | file_name = 'weeks_to_high_' + timestamp + '.csv' 83 | # Export the DataFrame to CSV 84 | result_df.to_csv(output_path + "/" + file_name, index=False) 85 | print(f'Saved file {file_name}') 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/README.md: -------------------------------------------------------------------------------- 1 | # FinInsightGPT: AI-Powered Investment Analysis 2 | 3 | FinInsightGPT is an application that helps with investment decisions and business analysis by processing company data files, converting them to structured markdown, and generating comprehensive equity research reports using AI. 4 | 5 | ## Features 6 | 7 | - **Document Processing**: Converts various file formats (PDF, DOCX, PPTX, TXT, XLSX, images) to markdown 8 | - **Intelligent Image Analysis**: Uses OCR and AI vision to extract text and analyze charts/graphs 9 | - **Master File Generation**: Consolidates all company documents into a comprehensive master file 10 | - **AI Report Generation**: Creates detailed equity research reports using LLM models 11 | - **Command-line Interface**: Easy-to-use CLI for all operations 12 | 13 | ## Installation 14 | 15 | 1. Clone this repository 16 | 2. Install the required dependencies: 17 | 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | 3. Install Tesseract OCR (for image processing): 23 | - macOS: `brew install tesseract` 24 | - Ubuntu/Debian: `sudo apt-get install tesseract-ocr` 25 | - Windows: Download from [GitHub](https://github.com/UB-Mannheim/tesseract/wiki) 26 | 27 | 4. 
Set up your environment variables: 28 | - Copy the template file: `cp .env.example .env` 29 | - Edit the `.env` file and add your OpenAI API key and model preferences: 30 | 31 | ``` 32 | # OpenAI API Key 33 | OPENAI_API_KEY=your_openai_api_key_here 34 | 35 | # OpenAI Model IDs 36 | OPENAI_TEXT_MODEL=gpt-4-turbo 37 | OPENAI_VISION_MODEL=gpt-4-vision-preview 38 | ``` 39 | 40 | ## Usage 41 | 42 | ### Directory Structure 43 | 44 | Place company files in folders under `company_data`: 45 | 46 | ``` 47 | company_data/ 48 | ├── company1/ 49 | │ ├── file1.pdf 50 | │ ├── file2.txt 51 | │ └── image1.jpg 52 | └── company2/ 53 | ├── presentation.pptx 54 | └── financials.xlsx 55 | ``` 56 | 57 | ### Commands 58 | 59 | #### List available companies: 60 | 61 | ```bash 62 | python src/main.py list 63 | ``` 64 | 65 | #### Process files for a company: 66 | 67 | ```bash 68 | python src/main.py process 69 | ``` 70 | 71 | #### Generate master file from processed files: 72 | 73 | ```bash 74 | python src/main.py master [--output-dir ] 75 | ``` 76 | 77 | #### Generate report from master file: 78 | 79 | ```bash 80 | python src/main.py report [--template ] [--output-dir ] [--model ] 81 | ``` 82 | 83 | #### Run the entire pipeline (process files, generate master, create report): 84 | 85 | ```bash 86 | python src/main.py all [--template ] [--model ] 87 | ``` 88 | 89 | ### Examples 90 | 91 | Process files for CDSL: 92 | 93 | ```bash 94 | python src/main.py process cdsl 95 | ``` 96 | 97 | Generate a report for JyothyLabs using previously created master file: 98 | 99 | ```bash 100 | python src/main.py report jyothylabs_master_20250504_123456.md --model gpt-4-vision-preview 101 | ``` 102 | 103 | Run the entire pipeline for a new company: 104 | 105 | ```bash 106 | python src/main.py all mynewcompany --model gpt-4-turbo 107 | ``` 108 | 109 | ## Report Templates 110 | 111 | The system uses the template file in `prompt_master/Equity_Research_Report_Template.md` by default. This template contains: 112 | 113 | 1. A system prompt to instruct the AI model 114 | 2. A user prompt that defines the report structure and analysis requirements 115 | 116 | You can modify this template or create custom templates for different analysis styles. 
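For example, assuming you have saved a custom template as `prompt_master/My_Custom_Template.md` (a hypothetical file name), you could point the report command at it with the documented `--template` option: ```bash python src/main.py report cdsl_master_20250504_123456.md --template prompt_master/My_Custom_Template.md ``` The master file name above is also illustrative; use the actual master file generated for your company.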
117 | 118 | ## Dependencies 119 | 120 | - pymupdf: PDF processing 121 | - python-docx: DOCX processing 122 | - python-pptx: PowerPoint processing 123 | - pandas & openpyxl: Excel processing 124 | - Pillow & pytesseract: Image processing 125 | - openai: AI model integration 126 | - tiktoken: Token counting for LLM API calls -------------------------------------------------------------------------------- /py/ai/turnaround/main.py: -------------------------------------------------------------------------------- 1 | # load .env into environment 2 | import os 3 | from dotenv import load_dotenv 4 | load_dotenv() 5 | 6 | from smolagents import CodeAgent, MLXModel 7 | from my_tools import search_web as web_fetcher 8 | from my_tools import save_report as save_report 9 | from my_tools import fs_reader as fs_reader 10 | from my_tools import cmd_executor as cmd_executor 11 | from smolagents import CodeAgent, LiteLLMModel 12 | 13 | import datetime 14 | import csv 15 | 16 | # Initialize the tools and models 17 | #local_model=mlx_model = MLXModel("Path to local model directory") 18 | model = LiteLLMModel(model_id="openai/gpt-4.1-mini", api_key=os.getenv("OPENAI_API_KEY")) 19 | 20 | #Create the agent with the model and tools 21 | agent = CodeAgent(tools=[web_fetcher, save_report, fs_reader, cmd_executor], model=model, additional_authorized_imports=["os", "openai", "json", "csv"]) # Not adding base tools. 22 | 23 | # Define the data directory and today's date 24 | data_dir = "data/financial_data.csv" 25 | date_today = datetime.datetime.now().strftime("%Y-%m-%d") 26 | 27 | instructions = f""" 28 | You are an expert financial analyst specializing in identifying turnaround in companies. Analyze for the company mentioned below in Step 1. With the searched financial data and your analysis generate a comprehensive markdown report that detects potential turnarounds if any for the company. To achieve this, you will follow these steps in sequence: 29 | Step 1. Company/Business Name/Stock Codes: {{business_name}}. 30 | Step 2. For this business, analyse if the business is experiencing a turnaround. Give a short report of your analysis. You will gather additional latest information using the web_fetcher tool. This includes searching for the latest financial reports, news, and other relevant information about the company. 31 | Step 3. After gathering enough information, you will prepare a report that includes a verdict about the turnaround potential of each business. The verdict can be "Strong Turnaround", "Weak Turnaround", or "No Turnaround". 32 | Step 4. Finally, format the report into a well-structured markdown document and save it to a file. You will ensre that the report contains the following sections: 33 | - Business Name 34 | - Summary of Financial Data 35 | - Analysis of Financial Health 36 | - Turnaround Potential Verdict 37 | Step 5. You will use the save_report tool to persist the report on disk. The report will be saved per business. You will pass the report content and the business name to the save_report tool. 38 | 39 | General instructions: 40 | You will use the web_fetcher tool to gather additional information about these businesses and the reporter tool to generate the markdown report. You can look up for latest financial reports, news and other relevant information for the company. 41 | Today is: {date_today}. 42 | Always search for tools available to you before writing new code, esp. the cmd_executor tool, which can execute read only shell commands to gather more information if needed. 
43 | """ 44 | 45 | # Read the financial data file and start the analysis 46 | print("Loading financial data from:", data_dir) 47 | if not os.path.exists(data_dir): 48 | raise FileNotFoundError(f"The financial data file {data_dir} does not exist. Please check the path.") 49 | businesses = [] 50 | with open(data_dir, 'r', encoding='utf-8') as file: 51 | reader = csv.DictReader(file) 52 | # For each row in the CSV, create a entry, that contains the Name, Stock Symbol. Assuming the columns are Name,BSE Code,NSE Code. It is possible that BSE Code or NSE Code is not available, Create the entry for businesses list as a concatenation of Name/NSE Code/BSE Code. 53 | for row in reader: 54 | name = row.get('Name', 'Unknown') 55 | bse_code = row.get('BSE Code', '').strip() 56 | nse_code = row.get('NSE Code', '').strip() 57 | if not nse_code and not bse_code: 58 | business_entry = name 59 | elif not nse_code and bse_code: 60 | business_entry = f"""Name: {name} / BSE: {bse_code}""" 61 | elif nse_code and not bse_code: 62 | business_entry = f"""Name: {name} / NSE: {nse_code}""" 63 | else: 64 | business_entry = f"""Name: {name} / NSE: {nse_code} / BSE: {bse_code}""" 65 | businesses.append(business_entry) 66 | 67 | total_businesses = len(businesses) 68 | count = 0 69 | for business in businesses: 70 | print(f"Starting analyzing financial data and generating a report for {business}... Please wait.") 71 | final_instructions = instructions.format(business_name=business) 72 | #print(f"Final instructions for the agent: {final_instructions}") 73 | response = agent.run(final_instructions, max_steps=20) 74 | # Print progress 75 | count += 1 76 | print(f"Completed {count}/{total_businesses} businesses. Current business: {business}") -------------------------------------------------------------------------------- /py/ai/fininsightgpt/prompt_master/Equity_Research_Report_Template.md: -------------------------------------------------------------------------------- 1 | # Equity Research Report Template 2 | 3 | ## System Prompt 4 | 5 | You are a financial analyst specializing in creating concise company reports. I need a comprehensive research report on {company}, an Indian publicly listed company, with deep insights based on publicly available data, including concalls, annual reports, news, and competitive analysis. The report should be for company called {company}. Current datetime is {timestamp}. 6 | 7 | ## User Prompt 8 | 9 | The structure of the {company} report shall be as follows. 10 | 11 | --- 12 | 13 | ### 1. 📌 Company Overview 14 | 15 | - **Business Model and Key Segments** 16 | Briefly describe the company’s core business activities and primary segments. If diversified, outline the major business segments and how revenue mix has evolved over the last 3–5 years. 17 | 18 | - **Key Milestones** 19 | Highlight key events such as IPOs, product/service launches, expansions, strategic partnerships, or diversification moves. 20 | 21 | --- 22 | 23 | ### 2. 📈 Strategic Developments & Execution Analysis 24 | 25 | #### A. Business Expansion & Innovation 26 | - Detail new product or service launches, acquisitions, or innovations. 27 | - Mention R&D pipelines and any noteworthy technologies under development. 28 | - Include subsidiaries, joint ventures, and their strategic significance. 29 | 30 | #### B. Order Book & Execution Capacity 31 | - Present current size and growth trend of the order book. Include insights on order pipeline and execution win-rates. 32 | - Clarify execution visibility over the next 12–18 months. 
33 | 34 | #### C. Capacity Expansion 35 | - Describe ongoing or planned projects/plants and their impact on production/revenue capacity. 36 | - Explain the funding strategy for capex (equity, debt, internal accruals). 37 | 38 | #### D. Risk Analysis 39 | - Discuss regulatory, operational, geopolitical, and credit-related risks. 40 | - Provide the latest credit ratings and any notable outlooks. 41 | 42 | #### E. Management & Governance 43 | - Assess management’s historical performance versus guidance. 44 | - Mention promoter shareholding patterns and governance quality. 45 | - Flag any litigation, controversies, or red flags if applicable. 46 | 47 | --- 48 | 49 | ### 3. 🏗 Recent Milestones & Notable Projects 50 | - Showcase recognition from industry/government bodies and key partnerships. 51 | - Highlight delivery of high-profile projects or export milestones. 52 | - Summarize improvements in net worth, margins, interest coverage, and profitability. 53 | 54 | --- 55 | 56 | ### 4. 🧭 “What’s New vs. What’s Next” Summary Table 57 | 58 | Create a table outlining: 59 | - Recent achievements, future plans, expected timelines, and revenue/strategic impact across areas such as Order Book, Capacity, Exports, R&D, and Financials. 60 | 61 | --- 62 | 63 | ### 5. 🆚 Competitive Analysis 64 | 65 | - Identify and compare peers using valuation (P/E, EV/EBITDA), RoE, margins, etc. 66 | - Discuss relative market positioning and entry barriers. 67 | - List key strengths and weaknesses compared to competitors. 68 | - Include visual peer comparison charts or tables. 69 | 70 | --- 71 | 72 | ### 6. 📰 News and Media Perception 73 | 74 | - List significant headlines from the past 2 years. 75 | - Compare media coverage with management’s communicated vision and strategies. 76 | - Mention any frauds, disputes, or controversies. 77 | - Provide an overall sentiment assessment (positive/neutral/negative). 78 | 79 | --- 80 | 81 | ### 7. 📊 Valuation & Investment Perspective 82 | 83 | - Include valuation metrics (P/E, EV/EBITDA, etc.) and compare with historical ranges and peers. 84 | - Perform a DCF analysis with assumptions and forecasts. 85 | - Evaluate whether the current market price justifies future earnings and growth. 86 | - Add broker/analyst consensus and sentiment. 87 | - Correlate stock price performance with earnings visibility. 88 | 89 | --- 90 | 91 | ### 8. 🚀 Key Catalysts to Watch (Near-Term) 92 | 93 | Track important near-term triggers such as: 94 | - Plant/project commissionings 95 | - Export order wins 96 | - Quarterly financial results and trends 97 | 98 | --- 99 | 100 | ### 9. 🧾 Conclusion & Investment Rationale 101 | 102 | - Provide a clear investment rating (Buy/Hold/Avoid) with reasoning. 103 | - Outline key upside/downside triggers. 104 | - Call out any immediate risks (regulatory, geopolitical, macro). 105 | - Mention near-term events that could impact re-rating potential. 106 | 107 | --- 108 | 109 | ## 📁 Data Sources to Refer to: 110 | 111 | - Last 3–5 Years of Annual Reports 112 | - Investor Presentations (especially the latest) 113 | - Earnings Call Transcripts (latest quarter) 114 | - Credit Rating Reports (if public) 115 | - Exchange Filings (NSE/BSE) 116 | - Financial News Sources (ET, BloombergQuint, Moneycontrol, etc.) 
117 | -------------------------------------------------------------------------------- /py/yf/weeklyRSIVolStopBO.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BOs of nifty500 stocks, that gave a weekly breakout from RSI(14) > 60 3 | Also, check if they are above the volstop(10,2.5) 4 | Also, check if they are abover the 20-EMA 5 | Prefer, stocks with relative ratio on an increasing trend on 5-6M average 6 | All calculations on weekly timeframes 7 | 8 | Generally, such stocks that take repitative support on a bullish RSI level, 9 | with backing of sectoral tailwind or strong fundamentals give good long term moves 10 | Exits can be planned on volstop break, or 20-EMA break or both with partial booking on 11 | break of one 12 | ''' 13 | 14 | import yfinance as yf 15 | import pandas as pd 16 | import ta 17 | import datetime 18 | 19 | # Set output folder path 20 | output_path = "output" 21 | 22 | # Read the list of stocks from the CSV file 23 | stocks = pd.read_csv("stocks500.csv", header=0, usecols=["Ticker"]) 24 | 25 | # Set start Date 26 | start_date = '2020-02-01' 27 | 28 | # Set end Date 29 | end_date = '2023-02-26' 30 | 31 | # Specify the benchmark symbol 32 | benchmark = "^NSEI" 33 | 34 | # Interval 35 | data_interval_weekly = '1wk' 36 | 37 | import yfinance as yf 38 | import pandas as pd 39 | import numpy as np 40 | 41 | def rsi_crossover(data, rsi_level): 42 | current_rsi = data.iloc[-1]['RSI'] 43 | previous_rsi = data.iloc[-2]['RSI'] 44 | return previous_rsi <= 60.0 and current_rsi > 60.0 45 | 46 | def volatility_stop(data, period, multiplier): 47 | high = data['High'] 48 | low = data['Low'] 49 | close = data['Close'] 50 | 51 | atr = pd.Series((high - low).abs().rolling(period).mean(), name='ATR') 52 | direction = np.where(close.diff() > 0, 1, -1) 53 | vol_stop = close - direction * atr * multiplier 54 | 55 | data['volStop'] = vol_stop 56 | return data 57 | 58 | def ratio_mean(data, benchmark_data, length): 59 | # Calculate the relative strength of the stock by dividing its weekly closing price by the weekly closing price of the Nifty 50 index 60 | relative_strength = data['Close'] / benchmark_data['Close'] 61 | data[f'relativeRatio'] = relative_strength 62 | # print(relative_strength.tail(10)) 63 | 64 | # Calculate the mean of the relative strength values for length 65 | data[f'ratio{length}W'] = relative_strength.rolling(window=length).mean() 66 | return data 67 | 68 | 69 | def main(): 70 | print("Started...") 71 | # Create the DataFrame 72 | result_df = pd.DataFrame(columns=['stock', 'Close', 'volStop10_2.5', 'ema20', 'RS-ratio', 'ratio-21W', 'RSI(14)']) 73 | 74 | # Benchmark data 75 | # Use yfinance to retrieve the benchmark data 76 | benchmark_ticker = yf.Ticker(benchmark) 77 | benchmark_data = benchmark_ticker.history(start=start_date, end=end_date, interval=data_interval_weekly,auto_adjust=False, prepost=False) 78 | benchmark_data = benchmark_data.dropna() 79 | 80 | # Iterate through the list of stocks 81 | for stock in stocks["Ticker"]: 82 | try: 83 | # Get the stock data from yfinance, dont adjust OHLC 84 | data = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_weekly,auto_adjust=False, prepost=False) 85 | # Drop those with NaN 86 | data = data.dropna() 87 | 88 | # Calculate the RSI using a 14-day period 89 | data['RSI'] = ta.momentum.RSIIndicator(data['Close'], window=14).rsi() 90 | # Check if a crossover from value lower than 60 has happend, we need to however look at RSI trend on a 
charting platform 91 | if (rsi_crossover(data, 60)): 92 | # Calculate volStop 93 | data = volatility_stop(data, 10, 2.5) 94 | # Calculate ema20W 95 | data['ema20'] = ta.trend.EMAIndicator(data['Close'], window=20).ema_indicator() 96 | # Calculate the relative ratio and average 21W 97 | data = ratio_mean(data, benchmark_data, 21) 98 | curr_data = data.iloc[-1] 99 | row = {'stock': stock, 'Close': curr_data['Close'], 'volStop10_2.5': str(round(curr_data['volStop'], 2)), 'ema20': str(round(curr_data['ema20'], 2)), \ 100 | 'RS-ratio': str(round(curr_data['relativeRatio'], 2)), 'ratio-21W': str(round(curr_data['ratio21W'], 2)), 'RSI(14)': str(round(curr_data['RSI'], 2))} 101 | # Append the new row to the DataFrame 102 | result_df.loc[len(result_df)] = row 103 | 104 | except Exception as e: 105 | print("Error: " + stock) 106 | print(e) 107 | 108 | # Append current timestamp to the file name 109 | now = datetime.datetime.now() 110 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 111 | file_name = 'weeklyRSIVolStopBO_' + timestamp + '.csv' 112 | # Export the DataFrame to CSV 113 | result_df.to_csv(file_name, index=False) 114 | print('Done') 115 | 116 | if __name__ == "__main__": 117 | main() -------------------------------------------------------------------------------- /py/yf/newHighMonthly.py: -------------------------------------------------------------------------------- 1 | 2 | ''' 3 | This code, also searches for new monthly highs, but not just ATH 4 | This it does by boxing a lookback limit and a minimum duration where the new high should be 5 | with respect to the historical high. 6 | ''' 7 | import yfinance as yf 8 | import pandas as pd 9 | import time 10 | import os 11 | from datetime import datetime, timedelta 12 | 13 | # Set output folder path 14 | output_path = "output" 15 | 16 | # Read the list of stocks from the CSV file 17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 18 | 19 | # Set the time frame to max 20 | time_frame = 'max' 21 | 22 | # Set the bar time frame 23 | data_interval = '1mo' 24 | 25 | # Set the maximum number of months to lookback 26 | LOOKBACK_LIIMIT = 15 * 12 # Years in months 27 | 28 | # Set minimum numbber of months that this BO should be after 29 | MIN_BO_LENGTH = 50 #5 * 12 # Years in months 30 | 31 | # Initialize a list to store the results 32 | results = [] 33 | 34 | # Crore 35 | One_Cr = 10000000 36 | 37 | # Columnns in the report 38 | report_columns = ["Stock", "mcap", "High Close", "High Close Date", "Current Close", "#MonthsBO", "Diff", "sector" , "industry"] 39 | 40 | def write_dataframe_to_file(df, name): 41 | # Get the current timestamp 42 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S") 43 | 44 | # Create the filename 45 | filename = f'{name}_{timestamp}.csv' 46 | # Save the DataFrame as a CSV file with specific column names as the header 47 | df.to_csv(f'{output_path}/{filename}',index=False) 48 | 49 | 50 | 51 | def main(): 52 | print("Started...") 53 | # create an empty dataframe to store the results 54 | results_df = pd.DataFrame(columns=report_columns) 55 | # Iterate through the list of stocks 56 | for stock in stocks["Ticker"]: 57 | try: 58 | # Get the stock data from yfinance, dont adjust OHLC 59 | ticker = yf.Ticker(stock+".NS") 60 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 61 | # Drop those with NaN 62 | data = data.dropna() 63 | # Drop last row, if 2nd last is already of the month 64 | if data.index[-1].month == data.index[-2].month: 65 | # Replace the values in the 
second-to-last row with the values in the last row 66 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 67 | # Delete the last row 68 | data = data.drop(data.index[-1]) 69 | 70 | if (len(data) < MIN_BO_LENGTH + 1): 71 | print(f'Skipping. Not enough data for {stock}, only {len(data)} available, minimum required {MIN_BO_LENGTH+1}') 72 | continue 73 | 74 | # Reverse the data frame to start from current candle 75 | stk_df = data.iloc[::-1] 76 | max_loopback = LOOKBACK_LIIMIT 77 | if (len(stk_df) < LOOKBACK_LIIMIT): # Limit lookback if not available data for so long 78 | max_loopback = len(stk_df) 79 | 80 | stk_df_max_lookback = stk_df.head(max_loopback) 81 | current_close = stk_df_max_lookback['Close'][0] 82 | for i in range(1, len(stk_df_max_lookback)): 83 | this_close = stk_df_max_lookback['Close'][i] 84 | if this_close > current_close: 85 | if i >= MIN_BO_LENGTH: 86 | highest_close_date = stk_df_max_lookback.index[i].strftime('%Y-%m-%d') 87 | diff = round((this_close - current_close)/current_close * 100, 2) 88 | # Essential data 89 | sector = '' 90 | industry = '' 91 | marketCap = '' 92 | try: 93 | if ticker.info: 94 | marketCap = round(ticker.info['marketCap'] / One_Cr, 0) 95 | industry = ticker.info['industry'] 96 | sector = ticker.info['sector'] 97 | except Exception as err: 98 | pass 99 | new_row = pd.DataFrame({"Stock": stock, "mcap": marketCap, "High Close": round(this_close, 2), "High Close Date": highest_close_date, \ 100 | "Current Close": round(current_close, 2), "#MonthsBO": i, "Diff": diff, "sector": sector, "industry": industry}, index=[0]) 101 | results_df = pd.concat([results_df, new_row]) 102 | break 103 | else: 104 | break # A newer high exist before MIN_BO_LENGTH 105 | except Exception as e: 106 | print(f'Error for ticker: {stock} ==> {e}') 107 | 108 | # print(results_df) 109 | write_dataframe_to_file(results_df, "newHighMonthly_BO_") 110 | print("Done") 111 | 112 | if __name__ == "__main__": 113 | main() 114 | -------------------------------------------------------------------------------- /py/yf/multimonthBO.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import pandas as pd 3 | import time 4 | import os 5 | from datetime import datetime, timedelta 6 | 7 | # Set output folder path 8 | output_path = "output" 9 | 10 | # Read the list of stocks from the CSV file 11 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 12 | 13 | # Set the time frame to max 14 | time_frame = 'max' 15 | 16 | # Set the bar time frame 17 | data_interval = '1mo' 18 | 19 | # Set the minimum number of months since the last ath was breached 20 | MIN_MONTHS = 11 21 | 22 | # Threshold to previous ATH 23 | threshold = 1.0 24 | 25 | # Initialize a list to store the results 26 | results = [] 27 | 28 | # Crore 29 | One_Cr = 10000000 30 | 31 | # determine if highest close was minimum_low_length ago. 
32 | def highestClose(stock_data, min_months): 33 | 34 | highest_close = stock_data["Close"][0] 35 | highest_close_date = stock_data.index[0] 36 | highest_close_idx = 0 37 | for i in range(1, len(stock_data)): 38 | if stock_data["Close"][i] > highest_close: 39 | highest_close = stock_data["Close"][i] 40 | highest_close_date = stock_data.index[i] 41 | highest_close_idx = i 42 | if len(stock_data) - highest_close_idx >= min_months: 43 | return [True, highest_close, highest_close_date] 44 | else: 45 | return [False, '', ''] 46 | 47 | def write_dataframe_to_file(df, name): 48 | # Get the current timestamp 49 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S") 50 | 51 | # Create the filename 52 | filename = f'{name}_{timestamp}.csv' 53 | # Save the DataFrame as a CSV file with specific column names as the header 54 | df.to_csv(output_path + "/" + filename, index=False, columns=["Stock", "mcap", "Highest Close", "Highest Close Date", "Current Close", "Diff", "sector", "industry"]) 55 | 56 | 57 | def main(): 58 | print("Started...") 59 | # create an empty dataframe to store the results 60 | results_df = pd.DataFrame(columns=["Stock", "mcap", "Highest Close", "Highest Close Date", "Current Close", "Diff", "sector" , "industry"]) 61 | # Iterate through the list of stocks 62 | for stock in stocks["Ticker"]: 63 | try: 64 | # Get the stock data from yfinance, dont adjust OHLC 65 | ticker = yf.Ticker(stock+".NS") 66 | data = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 67 | # Drop those with NaN 68 | data = data.dropna() 69 | # Drop last row, if 2nd last is already of the month 70 | if data.index[-1].month == data.index[-2].month: 71 | # Replace the values in the second-to-last row with the values in the last row 72 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 73 | # Delete the last row 74 | data = data.drop(data.index[-1]) 75 | 76 | # print(data) 77 | if (len(data) <= 2): 78 | print(f'Skipping {stock} since not enough data present ') 79 | continue 80 | 81 | min_months = MIN_MONTHS 82 | if (len(data) < (MIN_MONTHS + 1)): 83 | print(f'{stock} has only {len(data)} months, trimming condition') 84 | min_months = len(data) 85 | 86 | # Highest close prior to last month 87 | result_highestClose = highestClose(data.iloc[:-1], min_months) # Skip the current month 88 | highestClose_condition = result_highestClose[0] 89 | highestClose_value = result_highestClose[1] 90 | highestClose_date = result_highestClose[2] 91 | 92 | # Essential data 93 | sector = '' 94 | industry = '' 95 | marketCap = '' 96 | try: 97 | if ticker.info: 98 | marketCap = round(ticker.info['marketCap'] / One_Cr, 0) 99 | industry = ticker.info['industry'] 100 | sector = ticker.info['sector'] 101 | except Exception as err: 102 | pass 103 | 104 | last_close = data["Close"].tail(1).values[0] 105 | if (highestClose_condition and last_close >= highestClose_value * threshold): 106 | diff = round(((last_close - highestClose_value) / highestClose_value) * 100, 2) 107 | new_row = pd.DataFrame({"Stock": stock, "mcap": marketCap, "Highest Close": round(highestClose_value, 2), "Highest Close Date": highestClose_date, \ 108 | "Current Close": round(last_close, 2), "Diff": diff, "sector": sector, "industry": industry}, index=[0]) 109 | results_df = pd.concat([results_df, new_row]) 110 | 111 | except Exception as e: 112 | print(f'Error for ticker: {stock} ==> {e}') 113 | 114 | # print(results_df) 115 | write_dataframe_to_file(results_df, "MultiMonth_BO_") 116 | print("Done") 117 | 118 | if __name__ == "__main__": 119 | 
main() 120 | -------------------------------------------------------------------------------- /py/beta/chatgpt/generate_report_for_company.py: -------------------------------------------------------------------------------- 1 | import re 2 | import model as ai 3 | import os 4 | 5 | screener_xls_data = {} 6 | delimiter = "####" 7 | 8 | company_data = 'companyinfo/sjvn' 9 | 10 | screener_tabs = ['income_statement','income_statement_quarterly', 'balance_sheet', 'cashflow_statement', 'ratio_analysis'] 11 | screener_data = {} 12 | 13 | def preprocess_text(text): 14 | # Lowercase the text 15 | text = text.lower() 16 | 17 | # Remove special characters 18 | text = re.sub(r'\W', ' ', text) 19 | 20 | # Replace multiple spaces with a single space 21 | text = re.sub(r'\s+', ' ', text) 22 | 23 | return text 24 | 25 | def load_screener_data(): 26 | 27 | for i in range(0, len(screener_tabs)): 28 | tabname = screener_tabs[i] 29 | f = open(f'{company_data}/{tabname}.txt') 30 | data = f.read() 31 | f.close() 32 | screener_data[tabname] = data 33 | 34 | 35 | def company_info_analysis(): 36 | file = f'{company_data}/company_info.txt' 37 | data = 'No company info' 38 | if os.path.isfile(file): 39 | f = open(file) 40 | data = f.read() 41 | f.close() 42 | print('Analyzing company_info data...') 43 | system_message = f'As a financial analyst for equity markets, perform an evaluation of the company based on the inputs provided. The input is enclosed within {delimiter}.\ 44 | You must do the analysis in the following steps.\ 45 | Step 8: Prepare a short description of the business of the company, its factories, plants and operations in general.\ 46 | Step 9: Prepare shareholding trend and status, separately, if shareholding data is provided. \ 47 | Step 10: Prepare a separate detailed summary of concall data if provided. \ 48 | Step 11: If credit rating data is provided, list out positive and negative points separately. \ 49 | Give your analysis in as detailed a manner as possible, however summarize it to limit to max_tokens = 2000 ' 50 | user_message = f'{delimiter}{data}{delimiter}' 51 | messages = [ 52 | {'role':'system', 53 | 'content': system_message}, 54 | {'role':'user', 55 | 'content': f"{delimiter}{user_message}{delimiter}"}, 56 | ] 57 | response = ai.get_completion_from_messages(messages,max_tokens=2000) 58 | return response 59 | 60 | def fin_statement_analysis(): 61 | print('Analyzing screener data...') 62 | system_message = f'As a financial analyst for equity markets, you need to perform an evaluation of the company based on the inputs provided. Some of these inputs will be standard financial data and some will be unstructured. \ 63 | The input data will be enclosed with {delimiter} You must do the analysis in the following steps. \ 64 | Step 1:{delimiter} Perform a financial analysis of the company from stock market investing perspective from its annual income statement, quarterly income statement, \ 65 | balance sheet and cashflow statement. Each will be provided to you enclosed as {delimiter}income_statement:{delimiter} {delimiter}balance_sheet{delimiter} and so on. \ 66 | Step 2: Using the ratio_analysis statement analyze the working capital cycle. Step 3: Perform a Du-Pont analysis using the above data. Step 4: Perform profitability analysis of this financial data \ 67 | Step 5: Provide trend analysis and competitive advantages of the company based on given financial data. Step 6: Check pricing power of this company?
\ 68 | Step 7: Detect and report any red flags about the company from the data \ 69 | Step 8: Report preparation / Take special care. As an analyst, perform these analyses and prepare a report that is very detailed but summarize it to limit to max_tokens=2000.' 70 | 71 | msg = '' 72 | for key,val in screener_data.items(): 73 | msg += f'{delimiter}{key}:{val}{delimiter}' # accumulate every screener tab, not just the last one 74 | user_message = f'{delimiter}{msg}{delimiter}' 75 | messages = [ 76 | {'role':'system', 77 | 'content': system_message}, 78 | {'role':'user', 79 | 'content': f"{delimiter}{user_message}{delimiter}"}, 80 | ] 81 | response = '' 82 | response = ai.get_completion_from_messages(messages,max_tokens=2000) 83 | return response 84 | 85 | 86 | def main(): 87 | ai.set_api() 88 | load_screener_data() 89 | 90 | #Financial statement analysis from screener data 91 | fin_screener_analysis = '' 92 | fin_screener_analysis = fin_statement_analysis() 93 | # print(fin_screener_analysis) 94 | with open(f'{company_data}/financial_analysis.txt', 'w', encoding='utf-8') as file: 95 | file.write(fin_screener_analysis) 96 | 97 | #Perform company info analysis from data from internet and elsewhere 98 | co_info_analysis = '' 99 | co_info_analysis = company_info_analysis() 100 | with open(f'{company_data}/company_info_analysis.txt', 'w', encoding='utf-8') as file: 101 | file.write(co_info_analysis) 102 | 103 | print('Done') 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /py/yf/box_scan.py: -------------------------------------------------------------------------------- 1 | ''' 2 | We detect a consolidation after a rally and quantify the box formation 3 | Rally is defined as 3 consecutive higher closes, and the high of that candle defines the top left of the box 4 | The low is extended with each new lower low 5 | ''' 6 | import yfinance as yf 7 | import pandas as pd 8 | import datetime 9 | import matplotlib.pyplot as plt 10 | import matplotlib.patches as patches 11 | 12 | 13 | # Set the bar time frame 14 | data_interval = '1d' 15 | # Set the time frame to 90d 16 | time_frame = '90d' 17 | 18 | # Set output folder path 19 | output_path = "boxscan/output" 20 | # Initialize an empty DataFrame to store the output CSV data 21 | output_df = pd.DataFrame(columns=['Stock Code', 'Box Duration', 'Drawdown', 'Fall Rate']) 22 | 23 | # Read the list of stocks from the CSV file 24 | stocks = pd.read_csv("stocks500.csv", header=0, usecols=["Ticker"]) 25 | 26 | # Box depth threshold % 27 | box_depth_threshold = -20 28 | # Rally days 29 | min_rally_days = 3 30 | # Box days 31 | min_days_in_box = 3 32 | 33 | # Function to plot and save chart and data 34 | def scan_for_box(df, stock_code): 35 | 36 | # Calculate 50-day average volume 37 | df['50_day_avg_vol'] = df['Volume'].rolling(window=50).mean() 38 | 39 | # Set up plot 40 | fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(20, 12), sharex=True, gridspec_kw={'height_ratios': [3, 1]}) 41 | ax1.set_ylabel('Price') 42 | ax1.set_title(f'{stock_code} with Negative Drawdown') 43 | ax2.set_xlabel('Time') 44 | ax2.set_ylabel('Volume') 45 | 46 | # Initialize variables for debugging and the box 47 | rally_days = 0 48 | rally_volume_high = False 49 | box_start = None 50 | box_end = None 51 | box_high = None 52 | box_low = None 53 | 54 | # Iterate through the data to identify rallies, place debug dots, and draw the box 55 | for i in range(len(df)): 56 | color = 'g' if df.iloc[i]['Close'] >= df.iloc[i]['Open'] else 'r' 57 | vol_color = color 58 |
vol_color = 'g' if i > 0 and df.iloc[i]['Close'] >= df.iloc[i-1]['Close'] else 'r' 59 | 60 | ax1.plot([i, i], [df.iloc[i]['Low'], df.iloc[i]['High']], color=color) 61 | ax1.add_patch(patches.Rectangle((i - 0.3, df.iloc[i]['Open']), 0.6, df.iloc[i]['Close'] - df.iloc[i]['Open'], facecolor=color)) 62 | ax2.bar(i, df.iloc[i]['Volume'], color=vol_color, width=0.6) 63 | 64 | # Detect a rally 65 | if i > 0 and df.iloc[i]['Close'] > df.iloc[i - 1]['Close']: 66 | rally_days += 1 67 | if df.iloc[i]['Volume'] > df.iloc[i]['50_day_avg_vol']: 68 | rally_volume_high = True 69 | else: 70 | rally_days = 0 71 | rally_volume_high = False 72 | 73 | if rally_days >= min_rally_days and rally_volume_high: 74 | ax1.plot(i, df.iloc[i]['High'], 'o', color='orange') 75 | box_high = df.iloc[i]['High'] 76 | box_low = df.iloc[i]['Low'] 77 | box_start = i 78 | 79 | if box_start is not None: 80 | new_low = df.iloc[i]['Low'] 81 | if new_low < box_low: 82 | box_low = new_low 83 | box_end = i 84 | ax1.add_patch(patches.Rectangle((box_start, box_low), box_end - box_start, box_high - box_low, fill=True, color='yellow', alpha=0.3)) 85 | 86 | if df.iloc[i]['Close'] > box_high: 87 | box_start = None 88 | box_end = None 89 | box_high = None 90 | box_low = None 91 | 92 | # Book keeping 93 | if box_start is not None: 94 | box_days = (box_end - box_start) + 1 95 | box_drop_percent = -((box_high - box_low) / box_high) * 100 96 | box_fall_rate = round(-box_drop_percent / box_days, 2) 97 | text_str = f"Box Duration: {box_days} days\nDrawdown: {box_drop_percent:.2f}%\nFR: {box_fall_rate:.2f}" 98 | ax1.text(0.75, 0.1, text_str, transform=ax1.transAxes, fontsize=12, verticalalignment='bottom', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5)) 99 | 100 | if box_end == len(df) - 1 and box_drop_percent > box_depth_threshold and box_days > min_days_in_box: 101 | plt.savefig(f"{output_path}/{stock_code}.png") 102 | output_df.loc[len(output_df)] = [stock_code, box_days, box_drop_percent, box_fall_rate] 103 | plt.close() 104 | 105 | 106 | def main(): 107 | print('Started') 108 | # Iterate through the list of stocks 109 | for stock in stocks["Ticker"]: 110 | try: 111 | ticker = yf.Ticker(stock+".NS") 112 | stock_history = ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 113 | stock_history = stock_history.dropna() 114 | scan_for_box(stock_history, stock) 115 | except Exception as e: 116 | print(f"Error: {stock} ==> {e}") 117 | 118 | # Append current timestamp to the file name 119 | now = datetime.datetime.now() 120 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 121 | file_name = f'{output_path}/box_scan_{timestamp}.csv' 122 | # Export the DataFrame to CSV 123 | output_df.to_csv(file_name, index=False) 124 | print(f'Done, output saved in {file_name}') 125 | 126 | if __name__ == "__main__": 127 | main() 128 | -------------------------------------------------------------------------------- /py/yf/green_dot.py: -------------------------------------------------------------------------------- 1 | 2 | import yfinance as yf 3 | import pandas as pd 4 | import numpy as np 5 | import datetime 6 | 7 | # Set output folder path 8 | output_path = "output" 9 | 10 | # Read the list of stocks from the CSV file 11 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 12 | 13 | # Set start Date 14 | start_date = '2020-01-01' 15 | 16 | # Set end Date 17 | end_date = '2023-01-21' 18 | 19 | # Specify the benchmark symbol 20 | benchmark = "^NSEI" 21 | 22 | # Interval 23 | data_interval_daily = '1d' # '1wk' or '1d' 24 | 
data_interval_weekly = '1wk' 25 | 26 | # Lookback for green dot 27 | lookback = 5 28 | 29 | def calculateReversionExpansion(stock_data): 30 | # Extract the close prices from the DataFrame 31 | src = stock_data["Close"] 32 | 33 | # Perform the EMA calculations 34 | l1, l2, l3, l4 = 20, 50, 100, 200 #EMA periods 35 | 36 | # Compute the exponential moving average with a lookback length of 20 37 | ema1 = src.ewm(span=l1).mean() 38 | ema2 = src.ewm(span=l2).mean() 39 | ema3 = src.ewm(span=l3).mean() 40 | ema4 = src.ewm(span=l4).mean() 41 | 42 | # Merge the series into one DataFrame 43 | merged_df = pd.concat([ema1, ema2, ema3, ema4], axis=1, keys=['EMA 20', 'EMA 50', 'EMA 100', 'EMA 200']) 44 | merged_df.fillna(0, inplace=True) 45 | # Find the lowest and the highest of this emas 46 | merged_df['lowest'] = merged_df[(merged_df > 0)].min(axis=1) 47 | # Cheeky way to replace zero with a miniscule value to get rid of div by zero error 48 | merged_df['lowest'].replace(0, 1e-10, inplace=True) 49 | merged_df['highest'] = merged_df.max(axis=1) 50 | 51 | # Now, merge the close, otherwise lowest will consider Close values also 52 | merged_df = pd.concat([merged_df, src], axis=1) 53 | # Calculate delta between lowest and highest 54 | merged_df['delta'] = (merged_df['highest'] - merged_df['lowest']) / merged_df['lowest'] 55 | # Calculate emadelta 56 | merged_df['emadelta'] = merged_df['delta'].ewm(span=7).mean() 57 | # Calculate delta between close and lowest ema 58 | merged_df['pricedelta'] = ( merged_df['Close'] - merged_df['lowest']) / merged_df['lowest'] 59 | # Calculate ema of this pricedelta 60 | merged_df['emapricedelta'] = merged_df['pricedelta'].ewm(span=7).mean() 61 | # Determine if a crossover has happened between delta crossing over emadelta 62 | merged_df['crossover'] = np.where((merged_df['delta'] > merged_df['emadelta']) & (merged_df['delta'].shift(1) < merged_df['emadelta'].shift(1)), 1, 0) 63 | # Determine if a crossunder has happened between delta crossing over emadelta 64 | merged_df['crossunder'] = np.where((merged_df['delta'] < merged_df['emadelta']) & (merged_df['delta'].shift(1) > merged_df['emadelta'].shift(1)), 1, 0) 65 | 66 | return merged_df 67 | 68 | def checkforGreenDot(rev_exp_data): 69 | # Check last lookback rows if there has been a crossover and no crossunder in the last 70 | rev_exp_data_21 = rev_exp_data.tail(lookback) 71 | 72 | crossover = False 73 | idx = '' 74 | delta = 0.0 75 | for index, row in rev_exp_data_21.iterrows(): 76 | if (row['crossover'] == 1 and row['Close'] > row['highest']): 77 | crossover = True 78 | idx = index 79 | delta = row['delta'] 80 | 81 | if (crossover and row['crossunder'] == 1): 82 | crossover = False 83 | return [crossover, idx, delta] 84 | 85 | def main(): 86 | print("Started...") 87 | # Create the DataFrame 88 | result_df = pd.DataFrame(columns=['stock', 'dailyXoverDate', 'dailyDelta', 'weeklyXoverDate', 'weeklyDelta']) 89 | # Iterate through the list of stocks 90 | for stock in stocks["Ticker"]: 91 | try: 92 | # Get the stock data 93 | # Get the stock data from yfinance, dont adjust OHLC 94 | stock_data_daily = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_daily,auto_adjust=False, prepost=False) 95 | # Drop those with NaN 96 | stock_data_daily = stock_data_daily.dropna() 97 | 98 | # Calculate the entire series of reversion and expansion -- daily 99 | rev_exp_data = calculateReversionExpansion(stock_data_daily) 100 | result_daily = checkforGreenDot(rev_exp_data) 101 | 102 | # Weekly data 103 | 
stock_data_weekly = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval_weekly,auto_adjust=False, prepost=False) 104 | # Drop those with NaN 105 | stock_data_weekly = stock_data_weekly.dropna() 106 | 107 | # Calculate the entire series of reversion and expansion -- weekly 108 | rev_exp_data_weekly = calculateReversionExpansion(stock_data_weekly) 109 | result_weekly = checkforGreenDot(rev_exp_data_weekly) 110 | 111 | condition = result_daily[0] or result_weekly[0] 112 | if (condition): 113 | row = {'stock': stock, 'dailyXoverDate': str(result_daily[1]), 'dailyDelta': str(result_daily[2]), 'weeklyXoverDate': str(result_weekly[1]), 'weeklyDelta': str(result_weekly[2])} 114 | # Append the new row to the DataFrame 115 | result_df.loc[len(result_df)] = row 116 | 117 | except Exception as e: 118 | print("Error: " + stock) 119 | print(e) 120 | 121 | # Append current timestamp to the file name 122 | now = datetime.datetime.now() 123 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 124 | file_name = 'green_dot_' + timestamp + '.csv' 125 | # Export the DataFrame to CSV 126 | result_df.to_csv(output_path + "/" + file_name, index=False) 127 | 128 | 129 | if __name__ == "__main__": 130 | main() 131 | 132 | -------------------------------------------------------------------------------- /py/yf/trendreversal_ha.py: -------------------------------------------------------------------------------- 1 | ''' 2 | We try to analyze trend reversals in stocks with major corrections 3 | In order to reduce noise we select monthly candles and further use HA (Heikin-Ashi) candles 4 | 5 consecutive red candles, followed by 2 green candles, should mark a clean trend reversal 5 | These reversals must be validated with price action on lower timeframes. 6 | Also, one can confirm demand by checking lime volumes. 7 | Relative strength across benchmark and sector must be checked.
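Illustration (hypothetical values): if the last seven monthly HA candles show HA_Close below HA_Open for the first five (e.g. closes drifting from 100 down to 76) and HA_Close above HA_Open for the final two (e.g. 79 and 84), the scan flags the stock as a candidate reversal.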
8 | ''' 9 | import yfinance as yf 10 | import pandas as pd 11 | import datetime 12 | 13 | # Folder location 14 | output = 'output' 15 | 16 | # Read the list of stocks from the CSV file 17 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 18 | 19 | # Set the time frame to max 20 | time_frame = 'max' 21 | 22 | # Set the bar time frame 23 | data_interval = '1mo' 24 | 25 | # Crore 26 | One_Cr = 10000000 27 | 28 | def create_HA_Candles(df): 29 | 30 | # Create a new DataFrame to store the Heikin-Ashi values 31 | heikin_ashi_data = pd.DataFrame(index=df.index) 32 | 33 | if (len(df) < 2): # We need at least 2 34 | return heikin_ashi_data 35 | 36 | # Append the 'High' and 'Low' columns from the original data 37 | heikin_ashi_data[['High', 'Low']] = df[['High', 'Low']] 38 | # Calculate the Heikin-Ashi open, close, high, and low values 39 | heikin_ashi_data['HA_Close'] = (df['Open'] + df['High'] + df['Low'] + df['Close']) / 4 40 | # Handle the first row separately 41 | first_row_open = (df['Open'][0] + df['Close'][0]) / 2 42 | heikin_ashi_data['HA_Open'] = first_row_open 43 | # Calculate HA_Open correctly for subsequent rows 44 | for i in range(1, len(heikin_ashi_data)): 45 | heikin_ashi_data['HA_Open'][i] = (heikin_ashi_data['HA_Open'][i-1] + heikin_ashi_data['HA_Close'][i-1]) / 2 46 | 47 | heikin_ashi_data['HA_High'] = heikin_ashi_data[['HA_Open', 'HA_Close', 'High']].max(axis=1) 48 | heikin_ashi_data['HA_Low'] = heikin_ashi_data[['HA_Open', 'HA_Close', 'Low']].min(axis=1) 49 | 50 | # Drop the 'High' and 'Low' columns 51 | heikin_ashi_data.drop(['High', 'Low'], axis=1, inplace=True) 52 | 53 | #print(heikin_ashi_data.tail(5)) 54 | return heikin_ashi_data 55 | 56 | 57 | def check_trend_change(df): 58 | # Check for the first 5 candles as red and the last 2 candles as green 59 | last_7_candles = df.tail(7) # Select the last 7 candles 60 | 61 | red_candles_count = 0 62 | green_candles_count = 0 63 | valid_pattern = False 64 | 65 | for i in range(5): 66 | candle = last_7_candles.iloc[i] 67 | if candle['HA_Close'] < candle['HA_Open']: 68 | red_candles_count += 1 69 | else: 70 | break 71 | 72 | for i in range(5, 7): 73 | candle = last_7_candles.iloc[i] 74 | if candle['HA_Close'] > candle['HA_Open']: 75 | green_candles_count += 1 76 | else: 77 | break 78 | 79 | if red_candles_count == 5 and green_candles_count == 2: 80 | valid_pattern = True 81 | 82 | return valid_pattern 83 | 84 | 85 | def main(): 86 | print("Started... 
') 87 | # Create the DataFrame 88 | df = pd.DataFrame(columns=['stock', 'mcap', 'vol1', 'vol2d', 'vol3d', 'sector' , 'industry']) 89 | 90 | # Iterate through the list of stocks 91 | for stock in stocks["Ticker"]: 92 | try: 93 | # Get the stock data from yfinance, dont adjust OHLC 94 | stk_ticker = yf.Ticker(stock+".NS") 95 | data = stk_ticker.history(period=time_frame,interval=data_interval,auto_adjust=False) 96 | # Drop those with NaN 97 | data = data.dropna() 98 | if (len(data) < 2): # cannot do much analysis with fewer than 2 monthly candles 99 | continue 100 | # Drop last row, if 2nd last is already of the month 101 | if data.index[-1].month == data.index[-2].month: 102 | # Replace the values in the second-to-last row with the values in the last row 103 | data.loc[data.index[-2]] = data.loc[data.index[-1]] 104 | # Delete the last row 105 | data = data.drop(data.index[-1]) 106 | 107 | heikin_ashi_data = create_HA_Candles(data) 108 | if (len(heikin_ashi_data) < 7): 109 | print(f'Skipped {stock}: not enough data') 110 | continue # skip this stock, check_trend_change needs at least 7 HA candles 111 | # Merge it to data 112 | heikin_ashi_data = heikin_ashi_data.join(data) 113 | 114 | # Check if there is a trend change 115 | if check_trend_change(heikin_ashi_data): 116 | sector = '' 117 | industry = '' 118 | marketCap = '' 119 | try: 120 | if stk_ticker.info: 121 | sector = stk_ticker.info['sector'] 122 | industry = stk_ticker.info['industry'] 123 | marketCap = round(stk_ticker.info['marketCap'] / One_Cr, 0) 124 | except Exception as err: 125 | pass 126 | 127 | # Get volume data 128 | vols = data.tail(3)['Volume'] 129 | vol1 = vols[0] 130 | vol2d = vols[1] - vol1 131 | vol3d = vols[2] - vols[1] 132 | 133 | # Append to row 134 | row = {'stock': stock, 'mcap' : marketCap, 'vol1' : vol1, 'vol2d' : vol2d,'vol3d' : vol3d, 'sector' : sector, 'industry' : industry} 135 | # Append the new row to the DataFrame 136 | df.loc[len(df)] = row 137 | 138 | except Exception as e: 139 | print(f'Error for ticker {stock} ==> {e}') 140 | # Append current timestamp to the file name 141 | now = datetime.datetime.now() 142 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 143 | file_name = f'{output}/ha_trendreversal_{timestamp}.csv' 144 | # Export the DataFrame to CSV 145 | df.to_csv(file_name, index=False) 146 | print('Done') 147 | 148 | 149 | if __name__ == "__main__": 150 | main() 151 | -------------------------------------------------------------------------------- /py/eodhd/saucer_crs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A script to determine a trend reversal. This script uses Relative Strength (Stock Price / Benchmark ratio). 3 | The script calculates the moving average of the relative strength values for a specified length (avg_length). 4 | It determines the current trend of this average, based on the following logic: 5 | - If the value of the average is rising, i.e. greater than the max of the last 3 (trend_length) weeks, the trend is considered an uptrend. This is denoted by the letter G. 6 | - If the value of the average is falling, i.e. less than the minimum of the last 3 (trend_length) weeks, the trend is considered a downtrend. This is denoted by the letter R. 7 | - If the value of the average is neither rising nor falling, the trend is considered sideways. This is denoted by the letter S. 8 | Next, the script will create a string of these trends (G,R,S) for the last 26 (analysis_window) weeks, with the most recent week being the last character in the string. 9 | It will save this string in the output column 'Trend' of the output CSV file.
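For example (hypothetical sequence), a 26-character string whose first 14 weeks are all 'R' and whose last two characters are 'GG' (such as 'RRRRRRRRRRRRRRSSRSSRSSSSGG') is reported as a bullish reversal; the mirror case, 14 leading 'G' characters ending in 'RR', is reported as a bearish reversal.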
10 | ''' 11 | 12 | import pandas as pd 13 | import pricereader as pr 14 | import datetime 15 | 16 | # Set output folder path 17 | output_path = "output" 18 | 19 | # Read the list of stocks from the CSV file 20 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 21 | 22 | # Specify the benchmark symbol 23 | benchmark = "NSEI" 24 | 25 | # Interval 26 | data_interval_weekly = 'w' 27 | 28 | # Weekly CRS Average length 29 | avg_length = 52 # Weeks 30 | ratio_col = f'ratio{avg_length}W' 31 | 32 | # Trend length 33 | trend_length = 3 # Weeks 34 | 35 | # Window of analysis 36 | analysis_window = 26 # Weeks 37 | 38 | def ratio_mean(data, benchmark_data, avg_length): 39 | # Calculate the relative strength of the stock by dividing its weekly closing price by the weekly closing price of the Nifty 50 index 40 | relative_strength = data['Close'] / benchmark_data['Close'] 41 | data[f'relativeRatio'] = relative_strength 42 | # print(relative_strength.tail(10)) 43 | 44 | # Calculate the mean of the relative strength values for length 45 | data[ratio_col] = relative_strength.rolling(window=avg_length).mean() 46 | return data 47 | 48 | 49 | def rising(source, length): 50 | return source > source.shift(1).rolling(window=length).max() 51 | 52 | def falling(source, length): 53 | return source < source.shift(1).rolling(window=length).min() 54 | 55 | def sideways(source, length): 56 | # Sideways is true when not rising and not falling 57 | is_rising = rising(source, length) 58 | is_falling = falling(source, length) 59 | return ~(is_rising | is_falling) # Not rising and not falling 60 | 61 | def detect_reversal(sequence, initial_count, initial_type, transition_length, final_pattern): 62 | if sequence[:initial_count].count(initial_type) >= initial_count and sequence[-len(final_pattern):] == final_pattern: 63 | return True 64 | return False 65 | 66 | def main(): 67 | print("Started...") 68 | # Create the DataFrame 69 | result_df = pd.DataFrame(columns=['stock', 'Trend Sequence', 'Reversal Message']) 70 | 71 | # Benchmark data 72 | benchmark_data = pr.get_price_data(benchmark, data_interval_weekly) 73 | benchmark_data = benchmark_data.dropna() 74 | 75 | # Iterate through the list of stocks 76 | for stock in stocks["Ticker"]: 77 | try: 78 | # Get the stock data, sample as below. 
Latest data is at the end 79 | ''' 80 | Date,Open,High,Low,Close,Volume,Adj Close 81 | 2017-11-16,400.0,400.0,361.0,361.0,29447,361.0 82 | 2017-11-20,343.0,343.0,279.45,279.45,5389,279.45 83 | 2017-11-27,265.5,265.5,194.15,206.45,613081,206.45 84 | 2017-12-04,196.0,227.55,181.0,227.55,615553,227.55 85 | 2017-12-11,238.9,290.25,238.9,290.25,87251,290.25 86 | ''' 87 | data = pr.get_price_data(stock, data_interval_weekly) 88 | # Drop those with NaN 89 | data = data.dropna() 90 | 91 | # Calculate the relative ratio and average avg_lengthW 92 | data = ratio_mean(data, benchmark_data, avg_length) 93 | 94 | # Apply the rising, falling, and sideways functions 95 | data['MA_rising'] = rising(data[ratio_col], trend_length) 96 | data['MA_falling'] = falling(data[ratio_col],trend_length) 97 | data['MA_sideways'] = sideways(data[ratio_col], trend_length) 98 | 99 | # Extract the last analysis_window rows 100 | analysis_data = data[['MA_rising', 'MA_falling', 'MA_sideways']].tail(analysis_window) 101 | 102 | # Create a sequence string from the last 13 rows 103 | sequence = ''.join(['G' if row['MA_rising'] else 'R' if row['MA_falling'] else 'S' for index, row in analysis_data.iterrows()]) 104 | 105 | # Detect reversals, 14 weeks of current trend and 4 weeks of opposite trend, in between we do not care 106 | bullish_reversal = detect_reversal(sequence, 14, 'R', 4, 'GG') 107 | bearish_reversal = detect_reversal(sequence, 14, 'G', 4, 'RR') 108 | 109 | # Determine reversal message 110 | reversal_message = "" 111 | if bullish_reversal: 112 | reversal_message = "Bullish reversal detected." 113 | elif bearish_reversal: 114 | reversal_message = "Bearish reversal detected." 115 | 116 | # Save the results to the DataFrame 117 | row = {'stock': stock, 'Trend Sequence': sequence, 'Reversal Message': reversal_message} 118 | # Append the new row to the DataFrame 119 | result_df.loc[len(result_df)] = row 120 | except Exception as e: 121 | print("Error: " + stock) 122 | print(e) 123 | 124 | # Append current timestamp to the file name 125 | now = datetime.datetime.now() 126 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 127 | file_name = 'weeklyRS_Saucer_' + timestamp + '.csv' 128 | # Export the DataFrame to CSV 129 | result_df.to_csv(output_path + "/" + file_name, index=False) 130 | print('Done') 131 | 132 | if __name__ == "__main__": 133 | main() -------------------------------------------------------------------------------- /py/yf/supply_exhaustion_6m_scan.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | import pandas as pd 3 | import os 4 | from datetime import datetime, timedelta 5 | 6 | # Set output folder path 7 | output_path = "output" 8 | 9 | # Read the list of stocks from the CSV file 10 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 11 | 12 | # Set start Date 13 | start_date = '2021-01-24' 14 | 15 | # Set end Date 16 | end_date = '2023-01-25' 17 | 18 | # Interval 19 | data_interval = '1d' 20 | 21 | # lowest close lookback dataset length 22 | lowest_low_lookback = 250 23 | 24 | # minimum days since last lowest close 25 | minimum_low_length = 123 26 | 27 | # mimnum days since last peak after lowest close 28 | minimum_days_since_high = 55 29 | 30 | # determine highest close in the dataset , Priorr to lowest low 31 | def highestClose(stock_data): 32 | highest_close = stock_data["Close"][0] 33 | highest_close_date = stock_data.index[0] 34 | for i in range(1, len(stock_data)): 35 | if stock_data["Close"][i] >= highest_close: 36 | 
highest_close = stock_data["Close"][i] 37 | highest_close_date = stock_data.index[i] 38 | 39 | return [highest_close, highest_close_date] 40 | 41 | 42 | # determine if lowest close was minimum_low_length ago. 43 | def lowestLow(stock_data): 44 | 45 | lowest_close = stock_data["Close"][0] 46 | lowest_close_date = stock_data.index[0] 47 | lowest_close_idx = 0 48 | for i in range(1, len(stock_data)): 49 | if stock_data["Close"][i] <= lowest_close: 50 | lowest_close = stock_data["Close"][i] 51 | lowest_close_date = stock_data.index[i] 52 | lowest_close_idx = i 53 | if len(stock_data) - lowest_close_idx >= minimum_low_length: 54 | return [True, lowest_close, lowest_close_date] 55 | else: 56 | return [False, '', ''] 57 | 58 | def write_dataframe_to_file(df, name): 59 | # Get the current timestamp 60 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S") 61 | 62 | # Create the filename 63 | filename = f'{name}_{timestamp}.csv' 64 | # Save the DataFrame as a CSV file with specific column names as the header 65 | df.to_csv(output_path + "/" + filename, index=False, columns=["Stock", "Lowest Close", "Low Date", "High Prior", "High Prior Date", "23_6 Retrace", \ 66 | "38_2 Retrace", "50_0 Retrace", "Curr/High %"]) 67 | 68 | 69 | def main(): 70 | print("Started...") 71 | # create an empty dataframe to store the results 72 | results_df = pd.DataFrame(columns=["Stock", "Lowest Close", "Low Date", "High Prior", "High Prior Date", "23_6 Retrace", "38_2 Retrace", \ 73 | "50_0 Retrace", "Curr/High %"]) 74 | # Iterate through the list of stocks 75 | for stock in stocks["Ticker"]: 76 | try: 77 | result_lowestLow = [False, '', ''] 78 | below_23_6 = False 79 | below_38_2 = False 80 | below_50 = False 81 | 82 | # Get the stock data 83 | # Get the stock data from yfinance, dont adjust OHLC 84 | stock_data = yf.Ticker(stock+".NS").history(start=start_date, end=end_date,interval=data_interval,auto_adjust=False, prepost=False) 85 | # Drop those with NaN 86 | stock_data = stock_data.dropna() 87 | 88 | # Lowest low should be beyond last minimum_low_length months 89 | result_lowestLow = lowestLow(stock_data.tail(lowest_low_lookback)) 90 | lowest_low_condition = result_lowestLow[0] 91 | lowest_low_close = result_lowestLow[1] 92 | lowest_low_date = result_lowestLow[2] 93 | 94 | # if lowest low condition is met, find out max in the data set Priorr to lowest low date 95 | if (lowest_low_condition): 96 | # Get dataset upto lowest_low_date 97 | before_low_data = stock_data.loc[stock_data.index < lowest_low_date] 98 | 99 | # Get highest Priorr to low 100 | result_highestClosePriorr = highestClose(before_low_data) 101 | highest_Priorr_close = result_highestClosePriorr[0] 102 | highest_Priorr_date = result_highestClosePriorr[1] 103 | 104 | # Calcualte difference between close and high 105 | diff = (highest_Priorr_close - lowest_low_close) 106 | # 23.6%, 38.2% and 50% retracement value 107 | level_23_6 = lowest_low_close + (diff * 0.236) 108 | level_38_2 = lowest_low_close + (diff * 0.382) 109 | level_50 = lowest_low_close + (diff * 0.50) 110 | 111 | # Get dataset after lowest_low_date 112 | after_low_data = stock_data.loc[stock_data.index > lowest_low_date] 113 | # Get highest after low 114 | result_highestCloseAfter = highestClose(after_low_data) 115 | highest_after_close = result_highestCloseAfter[0] 116 | highest_after_date = result_highestCloseAfter[1] 117 | 118 | # Check if the highest close, is within the retracement level 119 | if highest_after_close <= level_50: 120 | below_50 = True 121 | if highest_after_close <= 
level_38_2: 122 | below_38_2 = True 123 | if highest_after_close <= level_23_6: 124 | below_23_6 = True 125 | # Calculate distance of current price with respect to the highest value in the retracement 126 | current_close = stock_data["Close"].tail(1).values[-1] 127 | curr_diff = round(((current_close - highest_after_close) / (highest_after_close)) * 100, 2) 128 | 129 | if (below_50 or below_23_6 or below_38_2): 130 | new_row = pd.DataFrame({"Stock": stock, "Lowest Close": lowest_low_close, "Low Date": lowest_low_date, "High Prior": highest_Priorr_close, \ 131 | "High Prior Date": highest_Priorr_date, "23_6 Retrace": below_23_6, "38_2 Retrace": below_38_2, "50_0 Retrace": below_50, \ 132 | "Curr/High %": curr_diff}, index=[0]) 133 | results_df = pd.concat([results_df, new_row]) 134 | 135 | except Exception as e: 136 | print("Error: " + stock) 137 | print(e) 138 | 139 | # print(results_df) 140 | write_dataframe_to_file(results_df, "Supply_Exhaustion_6M_") 141 | print("Done") 142 | 143 | if __name__ == "__main__": 144 | main() 145 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/src/master_file_generator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Master File Generator Module 3 | 4 | This module handles the creation of the consolidated master markdown file from individual markdown files. 5 | """ 6 | 7 | import os 8 | import re 9 | import logging 10 | from pathlib import Path 11 | from typing import List, Optional 12 | import datetime 13 | 14 | # Configure logging 15 | logging.basicConfig( 16 | level=logging.INFO, 17 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 18 | ) 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def generate_master_file( 23 | company_name: str, 24 | markdown_files: List[str], 25 | output_dir: Optional[str] = None 26 | ) -> str: 27 | """Generate a consolidated master markdown file for a company. 
28 | 29 | Args: 30 | company_name: Name of the company 31 | markdown_files: List of paths to markdown files to include 32 | output_dir: Directory to save the master file (defaults to company folder) 33 | 34 | Returns: 35 | Path to the generated master file 36 | """ 37 | logger.info(f"Generating master file for {company_name} from {len(markdown_files)} markdown files") 38 | 39 | # Create timestamp for the master file 40 | timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 41 | master_filename = f"{company_name}_master_{timestamp}.md" 42 | 43 | # Determine output directory 44 | if output_dir is None: 45 | # Try to infer from the first markdown file 46 | if markdown_files: 47 | first_file = Path(markdown_files[0]) 48 | output_dir = first_file.parent.parent # Go up one level from processed/ 49 | else: 50 | output_dir = os.getcwd() 51 | 52 | output_path = Path(output_dir) / master_filename 53 | 54 | # Prepare master file content 55 | master_content = [ 56 | f"# {company_name.upper()} - Consolidated Analysis", 57 | f"Generated on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", 58 | f"Number of source documents: {len(markdown_files)}", 59 | "\n---\n" 60 | ] 61 | 62 | # Table of Contents 63 | toc = ["## Table of Contents"] 64 | 65 | # Track sections for organizing content 66 | sections = { 67 | "Financial Data": [], 68 | "Business Overview": [], 69 | "Management": [], 70 | "Industry Analysis": [], 71 | "News & Media": [], 72 | "Miscellaneous": [] 73 | } 74 | 75 | # Process each markdown file 76 | for idx, md_file in enumerate(markdown_files): 77 | try: 78 | with open(md_file, 'r', encoding='utf-8') as f: 79 | content = f.read() 80 | 81 | # Extract filename for reference 82 | filename = Path(md_file).stem 83 | 84 | # Determine section based on content keywords 85 | section = "Miscellaneous" 86 | content_lower = content.lower() 87 | 88 | if any(kw in content_lower for kw in ["profit", "revenue", "financial", "balance sheet", "income", "statement", "ratio"]): 89 | section = "Financial Data" 90 | elif any(kw in content_lower for kw in ["business", "product", "service", "segment", "overview"]): 91 | section = "Business Overview" 92 | elif any(kw in content_lower for kw in ["ceo", "director", "management", "board"]): 93 | section = "Management" 94 | elif any(kw in content_lower for kw in ["industry", "market", "competitor", "competition"]): 95 | section = "Industry Analysis" 96 | elif any(kw in content_lower for kw in ["news", "press", "announcement", "media"]): 97 | section = "News & Media" 98 | 99 | # Add to appropriate section 100 | sections[section].append((filename, content)) 101 | 102 | # Add to TOC 103 | toc.append(f"- [{filename}](#{filename.lower().replace(' ', '-')})") 104 | 105 | except Exception as e: 106 | logger.error(f"Error processing markdown file {md_file}: {str(e)}") 107 | sections["Miscellaneous"].append(( 108 | f"Error_{idx}", 109 | f"Error processing file {md_file}: {str(e)}" 110 | )) 111 | 112 | # Add TOC to master content 113 | master_content.extend(toc) 114 | master_content.append("\n---\n") 115 | 116 | # Add content by section 117 | for section_name, section_contents in sections.items(): 118 | if section_contents: 119 | master_content.append(f"# {section_name}") 120 | 121 | for filename, content in section_contents: 122 | # Add section anchor 123 | master_content.append(f"") 124 | 125 | # Clean up the content by removing the first heading if it matches the filename 126 | # This avoids duplication with our added heading 127 | content_lines = 
content.split("\n") 128 | if len(content_lines) > 0 and content_lines[0].startswith("# ") and filename in content_lines[0]: 129 | content = "\n".join(content_lines[1:]) 130 | 131 | master_content.append(f"## {filename}") 132 | master_content.append(content) 133 | master_content.append("\n---\n") 134 | 135 | # Add metadata and summary section 136 | master_content.append("# Metadata") 137 | master_content.append("## Document Sources") 138 | 139 | sources_table = ["| Source | Type | Date Included |"] 140 | sources_table.append("| --- | --- | --- |") 141 | 142 | for md_file in markdown_files: 143 | file_path = Path(md_file) 144 | file_type = file_path.suffix 145 | file_date = datetime.datetime.fromtimestamp(os.path.getmtime(md_file)).strftime('%Y-%m-%d') 146 | sources_table.append(f"| {file_path.stem} | {file_type} | {file_date} |") 147 | 148 | master_content.extend(sources_table) 149 | 150 | # Write the master file 151 | try: 152 | with open(output_path, 'w', encoding='utf-8') as f: 153 | f.write("\n\n".join(master_content)) 154 | logger.info(f"Master file generated: {output_path}") 155 | except Exception as e: 156 | logger.error(f"Error writing master file: {str(e)}") 157 | return "" 158 | 159 | return str(output_path) -------------------------------------------------------------------------------- /py/eodhd/gareebman_entry_exit.py: -------------------------------------------------------------------------------- 1 | ''' 2 | We are working here on identifying my favorite point of a company's business 3 | when there is a turn around. This analysis will try to capture from a price 4 | movement perspective. 5 | 6 | We solely rely on technical indicators for shortlisting in this scan. Ideally 7 | we should look for long bases, and then, we see if price is bottoming and 8 | then picking up. 9 | 10 | To keep it simple, we will track only RSI and Volstop. 11 | For favourable entries into watchlist we will look for, (in weekly timeframe) 12 | rsi > threshold (45) and volstop in uptrend. We will check with the previous 13 | weeks to see if we had a "False", and now we have a "True". This means entry. 14 | We dont expect to see too many flip-flops. 15 | We are also defining an exit (from the watchlist), if volstop is in downtrend. 16 | Again the same logic of comparing with previous week will apply. 17 | 18 | We are also keeping a count of the current "entry" or "exit". So, let us say 19 | a "trend" is "entry" and "duration" is 8, it means entry condition satisfied 20 | 8 bars ago and continues to remain "entry" (without "exit" condition triggered) 21 | 22 | So, do not confuse with the normal "entry" - "exit" terminology and method of 23 | trading. "entry" doesnt mean sell your house and take position. It means start 24 | to track it. 
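As a concrete sketch, the weekly toggle described above boils down to the
following checks (the 'rsi' and 'Uptrend' columns are the ones this script
computes further below; shown only to make the entry/exit wording precise):

    data['entry'] = (data['rsi'] > rsi_weekly_threshold) & data['Uptrend']
    data['exit'] = ~data['Uptrend']
    entry = data['entry'].iloc[-1] and not data['entry'].iloc[-2]  # flipped to True this week
    exit = data['exit'].iloc[-1] and not data['exit'].iloc[-2]     # flipped to True this week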
25 | ''' 26 | 27 | import pandas as pd 28 | import numpy as np 29 | import ta 30 | from ta.volatility import AverageTrueRange 31 | import datetime 32 | import pricereader as pr 33 | 34 | # Set output folder path 35 | output_path = "output" 36 | 37 | # Read the list of stocks from the CSV file 38 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 39 | 40 | # Interval 41 | data_interval_weekly = 'w' 42 | 43 | # RSI interval 44 | rsi_length = 14 45 | # RSI weekly threshold 46 | rsi_weekly_threshold = 45 47 | 48 | def rsi(data): 49 | # Calculate the RSI 50 | data['rsi'] = ta.momentum.RSIIndicator(data['Close'], window=rsi_length).rsi() 51 | return data 52 | 53 | def calculate_true_range(df): 54 | high_low = df['High'] - df['Low'] 55 | high_close = np.abs(df['High'] - df['Close'].shift()) 56 | low_close = np.abs(df['Low'] - df['Close'].shift()) 57 | true_ranges = pd.concat([high_low, high_close, low_close], axis=1) 58 | return true_ranges.max(axis=1) 59 | 60 | def calculate_atr(df, atrlen): 61 | df['TR'] = calculate_true_range(df) 62 | return df['TR'].rolling(window=atrlen, min_periods=1).mean() 63 | 64 | def vol_stop(df, atrlen=10, atrfactor=2.0): 65 | df['ATR'] = calculate_atr(df, atrlen) * atrfactor 66 | max_val = df['Close'].iloc[0] 67 | min_val = df['Close'].iloc[0] 68 | uptrend = True 69 | stop = 0.0 70 | 71 | stops = [] 72 | uptrends = [] 73 | 74 | for index, row in df.iterrows(): 75 | max_val = max(max_val, row['Close']) 76 | min_val = min(min_val, row['Close']) 77 | atrM = row['ATR'] 78 | 79 | if uptrend: 80 | stop = max(stop, max_val - atrM) 81 | else: 82 | stop = min(stop, min_val + atrM) 83 | 84 | if row['Close'] - stop >= 0.0: 85 | uptrend = True 86 | else: 87 | uptrend = False 88 | 89 | if uptrend != uptrends[-1] if uptrends else True: 90 | max_val = row['Close'] 91 | min_val = row['Close'] 92 | stop = max_val - atrM if uptrend else min_val + atrM 93 | 94 | stops.append(stop) 95 | uptrends.append(uptrend) 96 | 97 | df['VolStop'] = stops 98 | df['Uptrend'] = uptrends 99 | return df 100 | 101 | def main(): 102 | print("Started...") 103 | # Create the DataFrame 104 | result_df = pd.DataFrame(columns=['stock', 'Close', 'VolStop10_2.0', 'RSI(14)', 'Entry', 'Exit', 'Trend', 'Duration']) 105 | # Iterate through the list of stocks 106 | for stock in stocks["Ticker"]: 107 | try: 108 | # Get the stock data 109 | data = pr.get_price_data(stock, data_interval_weekly) 110 | # Drop those with NaN 111 | data = data.dropna() 112 | 113 | # Get RSI data 114 | data = rsi(data) 115 | 116 | # Get VolStop 117 | data = vol_stop(data) 118 | 119 | # Creating the 'entry' column 120 | data['entry'] = (data['rsi'] > rsi_weekly_threshold) & data['Uptrend'] 121 | 122 | # Creating the 'exit' column 123 | data['exit'] = ~data['Uptrend'] 124 | 125 | # Check entry toggle 126 | entry = data['entry'].iloc[-1] and not data['entry'].iloc[-2] 127 | 128 | # Check exit toggle 129 | exit = data['exit'].iloc[-1] and not data['exit'].iloc[-2] 130 | 131 | # Combine the 'entry' and 'exit' columns into a single column representing the current trend 132 | data['trend'] = np.where(data['entry'], 'entry', 'exit') 133 | 134 | # Identify where the trend changes 135 | trend_changes = data['trend'].ne(data['trend'].shift()).cumsum() 136 | 137 | # Group by these changes and count within each group 138 | data['trend_duration'] = data.groupby(trend_changes).cumcount() + 1 139 | 140 | row = {} 141 | 142 | if (entry or exit): 143 | row = {'stock': stock,'Close': str(round(data['Close'].iloc[-1], 
2)),'VolStop10_2.0':str(round(data['VolStop'].iloc[-1])), \ 144 | 'RSI(14)':str(round(data['rsi'].iloc[-1])), 'Trend': data['trend'].iloc[-1], \ 145 | 'Duration': data['trend_duration'].iloc[-1], 'Entry':entry,'Exit':exit} 146 | else: 147 | row = {'stock': stock,'Close': str(round(data['Close'].iloc[-1], 2)),'VolStop10_2.0':str(round(data['VolStop'].iloc[-1])), \ 148 | 'RSI(14)':str(round(data['rsi'].iloc[-1])), 'Trend': data['trend'].iloc[-1], \ 149 | 'Duration': data['trend_duration'].iloc[-1], 'Entry':'-','Exit':'-'} 150 | 151 | # Append the new row to the DataFrame 152 | result_df.loc[len(result_df)] = row 153 | 154 | except Exception as e: 155 | print("Error: " + stock) 156 | print(e) 157 | 158 | # Append current timestamp to the file name 159 | now = datetime.datetime.now() 160 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 161 | file_name = f'{output_path}/gareebman_report_' + timestamp + '.csv' 162 | # Export the DataFrame to CSV 163 | result_df.to_csv(file_name, index=False) 164 | print('Done') 165 | 166 | if __name__ == "__main__": 167 | main() 168 | -------------------------------------------------------------------------------- /py/ai/turnaround/README.md: -------------------------------------------------------------------------------- 1 | # Business Turnaround Detection System 2 | 3 | An AI-powered financial analysis tool that identifies potential business turnarounds by analyzing companies listed in a CSV file. The system uses advanced AI agents to research financial data and market conditions for each company, generating comprehensive markdown reports with turnaround potential verdicts. 4 | 5 | ## 🎯 Purpose 6 | 7 | This tool is designed to help investors and analysts identify companies that may be experiencing business turnarounds by: 8 | - Fetching latest financial reports and news 9 | - Analyzing financial health indicators 10 | - Determining turnaround potential with AI-driven insights 11 | - Generating structured markdown reports for each company 12 | 13 | ## 📁 Project Structure 14 | 15 | ``` 16 | turnaround/ 17 | ├── main.py # Main execution script 18 | ├── data/ 19 | │ └── financial_data.csv # Input CSV with company data 20 | ├── my_tools/ # Custom tools for the AI agent 21 | │ ├── __init__.py 22 | │ ├── cmd_executor.py # Shell command execution tool 23 | │ ├── fs_reader.py # File system reader tool 24 | │ ├── markdown_report.py # Report generation tool 25 | │ └── web_fetcher.py # Web search tool 26 | ├── output/ # Generated reports directory 27 | └── README.md # This file 28 | ``` 29 | 30 | ## 🔧 Prerequisites 31 | 32 | Before running this project, ensure you have: 33 | 34 | 1. **Python 3.8+** installed 35 | 2. **OpenAI API Key** - Required for the AI agent 36 | 3. **Internet connection** - For web research functionality 37 | 38 | ## 📦 Installation & Setup 39 | 40 | ### 1. Install Required Dependencies 41 | 42 | #### Option A: Using requirements.txt (Recommended) 43 | ```bash 44 | pip install -r requirements.txt 45 | ``` 46 | 47 | #### Option B: Manual Installation 48 | ```bash 49 | pip install smolagents python-dotenv openai litellm pandas numpy requests 50 | ``` 51 | 52 | ### 2. Environment Configuration 53 | 54 | Create a `.env` file in the project root directory: 55 | 56 | ```bash 57 | touch .env 58 | ``` 59 | 60 | Add your OpenAI API key to the `.env` file: 61 | 62 | ``` 63 | OPENAI_API_KEY=your_openai_api_key_here 64 | ``` 65 | 66 | ### 3. 
Prepare Input Data 67 | 68 | Ensure your `data/financial_data.csv` file follows this format: 69 | 70 | ```csv 71 | Name,BSE Code,NSE Code 72 | 63 Moons Tech.,526881,63MOONS 73 | Apex Frozen Food,540692,APEX 74 | Arman Financial,531179,ARMANFIN 75 | ``` 76 | 77 | **Required Columns:** 78 | - `Name`: Company name (required) 79 | - `BSE Code`: Bombay Stock Exchange code (optional) 80 | - `NSE Code`: National Stock Exchange code (optional) 81 | 82 | ### 4. Create Output Directory 83 | 84 | ```bash 85 | mkdir -p output 86 | ``` 87 | 88 | ## 🚀 Usage 89 | 90 | ### Basic Execution 91 | 92 | Run the turnaround analysis: 93 | 94 | ```bash 95 | cd /path/to/turnaround 96 | python main.py 97 | ``` 98 | 99 | ### What Happens During Execution 100 | 101 | 1. **Data Loading**: Reads companies from `data/financial_data.csv` 102 | 2. **AI Analysis**: For each company, the AI agent: 103 | - Searches web for latest financial reports 104 | - Gathers recent news and market data 105 | - Analyzes financial health indicators 106 | - Determines turnaround potential 107 | 3. **Report Generation**: Creates detailed markdown reports in the `output/` directory 108 | 109 | ### Sample Output 110 | 111 | Reports are saved as: `output/{business_name}{timestamp}_report.md` 112 | 113 | Each report includes: 114 | - **Business Name & Codes** 115 | - **Summary of Financial Data** 116 | - **Analysis of Financial Health** 117 | - **Turnaround Potential Verdict**: "Strong Turnaround", "Weak Turnaround", or "No Turnaround" 118 | 119 | ## 🔧 Configuration 120 | 121 | ### Model Configuration 122 | 123 | The system uses OpenAI's GPT-4.1-mini by default. To change the model, modify the `model` variable in `main.py`: 124 | 125 | ```python 126 | model = LiteLLMModel(model_id="openai/gpt-4-turbo", api_key=os.getenv("OPENAI_API_KEY")) 127 | ``` 128 | 129 | ### Analysis Steps 130 | 131 | The AI agent follows these steps: 132 | 1. Company identification and code mapping 133 | 2. Web research for financial data and news 134 | 3. Financial health analysis 135 | 4. Turnaround potential assessment 136 | 5. Report generation and saving 137 | 138 | ## 📊 Best Practices 139 | 140 | ### When to Run 141 | - **Ideal timing**: After quarterly earnings season 142 | - **Frequency**: Quarterly or semi-annually for best results 143 | - **Market conditions**: Consider running during market downturns for maximum turnaround identification 144 | 145 | ### Data Quality 146 | - Ensure company names and stock codes are accurate 147 | - Remove delisted or defunct companies from the CSV 148 | - Update the CSV with new companies of interest 149 | 150 | ## 🛠️ Troubleshooting 151 | 152 | ### Common Issues 153 | 154 | 1. **Missing API Key** 155 | ``` 156 | Error: OpenAI API key not found 157 | Solution: Check your .env file and ensure OPENAI_API_KEY is set 158 | ``` 159 | 160 | 2. **CSV File Not Found** 161 | ``` 162 | Error: The financial data file data/financial_data.csv does not exist 163 | Solution: Ensure the CSV file exists in the data/ directory 164 | ``` 165 | 166 | 3. **Network Issues** 167 | ``` 168 | Error: Web search failed 169 | Solution: Check internet connection and API quotas 170 | ``` 171 | 172 | 4. 
**Permission Errors** 173 | ``` 174 | Error: Cannot write to output directory 175 | Solution: Ensure output/ directory exists and has write permissions 176 | ``` 177 | 178 | ### Debugging 179 | 180 | Enable verbose logging by modifying the agent configuration: 181 | 182 | ```python 183 | response = agent.run(final_instructions, max_steps=20, verbose=True) 184 | ``` 185 | 186 | ## 📈 Output Interpretation 187 | 188 | ### Turnaround Verdicts 189 | 190 | - **Strong Turnaround**: Company shows clear signs of recovery with improving fundamentals 191 | - **Weak Turnaround**: Some positive indicators but recovery uncertain 192 | - **No Turnaround**: No significant improvement indicators found 193 | 194 | ### Report Sections 195 | 196 | Each generated report contains: 197 | - Executive summary with verdict 198 | - Financial metrics analysis 199 | - Market sentiment and news analysis 200 | - Risk factors and considerations 201 | - Timeline for potential recovery 202 | 203 | ## 🤝 Contributing 204 | 205 | To enhance this tool: 206 | 1. Add new analysis tools in the `my_tools/` directory 207 | 2. Extend the financial metrics analysis 208 | 3. Improve web scraping capabilities 209 | 4. Add visualization features 210 | 211 | ## ⚠️ Disclaimer 212 | 213 | This tool is for informational purposes only and should not be considered as financial advice. Always conduct thorough due diligence and consult with financial professionals before making investment decisions. 214 | 215 | ## 📝 License 216 | 217 | This project is part of the BharatTrader stock analysis suite. Please refer to the main project license for usage terms. 218 | -------------------------------------------------------------------------------- /py/yf/ss_result_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Stock Result Analysis Script for Screener Source Data 4 | 5 | This script processes stock data from ss_result_file.csv, which contains stock information 6 | from Screener. For each stock, it downloads historical price data from Yahoo Finance 7 | and calculates various performance metrics relative to benchmark. 8 | 9 | The script: 10 | 1. Reads stock information from a CSV file with 'companyId' format as 'NSE:SYMBOL' or 'BSE:SYMBOL' 11 | 2. Downloads historical price data for each stock using yfinance 12 | 3. Calculates performance metrics (stock change %, benchmark change %, Alpha, ARS) 13 | 4. Saves the enriched data to a new CSV file 14 | 15 | Usage: 16 | python ss_result_parser.py 17 | """ 18 | 19 | # Standard library imports 20 | import datetime 21 | import numpy as np 22 | import pandas as pd 23 | import yfinance as yf 24 | 25 | # Constants 26 | ARS_DATE = "2024-05-10" # ARS (Adaptive Relative Strength) reference date 27 | START_DATE = '2024-01-01' # Beginning of analysis period 28 | END_DATE = (datetime.datetime.now() + datetime.timedelta(days=1)).strftime('%Y-%m-%d') # today + 1 day 29 | 30 | RESULT_FILE = "ss_result_file.csv" 31 | OUTPUT_FILE = "final_ss_result_parser.csv" 32 | 33 | 34 | def main(): 35 | """ 36 | Main function to process stock data and calculate performance metrics. 37 | """ 38 | print('Started... 
with yfinance version:', yf.__version__) 39 | 40 | # Use yfinance to retrieve the benchmark data 41 | benchmark_ticker = yf.Ticker("^NSEI") # NIFTY 50 Index 42 | benchmark_data = benchmark_ticker.history(start=START_DATE, end=END_DATE, interval='1d', auto_adjust=False, prepost=False) 43 | benchmark_data = benchmark_data.dropna() 44 | 45 | # Read the result file 46 | result = pd.read_csv(RESULT_FILE) 47 | result = result.dropna(subset=['companyId']) # Only drop rows with no companyId 48 | 49 | # Process each stock 50 | for index, row in result.iterrows(): 51 | try: 52 | # Extract exchange and symbol from companyId 53 | company_id_parts = row['companyId'].split(':') 54 | exchange = company_id_parts[0] 55 | symbol = company_id_parts[1] 56 | 57 | print(f"Processing {row['Name']}...") 58 | 59 | # Set ticker format based on exchange 60 | if exchange == "NSE": 61 | stk_ticker = symbol + '.NS' 62 | elif exchange == "BSE": 63 | stk_ticker = symbol + '.BO' 64 | else: 65 | print(f"Unknown exchange for {row['companyId']}") 66 | continue 67 | 68 | stk_ticker = yf.Ticker(stk_ticker) 69 | stock_data = stk_ticker.history(start=START_DATE, end=END_DATE, interval='1d', auto_adjust=False, prepost=False) 70 | 71 | if stock_data.empty: 72 | print(f"No data available for {row['companyId']}") 73 | continue 74 | 75 | # Fetch Result Date, and then fetch the price on that date from stock_data. 76 | if pd.isna(row['Last Result Date']): 77 | print(f"No result date for {row['companyId']}") 78 | continue 79 | 80 | result_date = datetime.datetime.strptime(row['Last Result Date'], '%Y-%m-%d').strftime('%Y-%m-%d') 81 | result_price = 0.00 82 | 83 | # Get the last date in the stock data 84 | last_date = stock_data.index[-1].strftime('%Y-%m-%d') 85 | if last_date < result_date: 86 | print(f"Error: {row['companyId']} => Result Date {result_date} is greater than last date in stock data {last_date}") 87 | continue 88 | 89 | # If price not found on result date, try following dates 90 | while result_date <= last_date: 91 | try: 92 | result_price = stock_data.loc[stock_data.index == result_date, "Close"].values[0] 93 | break 94 | except: 95 | result_date = (datetime.datetime.strptime(result_date, '%Y-%m-%d') + datetime.timedelta(days=1)).strftime('%Y-%m-%d') 96 | continue 97 | 98 | # Calculate and add stock performance metrics 99 | add_stock_metrics(result, index, stock_data, result_date, result_price) 100 | 101 | # Calculate and add benchmark performance metrics 102 | add_benchmark_metrics(result, index, benchmark_data, result_date) 103 | 104 | # Calculate alpha and ARS 105 | calculate_comparative_metrics(result, index, stock_data, benchmark_data) 106 | 107 | except Exception as e: 108 | print(f'Error processing {row.get("companyId", "unknown")}: {e}') 109 | continue 110 | 111 | # Save the result file 112 | result.to_csv(OUTPUT_FILE, index=False) 113 | print(f"Processing complete. Results saved to {OUTPUT_FILE}") 114 | 115 | 116 | def add_stock_metrics(result_df, index, stock_data, result_date, result_price): 117 | """ 118 | Calculate and add stock-specific metrics to the result dataframe. 
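    Note: '% Stock change' is measured from the result-date close to the
    latest close, i.e. (last_close - result_price) / result_price * 100.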
119 | 120 | Args: 121 | result_df: The dataframe containing stock information 122 | index: The row index in the dataframe 123 | stock_data: Historical stock data from yfinance 124 | result_date: The date when the result was announced 125 | result_price: The stock price on the result date 126 | """ 127 | result_df.at[index, 'Result Date Price'] = round(result_price, 2) 128 | result_df.at[index, 'Last Close Date'] = stock_data.index[-1].strftime('%Y-%m-%d') 129 | result_df.at[index, 'Last Close Price'] = round(stock_data['Close'].iloc[-1], 2) 130 | result_df.at[index, '% Stock change'] = round((stock_data['Close'].iloc[-1] - result_price) / result_price * 100, 2) 131 | 132 | 133 | def add_benchmark_metrics(result_df, index, benchmark_data, result_date): 134 | """ 135 | Calculate and add benchmark metrics to the result dataframe. 136 | 137 | Args: 138 | result_df: The dataframe containing stock information 139 | index: The row index in the dataframe 140 | benchmark_data: Historical benchmark data from yfinance 141 | result_date: The date when the result was announced 142 | """ 143 | benchmark_result_price = benchmark_data.loc[benchmark_data.index == result_date, "Close"].values[0] 144 | result_df.at[index, 'Result Date Benchmark Price'] = round(benchmark_result_price, 2) 145 | result_df.at[index, 'Last Benchmark Date'] = benchmark_data.index[-1].strftime('%Y-%m-%d') 146 | result_df.at[index, 'Last Benchmark Price'] = round(benchmark_data['Close'].iloc[-1], 2) 147 | result_df.at[index, '% Benchmark change'] = round((benchmark_data['Close'].iloc[-1] - benchmark_result_price) / benchmark_result_price * 100, 2) 148 | 149 | 150 | def calculate_comparative_metrics(result_df, index, stock_data, benchmark_data): 151 | """ 152 | Calculate comparative performance metrics like Alpha and ARS. 
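    Note (restating the formulas used in the body below): Alpha is simply
    '% Stock change' - '% Benchmark change', while ARS compares the stock's
    move since ARS_DATE with the benchmark's move over the same window:

        ARS = (stock_close_latest / stock_close_on_ARS_DATE)
              / (benchmark_close_latest / benchmark_close_on_ARS_DATE) - 1

    A positive ARS therefore means the stock has outperformed the benchmark
    since ARS_DATE.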
153 | 154 | Args: 155 | result_df: The dataframe containing stock information 156 | index: The row index in the dataframe 157 | stock_data: Historical stock data from yfinance 158 | benchmark_data: Historical benchmark data from yfinance 159 | """ 160 | # Calculate alpha (stock performance relative to benchmark) 161 | result_df.at[index, 'Alpha'] = result_df.at[index, '% Stock change'] - result_df.at[index, '% Benchmark change'] 162 | 163 | # Calculate ARS (Adaptive Relative Strength) 164 | try: 165 | result_df.at[index, 'ARS'] = round( 166 | (stock_data['Close'].iloc[-1] / stock_data.loc[stock_data.index == ARS_DATE, "Close"].values[0]) / 167 | (benchmark_data['Close'].iloc[-1] / benchmark_data.loc[benchmark_data.index == ARS_DATE, "Close"].values[0]) - 1, 2) 168 | except: 169 | result_df.at[index, 'ARS'] = 0.00 # Error in calculating ARS, set it to 0.00 170 | 171 | 172 | if __name__ == "__main__": 173 | main() -------------------------------------------------------------------------------- /py/ai/nse_announcements/weekly_nse_announcements_analysis.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import requests 3 | import fitz # PyMuPDF 4 | import os 5 | from openai import OpenAI 6 | from urllib.parse import urlparse 7 | from dotenv import load_dotenv, find_dotenv 8 | from datetime import datetime 9 | import argparse 10 | import logging 11 | 12 | log_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 13 | LOCAL_MODEL = '' #'llama3.1:latest' # keep it blank if, gpt is used 14 | LOCAL_URL = 'http://10.0.0.4:7862/v1' # Update with cloud URL or Local 15 | GPT_MODEL = 'gpt-4o-mini' # if LOCAL_MODEL is blank, GPT will be used 16 | CONTEXT_LEN = 1500 17 | 18 | # Logging configuration 19 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 20 | logger = logging.getLogger() 21 | 22 | # Load environment variables 23 | def set_api(): 24 | load_dotenv(find_dotenv()) 25 | return os.getenv('OPENAI_API_KEY') 26 | 27 | # Get LLM client (GPT or local) 28 | def get_llm_client_model(): 29 | if not LOCAL_MODEL: 30 | gpt_client = OpenAI(api_key=set_api()) 31 | return gpt_client, GPT_MODEL 32 | else: 33 | my_local_client = OpenAI(base_url=LOCAL_URL, api_key="local-llm") 34 | return my_local_client, LOCAL_MODEL 35 | 36 | client, model = get_llm_client_model() 37 | 38 | critical_subjects = [ 39 | "Updates", "Press Release", "Financial Result Updates", "Sale or Disposal-XBRL", 40 | "Acquisition-XBRL", "Record Date", "Investor Presentation", 41 | "Change in Directors/Key Managerial Personnel/Auditor/Compliance Officer/Share Transfer Agent", 42 | "Acquisition", "Scheme of Arrangement", "Resignation", "Appointment", 43 | "Date of Payment of Dividend", "Dividend", "Increase in Authorised Capital", 44 | "Credit Rating", "Rights Issue", "Public Announcement-Open Offer" 45 | ] 46 | 47 | routine_updates_subjects = [ 48 | "Shareholders meeting", "Outcome of Board Meeting", "Copy of Newspaper Publication", 49 | "Analysts/Institutional Investor Meet/Con. 
Call Updates", "Loss/Duplicate-Share Certificate-XBRL", 50 | "Board Meeting Intimation", "Trading Window-XBRL", "Notice Of Shareholders Meetings-XBRL", 51 | "Change in Director(s)", "ESOP/ESOS/ESPS", "Clarification - Financial Results", 52 | "Corporate Insolvency Resolution Process-XBRL", "Limited Review Report", 53 | "Disclosure under SEBI (PIT) Reg 2015" 54 | ] 55 | 56 | # Function to download and extract PDF or XML text 57 | def download_and_extract_pdf(url, local_path): 58 | # Skip download if file already exists 59 | if os.path.exists(local_path): 60 | logger.info(f"File already exists locally: {local_path}") 61 | return extract_pdf_text(local_path) 62 | 63 | try: 64 | # Make the request to download the file 65 | response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}) 66 | response.raise_for_status() # Ensure no bad response 67 | 68 | # Check the Content-Type header to determine file type 69 | content_type = response.headers.get('Content-Type') 70 | file_extension = None 71 | 72 | if 'application/pdf' in content_type: 73 | file_extension = 'pdf' 74 | elif 'application/xml' in content_type: 75 | file_extension = 'xml' 76 | 77 | # Ensure we append the correct file extension to local_path 78 | if file_extension: 79 | local_path += f'.{file_extension}' 80 | else: 81 | logger.warning(f"Unknown content type: {content_type}. Assuming default .pdf") 82 | file_extension = 'pdf' 83 | local_path += '.pdf' 84 | 85 | # Write the file to the local path 86 | with open(local_path, 'wb') as f: 87 | f.write(response.content) 88 | 89 | # Extract text based on file type 90 | if file_extension == 'pdf': 91 | return extract_pdf_text(local_path) 92 | elif file_extension == 'xml': 93 | return extract_xml_text(local_path) 94 | else: 95 | logger.error(f"Unsupported file type: {file_extension}") 96 | return "" 97 | 98 | except requests.RequestException as e: 99 | logger.error(f"Failed to download {url}: {e}") 100 | return "" 101 | 102 | # Function to extract text from PDF 103 | def extract_pdf_text(local_path): 104 | try: 105 | doc = fitz.open(local_path) 106 | text = "".join(page.get_text() for page in doc) 107 | return text 108 | except Exception as e: 109 | logger.error(f"Failed to extract text from PDF {local_path}: {e}") 110 | return "" 111 | 112 | # Function to extract text from XML 113 | def extract_xml_text(local_path): 114 | try: 115 | with open(local_path, 'r') as f: 116 | return f.read() 117 | except Exception as e: 118 | logger.error(f"Failed to extract text from XML file {local_path}: {e}") 119 | return "" 120 | 121 | # Truncate text to context length 122 | def truncate_words(text): 123 | words = text.split() 124 | return ' '.join(words[:CONTEXT_LEN]) if len(words) > CONTEXT_LEN else text 125 | 126 | # Get summary and sentiment using OpenAI API 127 | def get_summary_and_sentiment(text): 128 | truncated_text = truncate_words(text) 129 | try: 130 | response = client.chat.completions.create( 131 | model=model, temperature=1.0, max_tokens=500, 132 | messages=[ 133 | {"role": "user", "content": "Please summarize the company announcement provided."}, 134 | {"role": "user", "content": truncated_text} 135 | ] 136 | ) 137 | summary = response.choices[0].message.content 138 | sentiment_response = client.chat.completions.create( 139 | model=model, temperature=1.0, max_tokens=20, 140 | messages=[ 141 | {"role": "user", "content": f"Provide an investor sentiment analysis score in a scale between 0 (negative sentiment) to 1 (positive sentiment) for the following text. 
The answer should be a single float value, no explanation is required: {summary}"} 142 | ] 143 | ) 144 | sentiment_score = float(sentiment_response.choices[0].message.content.strip()) 145 | return summary, sentiment_score 146 | except Exception as e: 147 | logger.error(f"Error in generating summary/sentiment: {e}") 148 | return "", -1.0 149 | 150 | # Write result to file 151 | def write_to_file(file, data): 152 | with open(file, 'a') as f: 153 | f.write(data) 154 | 155 | # Main processing function 156 | def process_announcement(index, row, stock): 157 | pdf_url = row['ATTACHMENT'] 158 | filename = os.path.basename(urlparse(pdf_url).path) 159 | pdf_local_path = os.path.join('notifications', filename) 160 | pdf_text = download_and_extract_pdf(pdf_url, pdf_local_path) 161 | summary, sentiment_score = get_summary_and_sentiment(pdf_text) 162 | return { 163 | 'Stock': stock, 'Company': row['COMPANY NAME'], 'Subject': row['SUBJECT'], 164 | 'Summary': summary, 'Score': sentiment_score, 'Link': row['ATTACHMENT'] 165 | } 166 | 167 | # Main function 168 | def main(): 169 | # Parse command line arguments 170 | parser = argparse.ArgumentParser(description='Analyze announcements') 171 | parser.add_argument('--file', type=str, help='Input file path') 172 | parser.add_argument('--start', type=str, help='Stock to start from in stocks.csv') 173 | args = parser.parse_args() 174 | 175 | try: 176 | stocks = pd.read_csv("stocks.csv", usecols=["Ticker"]) 177 | df = pd.read_csv(args.file) 178 | df = df[~df['SUBJECT'].isin(routine_updates_subjects) & df['SUBJECT'].isin(critical_subjects)] 179 | logger.info(f"Analyzing {len(df)} announcements") 180 | 181 | result_df = pd.DataFrame(columns=['Stock', 'Company', 'Subject', 'Summary', 'Score', 'Link']) 182 | 183 | for stock in stocks["Ticker"]: 184 | for index, row in df[df['SYMBOL'] == stock].iterrows(): 185 | try: 186 | result = process_announcement(index, row, stock) 187 | # Append the new row to the DataFrame 188 | result_df.loc[len(result_df)] = result 189 | except Exception as e: 190 | logger.error(f"Error processing {stock}: {e}") 191 | 192 | file_name = f'output/{args.file}_report_{log_timestamp}.csv' 193 | result_df.to_csv(file_name, index=False) 194 | logger.info(f"Results saved to {file_name}") 195 | 196 | except Exception as e: 197 | logger.error(f"Error during processing: {e}") 198 | 199 | if __name__ == "__main__": 200 | main() 201 | -------------------------------------------------------------------------------- /py/eodhd/mip12_scanner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Momentum Investing Scanner (MIP‑12) (Modified from Prashanth Sir’s book) 3 | 4 | This module implements a momentum‑based stock scanner following the “MIP‑12” strategy 5 | from Prashanth Sir’s recent book. It filters and ranks Nifty 500 stocks by multiple 6 | technical criteria and outputs a CSV report. The original algorithm has been modified 7 | to include a ranking metric based on the Sharpe ratio, rather than Volar as that is proprietary. 8 | 9 | --- Overview --- 10 | 1. Market Trend Filter: 11 | Checks if the benchmark index (e.g., Nifty 500) is above its 20‑day EMA. 12 | 2. Entry Filters (applied only when market is bullish): 13 | • 52‑Week High Retracement: stock must be within 50% of its 52‑week high. 14 | • 200‑Day EMA: stock’s latest close must exceed its 200‑day EMA. 15 | 3. Ranking Metric: 16 | Computes a simple Sharpe ratio (mean daily return ÷ standard deviation of daily returns). 17 | 4. 
Final Selection: 18 | • If market is bullish: all stocks passing entry filters are ranked by Sharpe ratio. 19 | • If market is bearish: no new entries are considered, but ranking is still performed. 20 | 5. Output: 21 | • `mip12_scan_report.csv` with columns: 22 | Ticker, Rank#, Price, 52W_High, 200D_EMA, Sharpe_Ratio 23 | • `mip12_scan_errors.csv` capturing any per‑symbol exceptions. 24 | 25 | --- Functions --- 26 | market_trend_filter(benchmark_df, ema_period=20) → bool 27 | get_52w_high(stock_df, period=252) → float 28 | get_200d_ema(stock_df, period=200) → float 29 | compute_sharpe_ratio(stock_df) → float 30 | 31 | --- Main Flow --- 32 | 1. Load benchmark data. 33 | 2. Determine `is_bullish` flag based on the market trend filter. 34 | 3. Loop over each symbol: 35 | a. Load its price series. 36 | b. If bullish, enforce entry filters (52W High Retracement and 200D EMA). 37 | c. Compute Sharpe ratio for ranking. 38 | d. Append record (Ticker, Price, 52W_High, 200D_EMA, Sharpe_Ratio). 39 | e. Catch and log any exceptions per symbol. 40 | 4. Build a DataFrame, sort by Sharpe ratio, and insert Rank#. 41 | 5. Export the report and any errors to CSV. 42 | 43 | --- Logging & Error Handling --- 44 | - Uses Python’s `logging` module to record INFO and ERROR messages. 45 | - Errors for individual symbols are collected and saved to `mip12_scan_errors.csv`. 46 | 47 | Usage: 48 | python mip12_scanner.py 49 | 50 | """ 51 | import pricereader as pr 52 | import pandas as pd 53 | import numpy as np 54 | import logging 55 | 56 | # Configure logging 57 | logging.basicConfig( 58 | level=logging.INFO, 59 | format='%(asctime)s %(levelname)s: %(message)s', 60 | datefmt='%Y-%m-%d %H:%M:%S' 61 | ) 62 | 63 | # Interval 64 | data_interval = 'd' 65 | 66 | # Benchmark symbol 67 | benchmark = "CRSLDX" # Nifty 500 Index 68 | 69 | # Read the list of stocks from the CSV file 70 | stocks = pd.read_csv("nifty500.csv", header=0, usecols=["Ticker"]) 71 | 72 | # --- Helper functions --- 73 | 74 | def market_trend_filter(benchmark_df: pd.DataFrame, 75 | ema_period: int = 20, 76 | price_col: str = 'Close') -> bool: 77 | """Return True if latest benchmark Close > its EMA.""" 78 | ema = benchmark_df[price_col].ewm(span=ema_period, adjust=False).mean() 79 | return benchmark_df[price_col].iloc[-1] > ema.iloc[-1] 80 | 81 | def get_52w_high(stock_df: pd.DataFrame, 82 | period: int = 252, 83 | price_col: str = 'Close') -> float: 84 | """Return the 52‑week high price, or NaN if insufficient data.""" 85 | closes = stock_df[price_col].dropna() 86 | if len(closes) < period: 87 | return float('nan') 88 | return closes.iloc[-period:].max() 89 | 90 | def get_200d_ema(stock_df: pd.DataFrame, 91 | period: int = 200, 92 | price_col: str = 'Close') -> float: 93 | """Return the most recent 200‑day EMA, or NaN if insufficient data.""" 94 | closes = stock_df[price_col].dropna() 95 | if len(closes) < period: 96 | return float('nan') 97 | ema = closes.ewm(span=period, adjust=False).mean() 98 | return ema.iloc[-1] 99 | 100 | def passes_ratio_200d_ema(stock_df: pd.DataFrame, 101 | benchmark_df: pd.DataFrame, 102 | period: int = 200, 103 | price_col: str = 'Close') -> bool: 104 | """ 105 | Return True if the latest ratio of stock/benchmark Close is above 106 | its 200‑day EMA on the ratio series. 
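    In other words, this is a relative-strength filter on the stock/benchmark
    ratio (the "RS line"): the stock only qualifies while its RS line trades
    above its own 200-day EMA, i.e. it has been outperforming the benchmark.
    A minimal sketch of the same check, mirroring the body below:

        rs = (stock_df[price_col] / benchmark_df[price_col]).dropna()
        passes = rs.iloc[-1] > rs.ewm(span=period, adjust=False).mean().iloc[-1]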
107 | """ 108 | # align on common dates 109 | ratio = (stock_df[price_col] / benchmark_df[price_col]).dropna() 110 | if len(ratio) < period: 111 | return False 112 | ema = ratio.ewm(span=period, adjust=False).mean() 113 | return ratio.iloc[-1] > ema.iloc[-1] 114 | 115 | def compute_sharpe_ratio(stock_df: pd.DataFrame, 116 | price_col: str = 'Close', 117 | period: int = 252) -> float: 118 | """ 119 | Compute the Sharpe ratio as mean(daily returns) / std(daily returns) for the last `period` days. 120 | Returns 0.0 if there is insufficient data or if the annualized volatility is zero. 121 | """ 122 | df_1y = stock_df.tail(period).copy() 123 | 124 | # Calculate 12M ROC 125 | current_price = df_1y['Close'].iloc[-1] 126 | price_1y_ago = df_1y['Close'].iloc[0] 127 | roc_12m = (current_price / price_1y_ago) - 1 128 | 129 | # Daily returns & volatility 130 | df_1y['daily_return'] = df_1y['Close'].pct_change() 131 | daily_vol = df_1y['daily_return'].std() 132 | annualized_vol = daily_vol * np.sqrt(period) 133 | 134 | return 0.0 if annualized_vol == 0 else roc_12m / annualized_vol 135 | 136 | 137 | # --- Main scanning function --- 138 | 139 | def main(): 140 | 141 | logging.info("Scan started.") 142 | 143 | # 1. Load & trim benchmark data 144 | benchmark_data = pr.get_price_data(benchmark, data_interval) 145 | 146 | # 2. Check market trend 147 | is_bullish = market_trend_filter(benchmark_data) 148 | if is_bullish: 149 | logging.info("Market is bullish → full entry filters apply.") 150 | else: 151 | logging.info("Market is NOT bullish → only ranking/exits, no new entries.") 152 | print("Market is NOT bullish → only ranking/exits, no new entries.") 153 | 154 | # 3. Prepare lists 155 | candidates = stocks["Ticker"].tolist() 156 | records = [] 157 | errors = [] 158 | 159 | # 4. Per‐stock processing 160 | for symbol in candidates: 161 | try: 162 | print(f"Processing {symbol}...") 163 | df = pr.get_price_data(symbol, data_interval) 164 | if df.empty: 165 | continue # no data in date range 166 | 167 | # Entry filters if bullish 168 | high_52w = get_52w_high(df) 169 | ema_200 = get_200d_ema(df) 170 | 171 | 172 | price = df['Close'].iloc[-1] 173 | if pd.isna(high_52w) or price < 0.5 * high_52w: 174 | logging.info("Skipping %s: 52W high retracement not met.", symbol) 175 | continue 176 | if pd.isna(ema_200) or price <= ema_200: 177 | logging.info("Skipping %s: 200D EMA not met.", symbol) 178 | continue 179 | 180 | if not passes_ratio_200d_ema(df, benchmark_data): 181 | logging.info("Skipping %s: ratio chart condition not met.", symbol) 182 | continue 183 | 184 | # Compute ranking metric 185 | sharpe = compute_sharpe_ratio(df) 186 | 187 | # Record all required fields 188 | records.append({ 189 | "Ticker": symbol, 190 | "Price": df['Close'].iloc[-1], 191 | "52W_High": high_52w, 192 | "200D_EMA": ema_200, 193 | "Sharpe_Ratio": sharpe 194 | }) 195 | 196 | except Exception as e: 197 | logging.error(f"Error processing {symbol}: {e}") 198 | errors.append({"Ticker": symbol, "Error": str(e)}) 199 | 200 | # 5. Build final report DataFrame 201 | report_df = pd.DataFrame(records) 202 | report_df = report_df.dropna(subset=["Sharpe_Ratio"]) 203 | report_df = report_df.sort_values("Sharpe_Ratio", ascending=False) 204 | report_df.insert(1, "Rank#", range(1, len(report_df) + 1)) 205 | 206 | # 6. Export results 207 | report_df.to_csv("mip12_scan_report.csv", index=False) 208 | logging.info("Report saved to mip12_scan_report.csv.") 209 | 210 | # 7. 
Optionally export errors 211 | if errors: 212 | err_df = pd.DataFrame(errors) 213 | err_df.to_csv("mip12_scan_errors.csv", index=False) 214 | logging.info("Errors saved to mip12_scan_errors.csv.") 215 | 216 | return report_df 217 | 218 | # If this script is run directly, invoke main(): 219 | if __name__ == "__main__": 220 | main() 221 | -------------------------------------------------------------------------------- /py/yf/limevolume.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Volume is where the whole story begins. So it is important to determine volume expansions. 3 | On charts, one can look for volume expansions, when they breach daily/weekly averages by huge margins. (LimeVolume day) 4 | This indicate institutional demand. 5 | Expansion of volume and presense of demand at different life cycle stages of a stock can mean different things. 6 | For example, a limevolume day observed in Stage 1 for the first time, may be the first signal of demand, but not good 7 | to initiate a long trade just yet, because instituion will absorb the supply gradually. 8 | If the base is instead formed well, and we start to see limevolume with higher lows on price chart, it might indicate 9 | begining of stage 2. 10 | If a scrip is already in an established up trend, (Stage 2), then limevolume days on a sideways (resting) trend, indicates 11 | renewed demand either by the same institution or a new player interested in the company. Maybe suitable for top-up. 12 | ''' 13 | 14 | import yfinance as yf 15 | import pandas as pd 16 | import numpy as np 17 | import math 18 | import csv 19 | import datetime 20 | 21 | # Read the list of stocks from the CSV file 22 | stocks = pd.read_csv("stocks.csv", header=0, usecols=["Ticker"]) 23 | # Exchange ".BO" for BSE, ".NS" for Nifty 24 | exchg = ".NS" 25 | 26 | # Set start Date 27 | start_date = '2022-07-25' # Should be a date that is start of the week date, so that daily and weekly data can match 28 | 29 | # Set end Date 30 | end_date = '2023-07-29' 31 | # Folder location 32 | output = 'output' 33 | 34 | # Interval 35 | data_interval_wkeely = '1wk' 36 | data_interval_daily = '1d' 37 | 38 | # Weekly volume average length 39 | weekly_volume_length = 10 40 | # Daily volume average length 41 | daily_volume_length = 100 42 | 43 | # Number of days to check for limevolume 44 | lookback_length = 55 #3-months daily 45 | 46 | # Read up sector/industry information from text data 47 | stock_industry_map = pd.read_csv("stock_sector_industry_map.csv", header=0, usecols=["NSE Code","Industry","Market Cap", "Sector"]) 48 | 49 | # Crore 50 | One_Cr = 10000000 51 | 52 | def fetch_industry_mcap(nse_code): 53 | 54 | industry = '' 55 | mcap = '' 56 | sector = '' 57 | 58 | try: 59 | # We try to get from local file first 60 | sector = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Sector'].iloc[0] 61 | industry = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Industry'].iloc[0] 62 | mcap = stock_industry_map[stock_industry_map['NSE Code'] == nse_code]['Market Cap'].iloc[0] 63 | except Exception as err: 64 | pass 65 | 66 | if industry == '' or mcap == '': 67 | try: 68 | # Try yf 69 | ticker = yf.Ticker(nse_code+".NS") 70 | if ticker.info: 71 | if industry == '': 72 | industry = ticker.info['industry'] 73 | if mcap == '': 74 | mcap = round(ticker.info['marketCap'] / One_Cr, 0) 75 | if sector == '': 76 | sector = ticker.info['sector'] 77 | except Exception as err: 78 | pass 79 | 80 | return [sector, industry, 
mcap] 81 | 82 | def main(): 83 | print("Started... " + start_date + " - " + end_date) 84 | 85 | # Create the DataFrame 86 | df = pd.DataFrame(columns=['stock', 'mcap', 'blueVolCount', 'limeVolToday', 'limeVolCount', 'latestLimeVolDate', 'earliestLimeVolDate', 'tealVolCount', 'latestTealVolDate', \ 87 | 'earliestTealVolDate', 'priceChng', 'sector' , 'industry']) 88 | # Iterate through the list of stocks 89 | for stock in stocks["Ticker"]: 90 | try: 91 | print(f'Analyzing {stock}...') 92 | # Get the stock data 93 | stk_ticker = yf.Ticker(stock+exchg) 94 | # Get the stock data from yfinance, dont adjust OHLC 95 | stock_data_daily = stk_ticker.history(start=start_date, end=end_date,interval=data_interval_daily,auto_adjust=False, prepost=False) 96 | # Drop those with NaN 97 | stock_data_daily = stock_data_daily.dropna() 98 | 99 | stock_data_weekly = stk_ticker.history(start=start_date, end=end_date,interval=data_interval_wkeely,auto_adjust=False, prepost=False) 100 | # Drop those with NaN 101 | stock_data_weekly = stock_data_weekly.dropna() 102 | 103 | #10wk avg volume 104 | weekly_vol_avg_col = f'Weekly_Volume_Avg{weekly_volume_length}' 105 | stock_data_weekly[weekly_vol_avg_col] = stock_data_weekly['Volume'].rolling(window=weekly_volume_length, min_periods=1).mean().fillna(0) 106 | 107 | #100d avg volule 108 | daily_vol_avg_col = f'Daily_Volume_Avg{daily_volume_length}' 109 | stock_data_daily[daily_vol_avg_col] = stock_data_daily['Volume'].rolling(window=daily_volume_length, min_periods=1).mean().fillna(0) 110 | 111 | # Create a new column in the daily data to store the corresponding weekly volume 112 | stock_data_daily[weekly_vol_avg_col] = 0 113 | 114 | # Loop through each row in the daily data 115 | mismatch_ctr = 0 116 | never_matched = True 117 | for i, row in stock_data_daily.iterrows(): 118 | # Extract the date from the current row 119 | date = row.name.date() 120 | 121 | # Look up the corresponding row in the weekly data 122 | weekly_row = stock_data_weekly.loc[stock_data_weekly.index.date == date] 123 | 124 | # If there is no corresponding weekly data for the current date, propagate the last known weekly volume forward 125 | if len(weekly_row) == 0: 126 | if never_matched and mismatch_ctr < 7: 127 | mismatch_ctr = mismatch_ctr + 1 128 | continue # Try to match up data for next week 129 | stock_data_daily.at[i, weekly_vol_avg_col] = stock_data_daily[weekly_vol_avg_col].shift(1)[i] 130 | # If there is corresponding weekly data for the current date, fetch the volume and set it in the daily data 131 | else: 132 | never_matched = False 133 | weekly_avg_volume = weekly_row[weekly_vol_avg_col].iloc[0] 134 | stock_data_daily.at[i, weekly_vol_avg_col] = weekly_avg_volume 135 | 136 | isTodayLimeVolume = False 137 | cntLimeCount = 0 138 | cntTealCount = 0 139 | pctChange = 0 140 | earliestLimeVolDate = '' 141 | latestLimeVolDate = '' 142 | earliestTealVolDate = '' 143 | latestTealVolDate = '' 144 | # reverse 145 | stock_data_daily = stock_data_daily.iloc[::-1] 146 | 147 | if len(stock_data_daily) > lookback_length: 148 | for i in range(0, lookback_length): 149 | if stock_data_daily['Close'][i] > stock_data_daily['Close'][i+1]: # Up Day 150 | weekly_avg_to_compare = stock_data_daily[weekly_vol_avg_col][i] 151 | for j in range(i+1, i+7): # Find the previous week volume average, by checking previous unmatched value 152 | _weekly_avg = stock_data_daily[weekly_vol_avg_col][j] 153 | if _weekly_avg != weekly_avg_to_compare: 154 | weekly_avg_to_compare = _weekly_avg 155 | break 156 | if 
stock_data_daily['Volume'][i] > weekly_avg_to_compare: # Now compare if this day's volume is greater than weekly average volume 157 | cntLimeCount = cntLimeCount + 1 158 | earliestLimeVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y") 159 | if cntLimeCount == 1: 160 | latestLimeVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y") 161 | pctChange = round(((stock_data_daily['Close'][i] / stock_data_daily['Close'][i+1]) - 1 ) * 100, 2) 162 | if i == 0: 163 | isTodayLimeVolume = True 164 | # Teal Volume 165 | if stock_data_daily['Volume'][i] > stock_data_daily[daily_vol_avg_col][i]: # Now compare if this day's volume is greater than daily average volume 166 | cntTealCount = cntTealCount + 1 167 | earliestTealVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y") 168 | if cntTealCount == 1: 169 | latestTealVolDate = stock_data_daily.index[i].strftime("%d-%b-%Y") 170 | 171 | # Fetch industy and mcap 172 | [sector, industry, marketCap] = fetch_industry_mcap(stock) 173 | 174 | blueVolCnt = cntLimeCount + cntTealCount 175 | row = {'stock': stock, 'blueVolCount': str(blueVolCnt), 'limeVolToday' : str(isTodayLimeVolume), 'limeVolCount': str(cntLimeCount), \ 176 | 'latestLimeVolDate' : str(latestLimeVolDate), 'earliestLimeVolDate' : str(earliestLimeVolDate), \ 177 | 'tealVolCount': str(cntTealCount), 'latestTealVolDate' : str(latestTealVolDate), 'earliestTealVolDate' : str(earliestTealVolDate), \ 178 | 'mcap' : marketCap, 'priceChng': str(pctChange), 'sector' : sector, 'industry' : industry} 179 | # Append the new row to the DataFrame 180 | df.loc[len(df)] = row 181 | 182 | except Exception as e: 183 | print(f'Error: {stock} => {e}') 184 | # Append current timestamp to the file name 185 | now = datetime.datetime.now() 186 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 187 | file_name = f'{output}/limevolume_{timestamp}.csv' 188 | # Export the DataFrame to CSV 189 | df.to_csv(file_name, index=False) 190 | print('Done') 191 | 192 | if __name__ == "__main__": 193 | main() 194 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/src/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | FinInsightGPT - AI-Powered Investment Analysis Application 4 | 5 | This application processes company data files, converts them to markdown, 6 | creates consolidated master files, and generates equity research reports. 
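Typical invocations (a sketch based on the sub-commands defined in
setup_argparse below; <company_folder> and <master_file> are placeholders):

    python main.py list                                # list available company folders
    python main.py process <company_folder>            # convert source documents to markdown
    python main.py master <company_folder>             # build the consolidated master file
    python main.py report <master_file> --model gpt-4-turbo
    python main.py all <company_folder>                # process + master + report in one go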
7 | """ 8 | 9 | import os 10 | import sys 11 | import argparse 12 | import logging 13 | from pathlib import Path 14 | from typing import List, Optional 15 | 16 | # Load environment variables from .env file 17 | try: 18 | from dotenv import load_dotenv 19 | load_dotenv() # Load variables from .env file 20 | ENV_LOADED = True 21 | except ImportError: 22 | ENV_LOADED = False 23 | logging.warning("dotenv not found, environment variables must be set manually") 24 | 25 | # Configure logging 26 | logging.basicConfig( 27 | level=logging.INFO, 28 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 29 | ) 30 | logger = logging.getLogger(__name__) 31 | 32 | # Import local modules 33 | from document_processor import process_company_folder 34 | from master_file_generator import generate_master_file 35 | from report_generator import generate_report 36 | 37 | 38 | def setup_argparse() -> argparse.ArgumentParser: 39 | """Set up command-line arguments.""" 40 | parser = argparse.ArgumentParser( 41 | description="FinInsightGPT - AI-Powered Investment Analysis Application" 42 | ) 43 | 44 | subparsers = parser.add_subparsers(dest='command', help='Command to run') 45 | 46 | # Process command 47 | process_parser = subparsers.add_parser('process', help='Process files in a company folder') 48 | process_parser.add_argument('company_folder', help='Path to the company folder') 49 | 50 | # Master file command 51 | master_parser = subparsers.add_parser('master', help='Generate master file from processed files') 52 | master_parser.add_argument('company_folder', help='Path to the company folder') 53 | master_parser.add_argument('--output-dir', help='Directory to save the master file (defaults to company folder)') 54 | 55 | # Report command 56 | report_parser = subparsers.add_parser('report', help='Generate report from master file') 57 | report_parser.add_argument('master_file', help='Path to the master markdown file') 58 | report_parser.add_argument('--template', help='Path to the report template (default: prompt_master/Equity_Research_Report_Template.md)') 59 | report_parser.add_argument('--output-dir', help='Directory to save the report (defaults to master file directory)') 60 | report_parser.add_argument('--model', help='LLM model to use (default: gpt-4-turbo)') 61 | 62 | # All-in-one command 63 | all_parser = subparsers.add_parser('all', help='Process everything end-to-end') 64 | all_parser.add_argument('company_folder', help='Path to the company folder') 65 | all_parser.add_argument('--template', help='Path to the report template (default: prompt_master/Equity_Research_Report_Template.md)') 66 | all_parser.add_argument('--model', default='gpt-4-turbo', help='LLM model to use (default: gpt-4-turbo)') 67 | 68 | # List companies command 69 | subparsers.add_parser('list', help='List all available company folders') 70 | 71 | return parser 72 | 73 | 74 | def list_companies(base_path: str = "../company_data") -> List[str]: 75 | """List all company folders in the company_data directory.""" 76 | base_path = Path(base_path) 77 | 78 | if not base_path.exists() or not base_path.is_dir(): 79 | logger.error(f"Company data directory not found: {base_path}") 80 | return [] 81 | 82 | companies = [] 83 | 84 | for item in base_path.iterdir(): 85 | if item.is_dir() and not item.name.startswith('.'): 86 | companies.append(item.name) 87 | 88 | return companies 89 | 90 | 91 | def run_process_command(args: argparse.Namespace) -> None: 92 | """Process files in a company folder.""" 93 | company_folder = args.company_folder 94 | 
95 | # Ensure path is absolute 96 | if not os.path.isabs(company_folder): 97 | script_dir = Path(__file__).parent.absolute() 98 | company_data_dir = script_dir.parent / "company_data" 99 | company_folder = os.path.join(company_data_dir, company_folder) 100 | 101 | logger.info(f"Processing files in: {company_folder}") 102 | processed_files = process_company_folder(company_folder) 103 | 104 | if not processed_files: 105 | logger.warning("No files were processed.") 106 | else: 107 | logger.info(f"Successfully processed {len(processed_files)} files.") 108 | 109 | 110 | def run_master_command(args: argparse.Namespace) -> Optional[str]: 111 | """Generate master file from processed files.""" 112 | company_folder = args.company_folder 113 | output_dir = args.output_dir 114 | 115 | # Ensure path is absolute 116 | if not os.path.isabs(company_folder): 117 | script_dir = Path(__file__).parent.absolute() 118 | company_data_dir = script_dir.parent / "company_data" 119 | company_folder = os.path.join(company_data_dir, company_folder) 120 | 121 | # Get company name from folder path 122 | company_name = Path(company_folder).name 123 | 124 | # Find processed markdown files 125 | processed_folder = Path(company_folder) / "processed" 126 | 127 | if not processed_folder.exists() or not processed_folder.is_dir(): 128 | logger.error(f"Processed folder not found: {processed_folder}") 129 | return None 130 | 131 | markdown_files = [] 132 | for file in processed_folder.glob("*.md"): 133 | if file.is_file(): 134 | markdown_files.append(str(file)) 135 | 136 | if not markdown_files: 137 | logger.error("No processed markdown files found.") 138 | return None 139 | 140 | logger.info(f"Found {len(markdown_files)} processed files.") 141 | 142 | # Generate master file 143 | master_file_path = generate_master_file( 144 | company_name=company_name, 145 | markdown_files=markdown_files, 146 | output_dir=output_dir 147 | ) 148 | 149 | if master_file_path: 150 | logger.info(f"Successfully generated master file: {master_file_path}") 151 | else: 152 | logger.error("Failed to generate master file.") 153 | 154 | return master_file_path 155 | 156 | 157 | def run_report_command(args: argparse.Namespace) -> Optional[str]: 158 | """Generate report from master file.""" 159 | master_file = args.master_file 160 | template_path = args.template 161 | output_dir = args.output_dir 162 | model = args.model 163 | 164 | # If model not specified in args, use the environment variable 165 | if model is None: 166 | model = os.environ.get("OPENAI_TEXT_MODEL", "gpt-4-turbo") 167 | 168 | # Ensure master file path is absolute 169 | if not os.path.isabs(master_file): 170 | script_dir = Path(__file__).parent.absolute() 171 | company_data_dir = script_dir.parent / "company_data" 172 | master_file = os.path.join(company_data_dir, master_file) 173 | 174 | # Ensure template path is set 175 | if template_path is None: 176 | script_dir = Path(__file__).parent.absolute() 177 | template_path = script_dir.parent / "prompt_master" / "Equity_Research_Report_Template.md" 178 | elif not os.path.isabs(template_path): 179 | script_dir = Path(__file__).parent.absolute() 180 | template_path = script_dir.parent / template_path 181 | 182 | # Check if files exist 183 | if not os.path.exists(master_file): 184 | logger.error(f"Master file not found: {master_file}") 185 | return None 186 | 187 | if not os.path.exists(template_path): 188 | logger.error(f"Template file not found: {template_path}") 189 | return None 190 | 191 | # Generate report 192 | report_file_path = 
generate_report( 193 | master_file_path=master_file, 194 | template_path=str(template_path), 195 | output_dir=output_dir, 196 | model=model 197 | ) 198 | 199 | if report_file_path: 200 | logger.info(f"Successfully generated report: {report_file_path}") 201 | else: 202 | logger.error("Failed to generate report.") 203 | 204 | return report_file_path 205 | 206 | 207 | def run_all_command(args: argparse.Namespace) -> None: 208 | """Process everything end-to-end: process files, generate master file, and generate report.""" 209 | company_folder = args.company_folder 210 | template_path = args.template 211 | model = args.model 212 | 213 | # Process files 214 | process_args = argparse.Namespace(company_folder=company_folder) 215 | run_process_command(process_args) 216 | 217 | # Generate master file 218 | master_args = argparse.Namespace(company_folder=company_folder, output_dir=None) 219 | master_file_path = run_master_command(master_args) 220 | 221 | if not master_file_path: 222 | logger.error("Cannot continue without a master file.") 223 | return 224 | 225 | # Generate report 226 | report_args = argparse.Namespace( 227 | master_file=master_file_path, 228 | template=template_path, 229 | output_dir=None, 230 | model=model 231 | ) 232 | report_file_path = run_report_command(report_args) 233 | 234 | if report_file_path: 235 | logger.info("End-to-end processing completed successfully.") 236 | else: 237 | logger.error("End-to-end processing failed during report generation.") 238 | 239 | 240 | def check_environment(): 241 | """Check if required environment variables are set.""" 242 | if not os.environ.get("OPENAI_API_KEY"): 243 | logger.warning("OPENAI_API_KEY environment variable is not set. Set it in your .env file or export it in your shell.") 244 | return False 245 | return True 246 | 247 | 248 | def main() -> None: 249 | """Main entry point of the application.""" 250 | parser = setup_argparse() 251 | args = parser.parse_args() 252 | 253 | if args.command is None: 254 | parser.print_help() 255 | sys.exit(1) 256 | 257 | elif args.command == 'process': 258 | run_process_command(args) 259 | 260 | elif args.command == 'master': 261 | run_master_command(args) 262 | 263 | elif args.command == 'report': 264 | run_report_command(args) 265 | 266 | elif args.command == 'all': 267 | run_all_command(args) 268 | 269 | elif args.command == 'list': 270 | script_dir = Path(__file__).parent.absolute() 271 | company_data_dir = script_dir.parent / "company_data" 272 | companies = list_companies(str(company_data_dir)) 273 | 274 | if companies: 275 | print("Available company folders:") 276 | for company in companies: 277 | print(f"- {company}") 278 | else: 279 | print("No company folders found.") 280 | 281 | else: 282 | parser.print_help() 283 | sys.exit(1) 284 | 285 | 286 | if __name__ == "__main__": 287 | main() -------------------------------------------------------------------------------- /py/yf/stock_sector_strength.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A comparative analysis of the stock market by sector (or any grouping), measured from a significant past date/event as reflected on the benchmark. 3 | The idea is to calculate the gains not only of the individual stocks but of the entire group, with respect to that event. 4 | Interesting analysis can be done when the entire group is considered: we can see that the leader stocks move much in advance of their peers and 5 | start outperforming the benchmark and their sector.
We can also see how the sector as a group is performing with respect to the benchmark. 6 | ''' 7 | import pandas as pd 8 | import os 9 | from datetime import datetime, timedelta 10 | import csv 11 | import yfinance as yf 12 | 13 | 14 | # Read up sector/industry information from text data 15 | stock_industry_map = pd.read_csv("stock_sector_industry_map.csv", header=0, usecols=["NSE Code","Industry","Market Cap", "Sector"]) 16 | 17 | # Reference date for comparison, preferably <= 200 trading days before the run date 18 | reference_date = '2022-12-01' 19 | 20 | # Run date, must be greater than reference date 21 | run_date = '2023-08-05' 22 | 23 | # Minimum number of trading days to consider for index 24 | min_trading_days = 200 25 | 26 | # Maximum number of stocks to include in a sector group 27 | max_stocks_per_sector = 10 28 | 29 | # Limit on marketcap 30 | min_cap = 500 # Crores 31 | 32 | # Calculate gain percentages for different time periods 33 | periods = [5, 21, 55, 123] 34 | 35 | # Specify the benchmark symbol 36 | benchmark = "^NSEI" 37 | 38 | # Folder location 39 | output = 'output' 40 | 41 | def has_min_days_data(nse_code): 42 | # Calculate the start date as one year before the run_date 43 | start_date = (datetime.strptime(run_date, '%Y-%m-%d') - timedelta(days=365)).strftime('%Y-%m-%d') 44 | 45 | # Get the daily data for the specified period 46 | ticker = yf.Ticker(nse_code+'.NS') 47 | stock_data = ticker.history(start=start_date, end=run_date, interval='1d',auto_adjust=False, prepost=False) 48 | 49 | # Check if the stock has at least min_trading_days days of trading data 50 | if len(stock_data) >= min_trading_days: 51 | return True 52 | else: 53 | return False 54 | 55 | def prepare_custom_indexes(df): 56 | # Group the stocks by their sectors into a dictionary 57 | custom_indices = {} 58 | 59 | # Iterate through each row in the DataFrame 60 | for index, row in df.iterrows(): 61 | sector = row['Sector'] 62 | stock_info = { 63 | 'NSE Code': row['NSE Code'], 64 | 'Industry': row['Industry'], 65 | 'Market Cap': row['Market Cap'] 66 | } 67 | nse_code = row['NSE Code'] 68 | 69 | # Check if the stock has at least min_trading_days days of trading data 70 | if has_min_days_data(nse_code): 71 | # Check if the sector already exists in the dictionary 72 | if sector in custom_indices: 73 | custom_indices[sector].append(stock_info) 74 | else: 75 | custom_indices[sector] = [stock_info] 76 | 77 | # Sort the stocks within each sector by decreasing market cap 78 | for sector in custom_indices: 79 | stocks_in_sector = custom_indices[sector] 80 | stocks_sorted_by_market_cap = sorted(stocks_in_sector, key=lambda x: x['Market Cap'], reverse=True) 81 | custom_indices[sector] = stocks_sorted_by_market_cap[:max_stocks_per_sector] 82 | 83 | # print(custom_indices) 84 | return custom_indices 85 | 86 | def generate_watchlist_with_headers(custom_indices): 87 | watchlist_string_withheaders = "" 88 | watchlist_string = "" 89 | 90 | sector_index_mapper = {} 91 | 92 | for sector, stocks in custom_indices.items(): 93 | # Calculate the number of stocks in the sector 94 | num_stocks = len(stocks) 95 | index_expr = '' 96 | str_header = f'###{sector},' 97 | for stock in stocks: 98 | nse_code = 'NSE:' + stock['NSE Code'] 99 | index_expr += nse_code.replace('-','_').replace('&','_') + "+" 100 | 101 | index_expr = index_expr.rsplit('+', 1)[0].strip() 102 | index_expr = f'( {index_expr} )/{num_stocks}' + ',' 103 | watchlist_string += index_expr 104 | sector_index_mapper[sector.upper()] = index_expr 105 | watchlist_string_withheaders = watchlist_string_withheaders + str_header.upper() + index_expr 106 | 107 | # Write the watchlist
to the txt file 108 | with open('custom_indices_without_headers.txt', 'w') as file: 109 | file.write(watchlist_string) 110 | 111 | # Write the watchlist to the txt file 112 | with open('custom_indices_with_headers.txt', 'w') as file: 113 | file.write(watchlist_string_withheaders) 114 | 115 | return sector_index_mapper 116 | 117 | def calculate_gain_percentages(data_df, reference_date, run_date): 118 | # Filter the data from the reference date to the run date 119 | filtered_data = data_df.loc[reference_date:run_date] 120 | 121 | # Calculate the gain percentage for the original period 122 | start_price = filtered_data.iloc[0]['Close'] 123 | end_price = filtered_data.iloc[-1]['Close'] 124 | gain_percentage = ((end_price - start_price) / start_price) * 100 125 | 126 | gain_percentages = [gain_percentage] 127 | 128 | for period in periods: 129 | if len(filtered_data) < period: 130 | gain_percentages.append(None) # Append None if there's insufficient data for the period 131 | else: 132 | start_price_period = filtered_data.iloc[-period]['Close'] 133 | gain_percentage_period = ((end_price - start_price_period) / start_price_period) * 100 134 | gain_percentages.append(round(gain_percentage_period, 2)) 135 | 136 | return gain_percentages 137 | 138 | def calculate_sector_gains(custom_indices, reference_date, run_date): 139 | sector_gains = {} 140 | 141 | for sector, stocks in custom_indices.items(): 142 | total_close_start = 0.0 143 | total_close_end = 0.0 144 | 145 | for stock in stocks: 146 | nse_code = stock['NSE Code'] 147 | ticker = yf.Ticker(nse_code+'.NS') 148 | stock_data = ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False) 149 | if not stock_data.empty: 150 | # Get the closing price on the reference_date and run_date 151 | close_start = stock_data.iloc[0]['Close'] 152 | close_end = stock_data.iloc[-1]['Close'] 153 | total_close_start += close_start 154 | total_close_end += close_end 155 | 156 | # Calculate the gain percentage for the sector from reference_date to run_date 157 | sector_gain = round(((total_close_end - total_close_start) / total_close_start) * 100, 2) 158 | sector_gains[sector] = sector_gain 159 | 160 | return sector_gains 161 | 162 | def main(): 163 | print("Started...") 164 | # Prepare working dataset We only take NSE Codes and Market Cap > min_cap Crores 165 | df = stock_industry_map[(stock_industry_map['NSE Code'].notna()) & (stock_industry_map['Market Cap'] >= min_cap)] 166 | print(f'{len(df)} NSE stocks with mcap > {min_cap} Cr') 167 | # print(df.tail(10)) 168 | # Prepare custom index 169 | ### df = df.tail(30) ### FOR TESTS ONLY#################### 170 | print("Preparing custom indices...") 171 | custom_indices = prepare_custom_indexes(df) 172 | sector_index_mapper = generate_watchlist_with_headers(custom_indices) 173 | 174 | print("Calculating benchmark gain...") 175 | # Calculate gains of benchmark from reference date to run date 176 | benchmark_ticker = yf.Ticker(benchmark) 177 | benchmark_data = benchmark_ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False) 178 | benchmark_gain = calculate_gain_percentages(benchmark_data, reference_date, run_date)[0] 179 | 180 | print("Calculating sector gains...") 181 | sector_gains = calculate_sector_gains(custom_indices, reference_date, run_date) 182 | 183 | # Convert the date strings to datetime objects 184 | date1 = datetime.strptime(run_date, '%Y-%m-%d') 185 | date2 = datetime.strptime(reference_date, '%Y-%m-%d') 186 | 187 | # 
Calculate the difference in days between the two dates 188 | days_difference = (date1 - date2).days 189 | 190 | # Now we run for all stocks and create a big list and report 191 | result_df = pd.DataFrame(columns=['symbol', 'start','end','days', 'mcap', 'sector', 'industry', 'gain_stock_sector', 'gain_stock_benchmrk', 'gain_sector_benchmrk', \ 192 | 'gain_stock_refdate', 'gain_sector_refdate', 'gain_benchmrk_refdate', 'gain_stock_5d', 'gain_stock_21d', 'gain_stock_55d', 'gain_stock_123d',\ 193 | 'sector_index']) 194 | 195 | print("Calculating stock performances...") 196 | # Iterate through each row in the DataFrame 197 | for index, row in df.iterrows(): 198 | nse_code = row['NSE Code'] 199 | ticker = yf.Ticker(nse_code+'.NS') 200 | try: 201 | stock_data = ticker.history(start=reference_date, end=run_date, interval='1d',auto_adjust=False, prepost=False) 202 | if (len(stock_data) <= 2): 203 | print(f'Skipping... {nse_code}') 204 | continue 205 | stock_gains = calculate_gain_percentages(stock_data,reference_date, run_date) 206 | stock_gain_from_refdate = stock_gains[0] 207 | sector = row['Sector'] 208 | industry = row['Industry'] 209 | mcap = row['Market Cap'] 210 | gain_stock_sector = stock_gain_from_refdate - sector_gains[sector] 211 | gain_stock_benchmrk = stock_gain_from_refdate - benchmark_gain 212 | gain_sector_benchmrk = sector_gains[sector] - benchmark_gain 213 | gain_sector_refdate = sector_gains[sector] 214 | sector_index = sector_index_mapper[sector.upper()] 215 | 216 | row = {'symbol': nse_code, 'start': reference_date, 'end' : run_date, 'days' : days_difference, 'mcap': str(mcap), 'sector' : sector.upper(), 'industry' : industry.upper(), \ 217 | 'gain_stock_sector' : str(gain_stock_sector), 'gain_stock_benchmrk' : str(gain_stock_benchmrk), 'gain_sector_benchmrk' : str(gain_sector_benchmrk), \ 218 | 'gain_stock_refdate' : str(stock_gain_from_refdate), 'gain_sector_refdate' : str(gain_sector_refdate), 'gain_benchmrk_refdate' : str(benchmark_gain), \ 219 | 'gain_stock_5d' : str(stock_gains[1]), 'gain_stock_21d' : str(stock_gains[2]), 'gain_stock_55d' : str(stock_gains[3]), 'gain_stock_123d' : str(stock_gains[4]),\ 220 | 'sector_index' : sector_index} 221 | 222 | # Append the new row to the DataFrame 223 | result_df.loc[len(result_df)] = row 224 | except Exception as e: 225 | print(f'Error: {nse_code} => {e}') 226 | 227 | # Append current timestamp to the file name 228 | now = datetime.now() 229 | timestamp = now.strftime("%Y-%m-%d %H-%M-%S") 230 | file_name = f'{output}/stock_sector_benchmark_{reference_date}_{run_date}_{timestamp}.csv' 231 | # Export the DataFrame to CSV 232 | result_df.to_csv(file_name, index=False) 233 | # print(sector_index_mapper) 234 | print("Done") 235 | 236 | if __name__ == "__main__": 237 | main() 238 | -------------------------------------------------------------------------------- /py/eodhd/stocks.csv: -------------------------------------------------------------------------------- 1 | Ticker 2 | RELIANCE 3 | HDFCBANK 4 | TCS 5 | ICICIBANK 6 | HINDUNILVR 7 | ITC 8 | INFY 9 | SBIN 10 | BHARTIARTL 11 | HDFC 12 | BAJFINANCE 13 | LICI 14 | KOTAKBANK 15 | LT 16 | ASIANPAINT 17 | HCLTECH 18 | AXISBANK 19 | MARUTI 20 | ADANIENT 21 | TITAN 22 | SUNPHARMA 23 | BAJAJFINSV 24 | DMART 25 | ULTRACEMCO 26 | TATAMOTORS 27 | WIPRO 28 | NESTLEIND 29 | ONGC 30 | JSWSTEEL 31 | M&M 32 | NTPC 33 | POWERGRID 34 | ADANIGREEN 35 | ADANIPORTS 36 | LTIM 37 | TATASTEEL 38 | COALINDIA 39 | IOC 40 | HDFCLIFE 41 | BAJAJ-AUTO 42 | PIDILITIND 43 | HINDZINC 44 | SBILIFE 45 | HAL 46 |
SIEMENS 47 | DLF 48 | BRITANNIA 49 | GRASIM 50 | TECHM 51 | INDUSINDBK 52 | GODREJCP 53 | VBL 54 | VEDL 55 | INDIGO 56 | BANKBARODA 57 | DABUR 58 | DIVISLAB 59 | HINDALCO 60 | CHOLAFIN 61 | ADANIPOWER 62 | BEL 63 | EICHERMOT 64 | ABB 65 | DRREDDY 66 | ADANITRANS 67 | BPCL 68 | CIPLA 69 | SHREECEM 70 | AMBUJACEM 71 | BAJAJHLDNG 72 | HAVELLS 73 | SBICARD 74 | ICICIPRULI 75 | TATACONSUM 76 | MANKIND 77 | MCDOWELL-N 78 | APOLLOHOSP 79 | GAIL 80 | ATGL 81 | MARICO 82 | TATAPOWER 83 | ICICIGI 84 | PNB 85 | ZOMATO 86 | POLYCAB 87 | SHRIRAMFIN 88 | LODHA 89 | BERGEPAINT 90 | MOTHERSON 91 | TORNTPHARM 92 | SRF 93 | JINDALSTEL 94 | TVSMOTOR 95 | CGPOWER 96 | TIINDIA 97 | ZYDUSLIFE 98 | HEROMOTOCO 99 | IDBI 100 | UNIONBANK 101 | CANBK 102 | TRENT 103 | NAUKRI 104 | PFC 105 | MAXHEALTH 106 | INDHOTEL 107 | BOSCHLTD 108 | PIIND 109 | IDFCFIRSTB 110 | PAYTM 111 | ASHOKLEY 112 | HDFCAMC 113 | CUMMINSIND 114 | AWL 115 | YESBANK 116 | MUTHOOTFIN 117 | ASTRAL 118 | AUBANK 119 | PGHH 120 | IOB 121 | COLPAL 122 | IRCTC 123 | ABBOTINDIA 124 | SCHAEFFLER 125 | ABCAPITAL 126 | PATANJALI 127 | UPL 128 | JSWENERGY 129 | NHPC 130 | BALKRISIND 131 | AUROPHARMA 132 | IRFC 133 | INDUSTOWER 134 | TATAELXSI 135 | TATACOMM 136 | GODREJPROP 137 | SUPREMEIND 138 | ALKEM 139 | MPHASIS 140 | MRF 141 | HINDPETRO 142 | LTTS 143 | LUPIN 144 | RECLTD 145 | NYKAA 146 | CONCOR 147 | INDIANB 148 | PAGEIND 149 | UBL 150 | BHARATFORG 151 | APLAPOLLO 152 | LINDEINDIA 153 | M&MFIN 154 | OBEROIRLTY 155 | IDEA 156 | HONAUT 157 | MAZDOCK 158 | STARHEALTH 159 | SAIL 160 | PERSISTENT 161 | DALBHARAT 162 | BANDHANBNK 163 | UCOBANK 164 | BANKINDIA 165 | IGL 166 | SOLARINDS 167 | GICRE 168 | POLICYBZR 169 | PETRONET 170 | SONACOMS 171 | OFSS 172 | ACC 173 | AIAENG 174 | BHEL 175 | UNOMINDA 176 | NMDC 177 | GUJGASLTD 178 | L&TFH 179 | BIOCON 180 | 3MINDIA 181 | FACT 182 | SYNGENE 183 | MANYAVAR 184 | ESCORTS 185 | JUBLFOOD 186 | JSL 187 | FLUOROCHEM 188 | DELHIVERY 189 | METROBRAND 190 | TORNTPOWER 191 | THERMAX 192 | PHOENIXLTD 193 | EMBASSY 194 | SUNDARMFIN 195 | COROMANDEL 196 | POONAWALLA 197 | CRISIL 198 | RVNL 199 | FEDERALBNK 200 | COFORGE 201 | OIL 202 | MFSL 203 | KPITTECH 204 | CENTRALBK 205 | DEEPAKNTR 206 | GMRINFRA 207 | APOLLOTYRE 208 | KANSAINER 209 | SKFINDIA 210 | SUNDRMFAST 211 | MSUMI 212 | FORTIS 213 | VOLTAS 214 | TATACHEM 215 | DIXON 216 | JKCEMENT 217 | TIMKEN 218 | GRINDWELL 219 | SUZLON 220 | DEVYANI 221 | ENDURANCE 222 | PEL 223 | HATSUN 224 | GLAXO 225 | ZFCVINDIA 226 | KEI 227 | MAHABANK 228 | RELAXO 229 | PSB 230 | KAJARIACER 231 | CARBORUNIV 232 | KPRMILL 233 | NAVINFLUOR 234 | PRESTIGE 235 | BATAINDIA 236 | IIFL 237 | BDL 238 | EXIDEIND 239 | GLENMARK 240 | LICHSGFIN 241 | ZEEL 242 | NH 243 | RAMCOCEM 244 | SUNTV 245 | BAYERCROP 246 | ATUL 247 | SUMICHEM 248 | CREDITACC 249 | ISEC 250 | GLAND 251 | ABFRL 252 | IPCALAB 253 | SJVN 254 | NIACL 255 | NAM-INDIA 256 | JBCHEPHARM 257 | INDIAMART 258 | LALPATHLAB 259 | MEDANTA 260 | FIVESTAR 261 | LAURUSLABS 262 | RADICO 263 | VINATIORGA 264 | CIEINDIA 265 | CROMPTON 266 | EMAMILTD 267 | 360ONE 268 | WHIRLPOOL 269 | RATNAMANI 270 | GILLETTE 271 | IDFC 272 | MINDSPACE 273 | AJANTPHARM 274 | KALYANKJIL 275 | TATAMTRDVR 276 | POWERINDIA 277 | ELGIEQUIP 278 | PFIZER 279 | NXST 280 | CHOLAHLDNG 281 | BLUEDART 282 | AARTIIND 283 | TANLA 284 | TRIDENT 285 | NATIONALUM 286 | PNBHOUSING 287 | JBMA 288 | CGCL 289 | NLCINDIA 290 | CYIENT 291 | TTML 292 | GODREJIND 293 | GSPL 294 | KEC 295 | SANOFI 296 | IRB 297 | FINCABLES 298 | BLUESTARCO 299 | ASTERDM 300 | RAJESHEXPO 301 | MRPL 
302 | KIMS 303 | CENTURYPLY 304 | LAXMIMACH 305 | PVRINOX 306 | SONATSOFTW 307 | BAJAJELEC 308 | FINEORG 309 | TEJASNET 310 | HAPPSTMNDS 311 | APARINDS 312 | REDINGTON 313 | DCMSHRIRAM 314 | NATCOPHARM 315 | CLEAN 316 | AFFLE 317 | WESTLIFE 318 | EIHOTEL 319 | ANGELONE 320 | ASAHIINDIA 321 | APLLTD 322 | APTUS 323 | CASTROLIND 324 | RBLBANK 325 | AETHER 326 | BRIGADE 327 | NSLNISP 328 | TRITURBINE 329 | NUVOCO 330 | AEGISCHEM 331 | GRINFRA 332 | PPLPHARMA 333 | AAVAS 334 | RHIM 335 | ALKYLAMINE 336 | CDSL 337 | SUVENPHAR 338 | VGUARD 339 | AKZOINDIA 340 | JINDALSAW 341 | HUDCO 342 | RAYMOND 343 | TATAINVEST 344 | SFL 345 | FINPIPE 346 | KIOCL 347 | HINDCOPPER 348 | BIKAJI 349 | DATAPATTNS 350 | BASF 351 | CAMS 352 | MEDPLUS 353 | RAINBOW 354 | ABSLAMC 355 | CHAMBLFERT 356 | CANFINHOME 357 | IEX 358 | MOTILALOFS 359 | ZENSARTECH 360 | RITES 361 | MANAPPURAM 362 | GESHIP 363 | TTKPRESTIG 364 | POLYMED 365 | EQUITASBNK 366 | CENTURYTEX 367 | AMARAJABAT 368 | BSOFT 369 | VTL 370 | ANURAS 371 | MGL 372 | OLECTRA 373 | KAYNES 374 | ITI 375 | KARURVYSYA 376 | UTIAMC 377 | ERIS 378 | WELSPUNIND 379 | BSE 380 | SUNCLAYLTD 381 | USHAMART 382 | RENUKA 383 | CESC 384 | CERA 385 | SHYAMMETL 386 | CEATLTD 387 | FSL 388 | CUB 389 | CRAFTSMAN 390 | GALAXYSURF 391 | ASTRAZEN 392 | CAMPUS 393 | CHALET 394 | ZYDUSWELL 395 | GODREJAGRO 396 | ROUTE 397 | BIRLACORPN 398 | GNFC 399 | KPIL 400 | SAPPHIRE 401 | PNCINFRA 402 | HFCL 403 | JYOTHYLAB 404 | BLS 405 | BIRET 406 | NCC 407 | COCHINSHIP 408 | IRCON 409 | INGERRAND 410 | KRBL 411 | ECLERX 412 | INTELLECT 413 | SHOPERSTOP 414 | PGHL 415 | SAREGAMA 416 | GODFRYPHLP 417 | VIPIND 418 | SPLPETRO 419 | WELCORP 420 | UJJIVANSFB 421 | CCL 422 | EIDPARRY 423 | SYRMA 424 | ELECON 425 | MCX 426 | RKFORGE 427 | GRAPHITE 428 | BALRAMCHIN 429 | IONEXCHANG 430 | LATENTVIEW 431 | MAPMYINDIA 432 | GLS 433 | JKLAKSHMI 434 | GPIL 435 | GRANULES 436 | BBTC 437 | PRAJIND 438 | KSB 439 | ENGINERSIN 440 | JWL 441 | ALOKINDS 442 | AMBER 443 | DEEPAKFERT 444 | MAHLIFE 445 | SPARC 446 | NBCC 447 | ALLCARGO 448 | TITAGARH 449 | EASEMYTRIP 450 | ACE 451 | MHRIL 452 | LEMONTREE 453 | SAFARI 454 | MINDACORP 455 | J&KBANK 456 | HOMEFIRST 457 | INDIGOPNTS 458 | EPL 459 | METROPOLIS 460 | BALAMINES 461 | ESABINDIA 462 | JMFINANCIL 463 | TEGA 464 | BEML 465 | PRINCEPIPE 466 | TV18BRDCST 467 | SWSOLAR 468 | GRSE 469 | CHEMPLASTS 470 | KNRCON 471 | KIRLFER 472 | TMB 473 | SCHNEIDER 474 | JUSTDIAL 475 | RUSTOMJEE 476 | LXCHEM 477 | GSFC 478 | TRIVENI 479 | CHENNPETRO 480 | MASTEK 481 | GMMPFAUDLR 482 | MAHSCOOTER 483 | BORORENEW 484 | ACI 485 | GET&D 486 | KTKBANK 487 | HNDFDS 488 | MTARTECH 489 | VRLLOG 490 | JUBLINGREA 491 | CAPLIPOINT 492 | KFINTECH 493 | INDIACEM 494 | JINDWORLD 495 | QUESS 496 | MAHSEAMLES 497 | ANANTRAJ 498 | GARFIBRES 499 | RCF 500 | HEG 501 | SARDAEN 502 | FUSION 503 | GOCOLORS 504 | HSCL 505 | SIS 506 | NETWORK18 507 | PRSMJOHNSN 508 | SYMPHONY 509 | HGINFRA 510 | ROLEXRINGS 511 | STLTECH 512 | JKTYRE 513 | GREENLAM 514 | SWANENERGY 515 | KIRLOSENG 516 | JUBLPHARMA 517 | PCBL 518 | SUPRAJIT 519 | GAEL 520 | GPPL 521 | RPOWER 522 | CMSINFO 523 | TCI 524 | GMDCLTD 525 | NEWGEN 526 | STARCEMENT 527 | POWERMECH 528 | TCIEXP 529 | MIDHANI 530 | RELINFRA 531 | IBULHSGFIN 532 | DAAWAT 533 | KENNAMET 534 | VSTIND 535 | VAIBHAVGBL 536 | HGS 537 | VESUVIUS 538 | FDC 539 | RBA 540 | RAIN 541 | SUNTECK 542 | RTNINDIA 543 | KIRLOSBROS 544 | AVANTIFEED 545 | JKPAPER 546 | INOXWIND 547 | RELIGARE 548 | BCG 549 | RSYSTEMS 550 | SOBHA 551 | ICRA 552 | UJJIVAN 553 | ISGEC 554 | PTCIL 
555 | ZENTEC 556 | SPANDANA 557 | PARADEEP 558 | LAOPALA 559 | VARROC 560 | RESPONIND 561 | MMTC 562 | CSBBANK 563 | DELTACORP 564 | TECHNOE 565 | ORIENTELEC 566 | JSWHL 567 | GHCL 568 | RAILTEL 569 | MARKSANS 570 | BECTORFOOD 571 | BOROLTD 572 | GUJALKALI 573 | SHRIPISTON 574 | SANSERA 575 | IDEAFORGE 576 | GENUSPOWER 577 | NAVA 578 | ROSSARI 579 | RATEGAIN 580 | AARTIDRUGS 581 | VOLTAMP 582 | PRUDENT 583 | HBLPOWER 584 | SHARDACROP 585 | TATACOFFEE 586 | VIJAYA 587 | SCI 588 | AHLUCONT 589 | DODLA 590 | EDELWEISS 591 | PDSL 592 | GRAVITA 593 | NESCO 594 | HCG 595 | HLEGLAS 596 | LUXIND 597 | VMART 598 | ARVINDFASN 599 | ANANDRATHI 600 | JAMNAAUTO 601 | NAZARA 602 | SURYAROSNI 603 | SOUTHBANK 604 | PRIVISCL 605 | GREENPANEL 606 | MANINFRA 607 | AMIORG 608 | AGI 609 | RALLIS 610 | NEULANDLAB 611 | KKCL 612 | TEAMLEASE 613 | MASFIN 614 | AVALON 615 | HINDWAREAP 616 | EMIL 617 | KIRLPNU 618 | ICIL 619 | IRBINVIT 620 | DBCORP 621 | DREAMFOLKS 622 | JPPOWER 623 | SULA 624 | SBCL 625 | POLYPLEX 626 | SHAREINDIA 627 | HARSHA 628 | MFL 629 | INFIBEAM 630 | TIIL 631 | STAR 632 | THOMASCOOK 633 | TDPOWERSYS 634 | CYIENTDLM 635 | HEIDELBERG 636 | NEOGEN 637 | RAJRATAN 638 | BHARATRAS 639 | DCBBANK 640 | EMUDHRA 641 | MOIL 642 | SUNFLAG 643 | TIPSINDLTD 644 | JTEKTINDIA 645 | HIKAL 646 | GANESHHOUC 647 | GATEWAY 648 | LGBBROSLTD 649 | TINPLATE 650 | NILKAMAL 651 | TATVA 652 | IBREALEST 653 | SSWL 654 | PATELENG 655 | DISHTV 656 | ARVIND 657 | SHANTIGEAR 658 | DBL 659 | NOCIL 660 | DHANUKA 661 | ASTRAMICRO 662 | WOCKPHARMA 663 | CHOICEIN 664 | PFOCUS 665 | NFL 666 | ETHOSLTD 667 | WELENT 668 | MOLDTKPAC 669 | TASTYBITE 670 | GLOBUSSPR 671 | BANARISUG 672 | FORCEMOT 673 | IFBIND 674 | ADVENZYMES 675 | PGEL 676 | ELECTCAST 677 | SAKSOFT 678 | PRICOLLTD 679 | SUDARSCHEM 680 | AUTOAXLES 681 | DATAMATICS 682 | PTC 683 | TI 684 | NAVNETEDUL 685 | JAICORPLTD 686 | GOKEX 687 | MAITHANALL 688 | TATASTLLP 689 | BBOX 690 | WABAG 691 | KSCL 692 | KIRLOSIND 693 | GOODYEAR 694 | WSTCSTPAPR 695 | IKIO 696 | GREAVESCOT 697 | WONDERLA 698 | TARSONS 699 | UFLEX 700 | BSHSL 701 | FCL 702 | JTLIND 703 | DALMIASUG 704 | SOMANYCERA 705 | TIMETECHNO 706 | THYROCARE 707 | GABRIEL 708 | BAJAJCON 709 | INDOCO 710 | AARTIPHARM 711 | ITDCEM 712 | APOLLOPIPE 713 | HEMIPROP 714 | KPIGREEN 715 | KOVAI 716 | LANDMARK 717 | MAHLOG 718 | HCC 719 | NUCLEUS 720 | RAMKY 721 | ORIENTCEM 722 | JAYNECOIND 723 | UNIPARTS 724 | RAJRILTD 725 | MAXVIL 726 | MSTCLTD 727 | HINDOILEXP 728 | APCOTEXIND 729 | ITDC 730 | SUBROS 731 | ORCHPHARMA 732 | KOLTEPATIL 733 | JCHAC 734 | STYLAMIND 735 | IFCI 736 | JINDALPOLY 737 | TEXRAIL 738 | SHILPAMED 739 | DIVGIITTS 740 | MBAPL 741 | HATHWAY 742 | SAGCEM 743 | IWEL 744 | VENKEYS 745 | DYNAMATECH 746 | UNICHEMLAB 747 | TATAMETALI 748 | DBREALTY 749 | RTNPOWER 750 | PARAS 751 | PSPPROJECT 752 | TCNSBRANDS 753 | BARBEQUE 754 | BESTAGRO 755 | SIYSIL 756 | ASHOKA 757 | VSTTILLERS 758 | DCXINDIA 759 | JISLJALEQS 760 | SDBL 761 | IPL 762 | JKIL 763 | ASTEC 764 | FIEMIND 765 | VINDHYATEL 766 | ISMTLTD 767 | HERITGFOOD 768 | LUMAXTECH 769 | SANGHVIMOV 770 | GRAUWEIL 771 | SHARDAMOTR 772 | EXPLEOSOL 773 | EVEREADY 774 | CAMLINFINE 775 | DCAL 776 | SWARAJENG 777 | VENUSPIPES 778 | GULFOILLUB 779 | BAJAJHIND 780 | FINOPB 781 | UGROCAP 782 | CARTRADE 783 | TVSSRICHAK 784 | BOMDYEING 785 | ADFFOODS 786 | THANGAMAYL 787 | JAGRAN 788 | BANCOINDIA 789 | PRECAM 790 | GUFICBIO 791 | PURVA 792 | ORISSAMINE 793 | KINGFA 794 | SANGHIIND 795 | IMAGICAA 796 | BALMLAWRIE 797 | GANECOS 798 | PAISALO 799 | INDOSTAR 800 | CIGNITITEC 
801 | AURIONPRO 802 | KESORAMIND 803 | NRBBEARING 804 | PILANIINVS 805 | BEPL 806 | MAYURUNIQ 807 | MMFL 808 | CARERATING 809 | HIL 810 | SUNDARMHLD 811 | HONDAPOWER 812 | IOLCP 813 | SEQUENT 814 | CONFIPET 815 | TARC 816 | GREENPLY 817 | ASHIANA 818 | ACCELYA 819 | RUPA 820 | BBL 821 | DHANI 822 | BUTTERFLY 823 | VADILALIND 824 | MOL 825 | VISHNU 826 | SANDHAR 827 | SOTL 828 | FMGOETZE 829 | STYRENIX 830 | GRWRHITECH 831 | DOLLAR 832 | SHALBY 833 | ATFL 834 | WENDT 835 | MANORAMA 836 | GNA 837 | HUHTAMAKI 838 | MPSLTD 839 | ALEMBICLTD 840 | IIFLSEC 841 | AMRUTANJAN 842 | LUMAXIND 843 | JPASSOCIAT 844 | VIDHIING 845 | APTECHT 846 | SIRCA 847 | TIRUMALCHM 848 | DIAMONDYD 849 | NOVARTIND 850 | ANUP 851 | SUPRIYA 852 | REPCOHOME 853 | DHAMPURSUG 854 | SURYODAY 855 | GATI 856 | INDIAGLYCO 857 | CARYSIL 858 | SESHAPAPER 859 | TIDEWATER 860 | NELCO 861 | GOCLCORP 862 | IMFA 863 | SEAMECLTD 864 | OPTIEMUS 865 | WHEELS 866 | KDDL 867 | MUKANDLTD 868 | SUBEXLTD 869 | CENTUM 870 | TTKHLTCARE 871 | AXISCADES 872 | JAIBALAJI 873 | RAMASTEEL 874 | ARMANFIN 875 | SPICEJET 876 | SJS 877 | PARAGMILK 878 | PANAMAPET 879 | DWARKESH 880 | COSMOFIRST 881 | INOXGREEN 882 | GALLANTT 883 | ARTEMISMED 884 | NACLIND 885 | SKIPPER 886 | MONTECARLO 887 | SERVOTECH 888 | DEN 889 | FOSECOIND 890 | JSWISPL 891 | XPROINDIA 892 | HARIOMPIPE 893 | SHANKARA 894 | ADORWELD 895 | PRECWIRE 896 | ANDHRAPAP 897 | SALASAR 898 | VAKRANGEE 899 | PIXTRANS 900 | FILATEX 901 | KSL 902 | KUANTUM 903 | TAJGVK 904 | RPGLIFE 905 | CAPACITE 906 | CANTABIL 907 | GIPCL 908 | SEPC 909 | RANEHOLDIN 910 | ROSSELLIND 911 | SATIN 912 | ORIENTHOT 913 | AHL 914 | GOKULAGRO 915 | SHK 916 | EIHAHOTELS 917 | UNIVCABLES 918 | FAIRCHEMOR 919 | SMLISUZU 920 | PRAKASH 921 | VSSL 922 | GTPL 923 | ARVSMART 924 | ANDHRSUGAR 925 | SANGAMIND 926 | STOVEKRAFT 927 | IGARASHI 928 | RAMCOIND 929 | HESTERBIO 930 | MOREPENLAB 931 | KABRAEXTRU 932 | NDTV 933 | MARATHON 934 | REFEX 935 | TCPLPACK 936 | KCP 937 | HARDWYN 938 | SASKEN 939 | JASH 940 | STEELXIND 941 | RIIL 942 | -------------------------------------------------------------------------------- /py/ai/fininsightgpt/src/document_processor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Document Processor Module 3 | 4 | This module handles the conversion of various file formats to markdown text. 
5 | Supported formats: txt, pdf, docx, pptx, xlsx, images 6 | """ 7 | 8 | import os 9 | import re 10 | import logging 11 | import base64 12 | import json 13 | from pathlib import Path 14 | from typing import Dict, List, Optional, Tuple, Any 15 | import datetime 16 | 17 | # Load environment variables from .env file 18 | try: 19 | from dotenv import load_dotenv 20 | load_dotenv() # Load variables from .env 21 | ENV_LOADED = True 22 | except ImportError: 23 | ENV_LOADED = False 24 | logging.warning("dotenv not found, environment variables must be set manually") 25 | 26 | # Configure logging 27 | logging.basicConfig( 28 | level=logging.INFO, 29 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 30 | ) 31 | logger = logging.getLogger(__name__) 32 | 33 | # Get model IDs and config from environment variables 34 | OPENAI_VISION_MODEL = os.environ.get("OPENAI_VISION_MODEL", "gpt-4-vision-preview") 35 | OPENAI_TEXT_MODEL = os.environ.get("OPENAI_TEXT_MODEL", "gpt-4-turbo") 36 | # Flag to enable/disable LLM prompt logging (default: enabled) 37 | ENABLE_LOGGING = os.environ.get("ENABLE_LLM_LOGGING", "true").lower() == "true" 38 | 39 | # Try to import optional dependencies, with graceful fallbacks 40 | try: 41 | import fitz # PyMuPDF 42 | PDF_EXTRACTOR = "pymupdf" 43 | except ImportError: 44 | PDF_EXTRACTOR = None 45 | logger.warning("PyMuPDF not found. PDF extraction will be limited.") 46 | 47 | try: 48 | import docx 49 | DOCX_AVAILABLE = True 50 | except ImportError: 51 | DOCX_AVAILABLE = False 52 | logger.warning("python-docx not found. DOCX extraction will be unavailable.") 53 | 54 | try: 55 | from pptx import Presentation 56 | PPTX_AVAILABLE = True 57 | except ImportError: 58 | PPTX_AVAILABLE = False 59 | logger.warning("python-pptx not found. PPTX extraction will be unavailable.") 60 | 61 | try: 62 | import pandas as pd 63 | PANDAS_AVAILABLE = True 64 | except ImportError: 65 | PANDAS_AVAILABLE = False 66 | logger.warning("pandas not found. XLSX extraction will be unavailable.") 67 | 68 | try: 69 | from PIL import Image 70 | import pytesseract 71 | OCR_AVAILABLE = True 72 | except ImportError: 73 | OCR_AVAILABLE = False 74 | logger.warning("PIL or pytesseract not found. OCR will be unavailable.") 75 | 76 | try: 77 | import openai 78 | OPENAI_AVAILABLE = True 79 | except ImportError: 80 | OPENAI_AVAILABLE = False 81 | logger.warning("OpenAI library not found. Advanced image analysis will be unavailable.") 82 | 83 | 84 | def extract_from_txt(file_path: str) -> str: 85 | """Extract text from a plain text file. 86 | 87 | Args: 88 | file_path: Path to the text file 89 | 90 | Returns: 91 | Extracted text content 92 | """ 93 | try: 94 | with open(file_path, 'r', encoding='utf-8') as f: 95 | return f.read() 96 | except UnicodeDecodeError: 97 | # Try with different encodings if utf-8 fails 98 | try: 99 | with open(file_path, 'r', encoding='latin-1') as f: 100 | return f.read() 101 | except Exception as e: 102 | logger.error(f"Error reading text file {file_path}: {str(e)}") 103 | return f"ERROR: Could not read {file_path} due to encoding issues." 104 | 105 | 106 | def extract_from_pdf(file_path: str) -> str: 107 | """Extract text from a PDF file. 
108 | 109 | Args: 110 | file_path: Path to the PDF file 111 | 112 | Returns: 113 | Extracted text content 114 | """ 115 | if PDF_EXTRACTOR == "pymupdf": 116 | try: 117 | text_content = [] 118 | with fitz.open(file_path) as doc: 119 | for page_num, page in enumerate(doc): 120 | text = page.get_text() 121 | text_content.append(f"# Page {page_num + 1}\n\n{text}\n\n") 122 | return "\n".join(text_content) 123 | except Exception as e: 124 | logger.error(f"Error extracting text from PDF {file_path}: {str(e)}") 125 | return f"ERROR: Could not extract text from {file_path}." 126 | else: 127 | logger.error("No PDF extraction library available") 128 | return "ERROR: PDF extraction requires PyMuPDF. Please install with: pip install pymupdf" 129 | 130 | 131 | def extract_from_docx(file_path: str) -> str: 132 | """Extract text from a DOCX file. 133 | 134 | Args: 135 | file_path: Path to the DOCX file 136 | 137 | Returns: 138 | Extracted text content 139 | """ 140 | if not DOCX_AVAILABLE: 141 | return "ERROR: DOCX extraction requires python-docx. Please install with: pip install python-docx" 142 | 143 | try: 144 | doc = docx.Document(file_path) 145 | full_text = [] 146 | 147 | for para in doc.paragraphs: 148 | full_text.append(para.text) 149 | 150 | # Add tables 151 | for table in doc.tables: 152 | for row in table.rows: 153 | row_text = " | ".join([cell.text for cell in row.cells]) 154 | full_text.append(f"| {row_text} |") 155 | 156 | return "\n\n".join(full_text) 157 | except Exception as e: 158 | logger.error(f"Error extracting text from DOCX {file_path}: {str(e)}") 159 | return f"ERROR: Could not extract text from {file_path}." 160 | 161 | 162 | def extract_from_pptx(file_path: str) -> str: 163 | """Extract text from a PPTX file. 164 | 165 | Args: 166 | file_path: Path to the PPTX file 167 | 168 | Returns: 169 | Extracted text content 170 | """ 171 | if not PPTX_AVAILABLE: 172 | return "ERROR: PPTX extraction requires python-pptx. Please install with: pip install python-pptx" 173 | 174 | try: 175 | presentation = Presentation(file_path) 176 | text_content = [] 177 | 178 | for slide_num, slide in enumerate(presentation.slides): 179 | slide_text = [] 180 | slide_text.append(f"# Slide {slide_num + 1}") 181 | 182 | for shape in slide.shapes: 183 | if hasattr(shape, "text") and shape.text.strip(): 184 | slide_text.append(shape.text) 185 | 186 | text_content.append("\n\n".join(slide_text)) 187 | 188 | return "\n\n---\n\n".join(text_content) 189 | except Exception as e: 190 | logger.error(f"Error extracting text from PPTX {file_path}: {str(e)}") 191 | return f"ERROR: Could not extract text from {file_path}." 192 | 193 | 194 | def extract_from_xlsx(file_path: str) -> str: 195 | """Extract data from an Excel file. 196 | 197 | Args: 198 | file_path: Path to the Excel file 199 | 200 | Returns: 201 | Extracted data as markdown tables 202 | """ 203 | if not PANDAS_AVAILABLE: 204 | return "ERROR: Excel extraction requires pandas. 
Please install with: pip install pandas openpyxl" 205 | 206 | try: 207 | result = [] 208 | # Read all sheets 209 | excel_file = pd.ExcelFile(file_path) 210 | 211 | for sheet_name in excel_file.sheet_names: 212 | df = pd.read_excel(file_path, sheet_name=sheet_name) 213 | 214 | # Convert to markdown table 215 | md_table = f"## Sheet: {sheet_name}\n\n" 216 | md_table += df.to_markdown(index=False) 217 | result.append(md_table) 218 | 219 | return "\n\n---\n\n".join(result) 220 | except Exception as e: 221 | logger.error(f"Error extracting data from Excel {file_path}: {str(e)}") 222 | return f"ERROR: Could not extract data from {file_path}." 223 | 224 | 225 | def log_llm_prompt( 226 | company_name: str, 227 | phase: str, 228 | section: str, 229 | messages: List[Dict[str, Any]], 230 | model: str, 231 | temperature: float, 232 | max_tokens: int, 233 | run_timestamp: Optional[str] = None 234 | ) -> None: 235 | """Log the prompt sent to the LLM. 236 | 237 | Args: 238 | company_name: Name of the company 239 | phase: Phase of processing (e.g., 'document_processing') 240 | section: Section being generated (e.g., 'image_analysis') 241 | messages: Messages sent to the LLM 242 | model: Model name 243 | temperature: Temperature setting 244 | max_tokens: Max tokens setting 245 | run_timestamp: Optional timestamp to use for the log filename. If provided, 246 | appends to an existing log file with this timestamp. 247 | """ 248 | if not ENABLE_LOGGING: 249 | logger.info("LLM logging is disabled. Skipping log entry.") 250 | return 251 | 252 | # Create logs directory 253 | company_logs_dir = Path(f"company_data/{company_name}/logs") 254 | company_logs_dir.mkdir(exist_ok=True, parents=True) 255 | 256 | # Generate timestamp for the log file or use provided one 257 | timestamp = run_timestamp if run_timestamp else datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 258 | log_filename = f"{company_name}_{phase}_{timestamp}.log" 259 | log_path = company_logs_dir / log_filename 260 | 261 | # Prepare log entry 262 | log_entry = { 263 | "timestamp": datetime.datetime.now().isoformat(), 264 | "company": company_name, 265 | "phase": phase, 266 | "section": section, 267 | "model": model, 268 | "temperature": temperature, 269 | "max_tokens": max_tokens, 270 | "messages": messages 271 | } 272 | 273 | # Append to log file 274 | try: 275 | # Create file if it doesn't exist 276 | if not log_path.exists(): 277 | with open(log_path, 'w', encoding='utf-8') as f: 278 | f.write(f"# LLM Interaction Log for {company_name}\n") 279 | f.write(f"# Phase: {phase}\n") 280 | f.write(f"# Created: {timestamp}\n\n") 281 | 282 | # Append log entry 283 | with open(log_path, 'a', encoding='utf-8') as f: 284 | f.write(f"\n## {section} - {datetime.datetime.now().isoformat()}\n") 285 | f.write(json.dumps(log_entry, indent=2)) 286 | f.write("\n\n---\n\n") 287 | 288 | logger.info(f"Logged LLM prompt for {company_name}/{phase}/{section} to {log_path}") 289 | except Exception as e: 290 | logger.error(f"Failed to log LLM prompt: {str(e)}") 291 | 292 | 293 | def extract_from_image(file_path: str, run_timestamp: Optional[str] = None) -> str: 294 | """Extract text from an image using OCR. 295 | 296 | Args: 297 | file_path: Path to the image file 298 | run_timestamp: Optional timestamp for consistent log file naming 299 | 300 | Returns: 301 | Extracted text content 302 | """ 303 | if not OCR_AVAILABLE: 304 | return "ERROR: Image extraction requires Pillow and pytesseract. 
Please install with: pip install Pillow pytesseract" 305 | 306 | try: 307 | image = Image.open(file_path) 308 | text = pytesseract.image_to_string(image) 309 | 310 | # Use OpenAI for better image understanding if available 311 | if OPENAI_AVAILABLE and os.environ.get("OPENAI_API_KEY"): 312 | try: 313 | # Get company name from file path 314 | file_path_obj = Path(file_path) 315 | company_name = file_path_obj.parent.name 316 | 317 | # Create a timestamp for this processing run if not provided 318 | if run_timestamp is None: 319 | run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 320 | 321 | # Try to get a better description using OpenAI's vision capabilities 322 | client = openai.Client(api_key=os.environ["OPENAI_API_KEY"]) 323 | with open(file_path, "rb") as image_file: 324 | base_image = image_file.read() 325 | 326 | # Prepare messages 327 | messages = [ 328 | { 329 | "role": "user", 330 | "content": [ 331 | {"type": "text", "text": "Describe this image in detail, focusing on any financial data, charts, or business information visible."}, 332 | {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64.b64encode(base_image).decode('utf-8')}"}} 333 | ] 334 | } 335 | ] 336 | 337 | # Log the prompt 338 | log_llm_prompt( 339 | company_name=company_name, 340 | phase="document_processing", 341 | section=f"image_analysis_{file_path_obj.stem}", 342 | messages=messages, 343 | model=OPENAI_VISION_MODEL, # Use global variable 344 | temperature=0.3, 345 | max_tokens=300, 346 | run_timestamp=run_timestamp 347 | ) 348 | 349 | response = client.chat.completions.create( 350 | model=OPENAI_VISION_MODEL, # Use global variable 351 | messages=messages, 352 | temperature=0.3, 353 | max_tokens=300 354 | ) 355 | vision_description = response.choices[0].message.content 356 | return f"## OCR Text:\n\n{text}\n\n## Image Analysis:\n\n{vision_description}" 357 | except Exception as e: 358 | logger.warning(f"OpenAI vision processing failed: {str(e)}") 359 | return f"## OCR Text:\n\n{text}" 360 | else: 361 | return f"## OCR Text:\n\n{text}" 362 | except Exception as e: 363 | logger.error(f"Error extracting text from image {file_path}: {str(e)}") 364 | return f"ERROR: Could not extract text from {file_path}." 365 | 366 | 367 | def convert_to_markdown(file_path: str, run_timestamp: Optional[str] = None) -> Tuple[str, str]: 368 | """Convert various file formats to markdown text. 
369 | 370 | Args: 371 | file_path: Path to the file 372 | run_timestamp: Optional timestamp for consistent log naming across a run 373 | 374 | Returns: 375 | Tuple of (markdown_content, file_name) 376 | """ 377 | file_path = Path(file_path) 378 | file_ext = file_path.suffix.lower()[1:] # Remove the dot 379 | file_name = file_path.stem 380 | 381 | content = f"# {file_name}\n\n" 382 | content += f"Source: {file_path}\n" 383 | content += f"Processed on: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n---\n\n" 384 | 385 | try: 386 | if file_ext == "txt": 387 | extracted = extract_from_txt(str(file_path)) 388 | elif file_ext == "pdf": 389 | extracted = extract_from_pdf(str(file_path)) 390 | elif file_ext == "docx": 391 | extracted = extract_from_docx(str(file_path)) 392 | elif file_ext == "pptx": 393 | extracted = extract_from_pptx(str(file_path)) 394 | elif file_ext in ["xlsx", "xls"]: 395 | extracted = extract_from_xlsx(str(file_path)) 396 | elif file_ext in ["jpg", "jpeg", "png", "gif", "bmp"]: 397 | extracted = extract_from_image(str(file_path), run_timestamp) 398 | else: 399 | extracted = f"Unsupported file format: {file_ext}" 400 | logger.warning(f"Unsupported file format: {file_ext}") 401 | 402 | content += extracted 403 | 404 | except Exception as e: 405 | logger.error(f"Error processing {file_path}: {str(e)}") 406 | content += f"ERROR: Failed to process file {file_path}. Exception: {str(e)}" 407 | 408 | return content, f"{file_name}.md" 409 | 410 | 411 | def process_company_folder(company_folder: str) -> List[Tuple[str, str]]: 412 | """Process all files in a company folder. 413 | 414 | Args: 415 | company_folder: Path to the company folder 416 | 417 | Returns: 418 | List of tuples (markdown_content, markdown_file_path) 419 | """ 420 | logger.info(f"Processing company folder: {company_folder}") 421 | company_path = Path(company_folder) 422 | 423 | if not company_path.exists() or not company_path.is_dir(): 424 | logger.error(f"Company folder does not exist: {company_folder}") 425 | return [] 426 | 427 | # Get company name from folder name 428 | company_name = company_path.name 429 | 430 | # Create output folders proactively 431 | output_folder = company_path / "processed" 432 | output_folder.mkdir(exist_ok=True) 433 | 434 | # Create logs directory proactively 435 | logs_folder = company_path / "logs" 436 | logs_folder.mkdir(exist_ok=True) 437 | logger.info(f"Ensured logs directory exists: {logs_folder}") 438 | 439 | # Create a single timestamp for this processing run 440 | run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") 441 | 442 | results = [] 443 | 444 | # Process all files in the folder 445 | for file_path in company_path.glob("*"): 446 | if file_path.is_file() and not file_path.name.startswith('.') and not file_path.name.endswith('.md'): 447 | logger.info(f"Processing file: {file_path}") 448 | 449 | # Convert the file to markdown using the common run timestamp 450 | markdown_content, markdown_name = convert_to_markdown(str(file_path), run_timestamp) 451 | 452 | # Save the markdown file 453 | markdown_path = output_folder / markdown_name 454 | with open(markdown_path, 'w', encoding='utf-8') as f: 455 | f.write(markdown_content) 456 | 457 | results.append((markdown_content, str(markdown_path))) 458 | 459 | logger.info(f"Processed {len(results)} files for company: {company_name}") 460 | return results --------------------------------------------------------------------------------
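
Note on programmatic use: the functions above can also be driven directly from Python instead of through main.py's CLI. The following is a minimal sketch that relies only on the interfaces shown in this listing (process_company_folder, and the generate_master_file call signature used by main.py); the folder name "MyCompany" is a placeholder, and master_file_generator.py itself is not included here, so treat this as an illustrative sketch rather than a supported entry point.

from pathlib import Path

from document_processor import process_company_folder
from master_file_generator import generate_master_file

# Hypothetical company folder; replace with a real folder under company_data/
company_folder = Path("company_data") / "MyCompany"

# Step 1: convert every supported document into markdown under <company>/processed
processed = process_company_folder(str(company_folder))
markdown_files = [md_path for _content, md_path in processed]

# Step 2: merge the processed markdown files into a single master file
# (output_dir=None mirrors how main.py calls generate_master_file)
if markdown_files:
    master_path = generate_master_file(
        company_name=company_folder.name,
        markdown_files=markdown_files,
        output_dir=None,
    )
    print(f"Master file: {master_path}")
else:
    print("Nothing was processed; check the company folder path.")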