├── src ├── chemgraph │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── atomsdata.py │ │ ├── graspa_input.py │ │ ├── calculators │ │ │ ├── emt_calc.py │ │ │ ├── aimnet2_calc.py │ │ │ ├── mopac_calc.py │ │ │ ├── psi4_calc.py │ │ │ ├── fairchem_calc.py │ │ │ └── nwchem_calc.py │ │ ├── supported_models.py │ │ └── agent_response.py │ ├── tools │ │ ├── __init__.py │ │ ├── files │ │ │ ├── __init__.py │ │ │ └── template │ │ │ │ ├── __init__.py │ │ │ │ ├── force_field.def │ │ │ │ ├── methane.def │ │ │ │ ├── N2.def │ │ │ │ ├── CO2.def │ │ │ │ ├── TIP4P.def │ │ │ │ └── simulation.input │ │ ├── local_model_loader.py │ │ ├── alcf_loader.py │ │ ├── generic_tools.py │ │ ├── cheminformatics_tools.py │ │ ├── anthropic_loader.py │ │ ├── groq_loader.py │ │ └── gemini_loader.py │ ├── utils │ │ ├── __init__.py │ │ ├── logging_config.py │ │ └── get_workflow_from_llm.py │ ├── state │ │ ├── multi_agent_state.py │ │ └── state.py │ ├── prompt │ │ ├── llama_prompt.py │ │ └── single_agent_prompt.py │ └── graphs │ │ └── mock_agent.py └── ui │ ├── __init__.py │ └── config.py ├── tests ├── __init__.py ├── conftest.py ├── test_llm_agent.py └── test_calculators.py ├── docs ├── license.md ├── code_formatting_and_linting.md ├── citation.md ├── acknowledgements.md ├── index.md ├── project_structure.md ├── example_usage.md ├── running_local_models.md └── streamlit_web_interface.md ├── .pre-commit-config.yaml ├── scripts └── evaluations │ ├── generate_evaluation_data │ ├── Exp12 │ │ └── find_error.py │ ├── Exp6 │ │ └── manual_files │ │ │ ├── 2,3,3,3-tetrafluoropropanoic acid.xyz │ │ │ ├── (2E,4Z)-3-chlorohexa-2,4-dienedioate.xyz │ │ │ ├── 4-bromo-6,8-dioxabicyclo[3.2.1]octane.xyz │ │ │ ├── O-ethyl N-prop-2-enylcarbamothioate.xyz │ │ │ ├── 2-thiophen-2-yl-3,1-benzoxazin-4-one.xyz │ │ │ ├── 2-[4-(hydroxymethyl)phenoxy]acetic acid.xyz │ │ │ ├── 4-hydroxy-3-methyl-2-prop-2-enylcyclopent-2-en-1-one.xyz │ │ │ ├── 2-ethyl-4-phenyl-1,3-thiazole.xyz │ │ │ ├── 6-pyridin-2-ylpyridine-3-sulfonic acid.xyz 
│ │ │ ├── 5-(5-fluoro-2-methoxyphenyl)-1H-pyrazol-3-amine.xyz │ │ │ ├── 2-(5-chloropyridin-2-yl)-1H-quinolin-4-one.xyz │ │ │ ├── 2-[difluoromethyl(propan-2-yloxy)phosphoryl]oxypropane.xyz │ │ │ ├── 12,16-dioxatetracyclo[11.2.1.02,11.03,8]hexadeca-2(11),3,5,7,9-pentaene.xyz │ │ │ ├── 3-(4-methylphenyl)-5-pyridin-4-yl-1,2,4-oxadiazole.xyz │ │ │ ├── (E)-1-(5-bromo-2-hydroxyphenyl)-3-(4-fluorophenyl)prop-2-en-1-one.xyz │ │ │ ├── 1-benzyl-5-nitroindole.xyz │ │ │ ├── 2-[(3,4-dichlorophenyl)methyl-(2-hydroxyethyl)amino]ethanol.xyz │ │ │ ├── 7-methoxy-1,2-dimethyl-9H-pyrido[3,4-b]indol-2-ium.xyz │ │ │ ├── [(E)-(2-chloro-1-methylindol-3-yl)methylideneamino] 3-chlorobenzoate.xyz │ │ │ ├── N-benzyl-N-methyl-3,5-dinitrobenzamide.xyz │ │ │ └── N-butyl-N-ethyl-3-methyl-2-nitrobenzamide.xyz │ └── Exp11 │ │ └── manual_files │ │ ├── C(C(C(=O)O)O)S.xyz │ │ ├── CC(COC)O.xyz │ │ ├── COC(=O)NS(=O)(=O)OC.xyz │ │ ├── CC1=C(C=C(C=C1Cl)[N+](=O)[O-])Cl.xyz │ │ ├── CCOC(C(F)(F)F)(C(F)(F)F)O.xyz │ │ ├── COC1=C(C=C(C=C1)C(=O)Cl)OC.xyz │ │ ├── C1=CC=C(C=C1)OC2=C(C(=O)C(C2(Cl)Cl)(Cl)Cl)Cl.xyz │ │ ├── C1=CC(=C(N=C1)Cl)C(=O)NC2=NC=C(C=C2)Cl.xyz │ │ ├── C1=CC2=CN(N=C2C=C1)C3=CC(=CC=C3)F.xyz │ │ ├── C1=CC=C(C=C1)N2N=C(N=N2)C3=CN=CC=C3.xyz │ │ ├── CCCCC1=C(C=C(S1)C)C.xyz │ │ ├── CN1C2=C(C=C(C=C2)F)SC1=NC(=O)C3=C(SC(=C3)Cl)Cl.xyz │ │ ├── C1CC(C1)(C2=CC=CC3=CC=CC=C32)O.xyz │ │ ├── CN1C2=C(C(=O)NC1=O)N(C(=S)N2)CCOC.xyz │ │ └── C1=CC=C(C(=C1)C2=NC3=C(C=CC(=C3)F)NC2=O)N.xyz │ ├── pubchempy │ └── get_molecule_from_pubchempy.py │ ├── run_llm_workflow │ ├── Exp12_from_reaction_to_enthalpy │ │ └── run_llm_workflow.py │ ├── Exp14_from_reaction_to_enthalpy_multiagent │ │ └── run_llm_workflow.py │ ├── Exp13_from_reaction_to_gibbs │ │ └── run_llm_workflow.py │ ├── Exp15_from_reaction_to_gibbs_multi_agent │ │ └── run_llm_workflow.py │ ├── Exp8_from_smiles_to_opt │ │ └── run_llm_workflow.py │ ├── Exp1_from_name_to_smiles │ │ └── run_llm_workflow.py │ ├── Exp10_from_smiles_to_gibbs │ │ └── run_llm_workflow.py │ └── 
Exp7_from_smiles_to_coords │ │ └── run_llm_workflow.py │ └── mock_llm │ └── mock_eval.py ├── .github └── workflows │ ├── tests.yml │ ├── ci.yml │ ├── doc_ci.yml │ └── conda-tests.yml ├── .gitignore ├── environment.yml ├── docker-compose.yml ├── Dockerfile ├── notebooks ├── Demo_infrared_spectrum.ipynb └── cif_files │ └── calf-20_pacmof.cif ├── config.toml ├── pyproject.toml ├── mkdocs.yml └── Dockerfile.arm /src/chemgraph/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chemgraph/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chemgraph/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chemgraph/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chemgraph/tools/files/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/chemgraph/tools/files/template/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Empty file to make tests a package 2 | -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | !!! 
info 2 | This project is licensed under the Apache 2.0 License. -------------------------------------------------------------------------------- /src/chemgraph/tools/files/template/force_field.def: -------------------------------------------------------------------------------- 1 | # rules to overwrite 2 | 0 3 | # number of defined interactions 4 | 0 5 | # mixing rules to overwrite 6 | 0 7 | -------------------------------------------------------------------------------- /src/ui/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | ChemGraph UI Package 3 | 4 | This package contains the user interface components for ChemGraph including 5 | the Streamlit web app and command-line interface. 6 | """ 7 | 8 | __version__ = "0.1.0" 9 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | # Ruff version. 4 | rev: v0.9.8 5 | hooks: 6 | # Run the linter. 7 | - id: ruff 8 | # Run the formatter. 9 | - id: ruff-format 10 | -------------------------------------------------------------------------------- /docs/code_formatting_and_linting.md: -------------------------------------------------------------------------------- 1 | This project uses [Ruff](https://github.com/astral-sh/ruff) for **both formatting and linting**. 
To ensure all code follows our style guidelines, install the pre-commit hook: 2 | 3 | ```sh 4 | pip install pre-commit 5 | pre-commit install 6 | ``` -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp12/find_error.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | with open(sys.argv[1], "r") as rf: 5 | data = json.load(rf) 6 | 7 | for item in data: 8 | if "ERROR" in data[item]["manual_workflow"]["result"]['value']: 9 | print(item) 10 | -------------------------------------------------------------------------------- /src/chemgraph/state/multi_agent_state.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict, Annotated 2 | from langgraph.graph import add_messages 3 | 4 | 5 | class ManagerWorkerState(TypedDict): 6 | messages: Annotated[list, add_messages] 7 | worker_result: Annotated[list, add_messages] 8 | current_task_index: int 9 | task_list: list 10 | worker_channel: dict[str, Annotated[list[str], add_messages]] 11 | current_worker: str 12 | -------------------------------------------------------------------------------- /docs/citation.md: -------------------------------------------------------------------------------- 1 | If you use ChemGraph in your research, please cite our work: 2 | 3 | ```bibtex 4 | @article{pham2025chemgraph, 5 | title={ChemGraph: An Agentic Framework for Computational Chemistry Workflows}, 6 | author={Pham, Thang D and Tanikanti, Aditya and Keçeli, Murat}, 7 | journal={arXiv preprint arXiv:2506.06363}, 8 | year={2025}, 9 | url={https://arxiv.org/abs/2506.06363} 10 | } 11 | ``` -------------------------------------------------------------------------------- /docs/acknowledgements.md: -------------------------------------------------------------------------------- 1 | !!! 
info 2 | This research used resources of the Argonne Leadership Computing Facility, a U.S. 3 | Department of Energy (DOE) Office of Science user facility at Argonne National 4 | Laboratory and is based on research supported by the U.S. DOE Office of Science- 5 | Advanced Scientific Computing Research Program, under Contract No. DE-AC02- 6 | 06CH11357. Our work leverages ALCF Inference Endpoints, which provide a robust API 7 | for LLM inference on ALCF HPC clusters via Globus Compute. We are thankful to Serkan 8 | Altuntaş for his contributions to the user interface of ChemGraph and for insightful 9 | discussions on AIOps. -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | !!! info "ChemGraph" 2 | 3 | **ChemGraph** is an agentic framework that can automate molecular simulation workflows using large language models (LLMs). Built on top of `LangGraph` and `ASE`, ChemGraph allows users to perform complex computational chemistry tasks, from structure generation to thermochemistry calculations, with a natural language interface. 4 | 5 | !!! info "ChemGraph" 6 | 7 | ChemGraph supports diverse simulation backends, including ab initio quantum chemistry methods (e.g., coupled-cluster, DFT via NWChem, ORCA), semi-empirical methods (e.g., XTB via TBLite), and machine learning potentials (e.g., MACE, UMA) through a modular integration with `ASE`. 
-------------------------------------------------------------------------------- /docs/project_structure.md: -------------------------------------------------------------------------------- 1 | ``` 2 | chemgraph/ 3 | │ 4 | ├── src/ # Source code 5 | │ ├── chemgraph/ # Top-level package 6 | │ │ ├── agent/ # Agent-based task management 7 | │ │ ├── graphs/ # Workflow graph utilities 8 | │ │ ├── models/ # Different Pydantic models 9 | │ │ ├── prompt/ # Agent prompt 10 | │ │ ├── state/ # Agent state 11 | │ │ ├── tools/ # Tools for molecular simulations 12 | │ │ ├── utils/ # Other utility functions 13 | │ 14 | ├── pyproject.toml # Project configuration 15 | └── README.md # Project documentation 16 | ``` -------------------------------------------------------------------------------- /src/chemgraph/tools/files/template/methane.def: -------------------------------------------------------------------------------- 1 | # critical constants: Temperature [T], Pressure [Pa], and Acentric factor [-] 2 | 190.564 3 | 4599200.0 4 | 0.01142 5 | # Number Of Atoms 6 | 1 7 | # Number Of Groups 8 | 1 9 | # Alkane-group 10 | rigid 11 | # number of atoms 12 | 1 13 | # atomic positions 14 | 0 CH4_sp3 0.0 0.0 0.0 15 | # Chiral centers Bond BondDipoles Bend UrayBradley InvBend Torsion Imp. 
@pytest.fixture(autouse=True)
def setup_test_env():
    """Prepare the shared test environment before every test.

    Declared with ``autouse=True``, so tests do not need to request it
    explicitly. At present it only installs a warning filter.
    """
    # Silence the DeprecationWarning about 'np.bool_' scalars used as an index.
    warnings.filterwarnings(
        action="ignore",
        message=(
            "In future, it will be an error for 'np.bool_' scalars "
            "to be interpreted as an index"
        ),
        category=DeprecationWarning,
    )
class AtomsData(BaseModel):
    """Pydantic model holding the raw atomic data of a structure.

    Used to store atomic data (from an ASE Atoms object or a QCElemental
    Molecule object) that cannot be parsed via LLM Schema.
    """

    # Required fields.
    numbers: List[int] = Field(..., description="Atomic numbers")
    positions: List[List[float]] = Field(..., description="Atomic positions")

    # Optional fields; both default to None when not supplied.
    cell: Optional[List[List[float]]] = Field(
        None, description="Cell vectors or None"
    )
    pbc: Optional[List[bool]] = Field(
        None, description="Periodic boundary conditions or None"
    )
molecular structure files 34 | *.xyz 35 | # env 36 | chemgraph-env/ 37 | # env 38 | .env 39 | # Log files 40 | *run_logs/ 41 | *vib/ 42 | plots/ 43 | initial_evaluations/ 44 | test/ 45 | test_outputs/ 46 | *ir/ 47 | 48 | .venv 49 | combine* 50 | vllm/ 51 | logs/ 52 | error_log.txt 53 | .env 54 | test.csv 55 | nwchem/ 56 | nwchem.nwi 57 | nwchem.nwo 58 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C(C(C(=O)O)O)S.xyz: -------------------------------------------------------------------------------- 1 | 13 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 1.00384778 -0.38523592 -0.54797190 4 | C 0.04914690 0.28959097 0.44763385 5 | C -1.39897450 -0.09286661 0.11887822 6 | O -2.01916183 -0.89337859 0.80273504 7 | O -1.89856412 0.52232032 -0.96933914 8 | O 0.34120627 -0.10532630 1.76844762 9 | S 2.72560723 0.06029128 -0.26856786 10 | H 0.68866902 -0.13608385 -1.57227673 11 | H 0.95454275 -1.47788978 -0.43084357 12 | H 0.16681909 1.37609294 0.32082756 13 | H -2.80171964 0.16101959 -1.09142179 14 | H -0.41176814 -0.67262040 2.02275157 15 | H 2.60034919 1.35408635 -0.60085285 16 | -------------------------------------------------------------------------------- /src/chemgraph/tools/files/template/N2.def: -------------------------------------------------------------------------------- 1 | # critical constants: Temperature [T], Pressure [Pa], and Acentric factor [-] 2 | 126.192 3 | 3395800.0 4 | 0.0372 5 | #Number Of Atoms 6 | 3 7 | # Number of groups 8 | 1 9 | # N2-group 10 | rigid 11 | # number of atoms 12 | 3 13 | # atomic positions 14 | 0 N_n2 0.0 0.0 0.55 15 | 1 N_com 0.0 0.0 0.0 16 | 2 N_n2 0.0 0.0 -0.55 17 | # Chiral centers Bond BondDipoles Bend UrayBradley InvBend Torsion Imp. 
Torsion Bond/Bond Stretch/Bend Bend/Bend Stretch/Torsion Bend/Torsion IntraVDW IntraCoulomb 18 | 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 19 | # Bond stretch: atom n1-n2, type, parameters 20 | 0 1 RIGID_BOND 21 | 1 2 RIGID_BOND 22 | # Number of config moves 23 | 0 24 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: chemgraph 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.10 7 | - pip 8 | - numpy=2.2.6 9 | - pandas=2.2.3 10 | - pytest=8.4.1 11 | - rich=14.1.0 12 | - toml=0.10.2 13 | - tblite 14 | - nwchem 15 | - pip: 16 | - ase==3.25.0 17 | - rdkit==2025.3.3 18 | - langgraph==0.4.7 19 | - langchain-openai==0.3.27 20 | - langchain-ollama==0.3.4 21 | - langchain-anthropic==0.3.17 22 | - langchain-google-genai==2.1.7 23 | - langchain-experimental==0.3.4 24 | - pydantic==2.11.7 25 | - pubchempy @ git+https://github.com/keceli/PubChemPy.git@main 26 | - pyppeteer==2.0.0 27 | - numexpr==2.11.0 28 | - deepdiff==8.5.0 29 | - pymatgen==2025.3.10 30 | - mace-torch==0.3.13 31 | - streamlit==1.48.1 32 | - stmol==0.0.9 33 | - ipython-genutils==0.2.0 34 | - langsmith==0.3.45 35 | -------------------------------------------------------------------------------- /src/chemgraph/tools/files/template/CO2.def: -------------------------------------------------------------------------------- 1 | # critical constants: Temperature [T], Pressure [Pa], and Acentric factor [-] 2 | 304.1282 3 | 7377300.0 4 | 0.22394 5 | #Number Of Atoms 6 | 3 7 | # Number of groups 8 | 1 9 | # CO2-group 10 | rigid 11 | # number of atoms 12 | 3 13 | # atomic positions 14 | 0 O_co2 0.0 0.0 1.16 15 | 1 C_co2 0.0 0.0 0.0 16 | 2 O_co2 0.0 0.0 -1.16 17 | # Chiral centers Bond BondDipoles Bend UrayBradley InvBend Torsion Imp. 
Torsion Bond/Bond Stretch/Bend Bend/Bend Stretch/Torsion Bend/Torsion IntraVDW IntraCoulomb 18 | 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 19 | # Bond stretch: atom n1-n2, type, parameters 20 | 0 1 RIGID_BOND 21 | 1 2 RIGID_BOND 22 | # Number of config moves 23 | 0 24 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | permissions: 8 | contents: write 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Configure Git Credentials 15 | run: | 16 | git config user.name github-actions[bot] 17 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 18 | - uses: actions/setup-python@v5 19 | with: 20 | python-version: 3.x 21 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 22 | - uses: actions/cache@v4 23 | with: 24 | key: mkdocs-material-${{ env.cache_id }} 25 | path: ~/.cache 26 | restore-keys: | 27 | mkdocs-material- 28 | - run: pip install mkdocs-material mkdocstrings 29 | - run: mkdocs gh-deploy --force -------------------------------------------------------------------------------- /.github/workflows/doc_ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | permissions: 8 | contents: write 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Configure Git Credentials 15 | run: | 16 | git config user.name github-actions[bot] 17 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 18 | - uses: actions/setup-python@v5 19 | with: 20 | python-version: 3.x 21 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 22 | - uses: actions/cache@v4 23 | with: 24 | key: mkdocs-material-${{ env.cache_id }} 
25 | path: ~/.cache 26 | restore-keys: | 27 | mkdocs-material- 28 | - run: pip install mkdocs-material mkdocstrings 29 | - run: mkdocs gh-deploy --force 30 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/(2E,4Z)-3-chlorohexa-2,4-dienedioate.xyz: -------------------------------------------------------------------------------- 1 | 14 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -0.64730736 0.09619728 -0.33420021 4 | C -1.80821817 -0.58266614 -0.15480842 5 | C -3.09305511 0.05431106 -0.62691524 6 | O -3.14716006 1.17966897 -1.17746243 7 | O -4.13916989 -0.63191938 -0.42927409 8 | C 0.69181070 -0.33036490 0.04320689 9 | C 1.74013497 0.50736498 -0.22733803 10 | C 3.22920399 0.21442251 0.10470721 11 | O 3.86137097 1.21920583 -0.29979015 12 | O 3.58026368 -0.82311811 0.65083607 13 | Cl 0.86979259 -1.86063274 0.80520889 14 | H -0.72356606 1.07414240 -0.81637298 15 | H -1.87870796 -1.56170131 0.30911345 16 | H 1.46460771 1.44508954 -0.71558121 17 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CC(COC)O.xyz: -------------------------------------------------------------------------------- 1 | 16 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -0.81962116 1.70764166 -0.06452879 4 | C -0.80183077 0.41645332 -0.86626935 5 | C -0.04037893 -0.71141845 -0.17098697 6 | O 1.32912270 -0.33050277 -0.02459644 7 | C 1.93364947 -1.01377730 1.10128185 8 | O -2.17574572 -0.00834329 -0.98496038 9 | H -1.38285981 2.48233917 -0.60525242 10 | H 0.21382505 2.04932411 0.10318951 11 | H -1.31441058 1.53469647 0.90262650 12 | H -0.35878195 0.58491092 -1.86857514 13 | H -0.11901067 -1.63098879 -0.77714979 14 | H -0.51420646 -0.91312095 0.80831435 15 | H 1.83881233 -2.11001251 1.00967694 16 | H 1.46716848 -0.69092466 2.04797261 17 | H 2.99702259 -0.74404813 1.12365620 18 | H -2.25275458 
-0.62222881 -1.73439870 19 | -------------------------------------------------------------------------------- /src/chemgraph/tools/files/template/TIP4P.def: -------------------------------------------------------------------------------- 1 | # critical constants: Temperature [T], Pressure [Pa], and Acentric factor [-] 2 | 647.14 3 | 22064000.0 4 | -0.217000 5 | # total number Of atoms 6 | 4 7 | # Number of groups 8 | 1 9 | # water-group 10 | rigid 11 | # number of atoms 12 | 4 13 | # atomic positions 14 | 0 Ow 0.0 0.0 0.0 15 | 1 Lw 0.0 0.15 0.0 16 | 2 Hw 0.75695 0.58588 0.0 17 | 3 Hw -0.75695 0.58588 0.0 18 | # Chiral centers Bond BondDipoles Bend UrayBradley InvBend Torsion Imp. Torsion Bond/Bond Stretch/Bend Bend/Bend Stretch/Torsion Bend/Torsion IntraVDW IntraCoulomb 19 | 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 20 | # Bond stretch: atom n1-n2, type, parameters 21 | 0 1 RIGID_BOND 22 | 0 2 RIGID_BOND 23 | 0 3 RIGID_BOND 24 | # Number of config moves 25 | 0 26 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/COC(=O)NS(=O)(=O)OC.xyz: -------------------------------------------------------------------------------- 1 | 17 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 3.40533405 0.62715737 -0.92584574 4 | O 1.97621695 0.51194174 -1.22767368 5 | C 1.30129897 -0.09239765 -0.22027639 6 | O 1.77688841 -0.47267031 0.83971654 7 | N -0.03609888 -0.21259915 -0.55390420 8 | S -1.14799451 -1.00638160 0.41687490 9 | O -1.37768557 -0.23607091 1.61254663 10 | O -0.81675854 -2.39929729 0.49060642 11 | O -2.39449186 -0.89522648 -0.57161233 12 | C -3.23563589 0.32307626 -0.50934639 13 | H 3.85126829 1.13355478 -1.78904861 14 | H 3.83844326 -0.37145746 -0.78043720 15 | H 3.54542981 1.20628117 -0.00401067 16 | H -0.36123434 0.08341704 -1.47605076 17 | H -4.08454324 0.11950718 -1.17156706 18 | H -2.67593481 1.19401016 -0.88183713 19 | H -3.56450209 0.48715514 0.52186590 20 | 
-------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CC1=C(C=C(C=C1Cl)[N+](=O)[O-])Cl.xyz: -------------------------------------------------------------------------------- 1 | 17 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -2.68647667 0.17089214 0.22470915 4 | C -1.20242586 0.07862890 0.09273598 5 | C -0.37191167 1.20048682 0.26911239 6 | C 1.01788531 1.14102607 0.12475770 7 | C 1.59285018 -0.09457504 -0.16774733 8 | C 0.82815472 -1.25066055 -0.31580521 9 | C -0.55791260 -1.14548511 -0.16308530 10 | Cl -1.49399421 -2.59111101 -0.27856442 11 | N 3.02655750 -0.18440015 -0.30170633 12 | O 3.51466410 -1.29832641 -0.55202879 13 | O 3.68754379 0.85648266 -0.15552081 14 | Cl -1.07273434 2.71781299 0.69938171 15 | H -2.98894010 0.00301689 1.27265224 16 | H -3.03903940 1.16740477 -0.06948307 17 | H -3.17485341 -0.59394289 -0.39213375 18 | H 1.62857409 2.03027778 0.24665059 19 | H 1.29205856 -2.20752785 -0.53392476 20 | -------------------------------------------------------------------------------- /src/chemgraph/tools/files/template/simulation.input: -------------------------------------------------------------------------------- 1 | NumberOfInitializationCycles NCYCLE 2 | NumberOfEquilibrationCycles 0 3 | NumberOfProductionCycles NCYCLE 4 | UseMaxStep no 5 | MaxStepPerCycle 1 6 | 7 | RestartFile no 8 | BMCBiasingMethod LJ_Biasing 9 | NumberOfTrialPositions 10 10 | NumberOfTrialOrientations 10 11 | NumberOfBlocks 1 12 | AdsorbateAllocateSpace 30240 13 | 14 | NumberOfSimulations 1 15 | SingleSimulation yes 16 | DifferentFrameworks yes 17 | 18 | UseChargesFromCIFFile yes 19 | InputFileType cif 20 | FrameworkName CIFFILE 21 | UnitCells 0 UC_X UC_Y UC_Z 22 | ChargeMethod Ewald 23 | Temperature TEMPERATURE 24 | Pressure PRESSURE 25 | OverlapCriteria 1e5 26 | CutOffVDW CUTOFF 27 | CutOffCoulomb CUTOFF 28 | EwaldPrecision 1e-6 29 | 30 | Component 0 MoleculeName ADSORBATE 31 | 
IdealGasRosenbluthWeight 1.0 32 | FugacityCoefficient PR-EOS 33 | TranslationProbability 1.0 34 | RotationProbability 1.0 35 | ReinsertionProbability 1.0 36 | SwapProbability 2.0 37 | CreateNumberOfMolecules 0 38 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/4-bromo-6,8-dioxabicyclo[3.2.1]octane.xyz: -------------------------------------------------------------------------------- 1 | 18 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 0.05053435 1.35046328 0.29006857 4 | C 1.41008147 0.74682489 -0.09417775 5 | C 1.38471943 -0.79256398 -0.01996791 6 | C -0.01594470 -1.32155878 -0.44930532 7 | O -0.86557039 -1.40848065 0.71071331 8 | C -1.81522497 -0.34973720 0.61502010 9 | C -1.13229767 0.60354391 -0.37612824 10 | O -0.61117015 -0.36591474 -1.32002558 11 | Br 2.79794226 -1.58316052 -1.12649141 12 | H 0.01291449 2.41044487 -0.00392254 13 | H -0.08046942 1.35504892 1.38240538 14 | H 2.20569322 1.14622396 0.55641724 15 | H 1.64740958 1.06407165 -1.11889638 16 | H 1.59244953 -1.14933620 1.00162077 17 | H 0.03020595 -2.32154432 -0.92546482 18 | H -2.03724324 0.05624007 1.61311610 19 | H -2.75467334 -0.71895227 0.17056087 20 | H -1.81935639 1.27838711 -0.90554236 21 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CCOC(C(F)(F)F)(C(F)(F)F)O.xyz: -------------------------------------------------------------------------------- 1 | 19 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 2.70030713 0.26420240 0.21550443 4 | C 1.43675465 0.15520309 -0.62108370 5 | O 0.42525697 -0.35554679 0.27728375 6 | C -0.88743996 0.07463389 0.05375973 7 | C -1.50096126 -0.62150599 -1.19550431 8 | F -2.74150900 -0.15316444 -1.46849014 9 | F -1.57039007 -1.96403342 -0.99365374 10 | F -0.73365277 -0.40794227 -2.29786761 11 | C -1.62030256 -0.35394994 1.36869014 12 | F -1.31014302 -1.61845503 
def test_agent_query(mock_llm):
    """Check that ``ChemGraph.run`` returns the reply of the tool-bound LLM."""
    with patch("chemgraph.agent.llm_agent.load_openai_model") as load_model:
        # Wire the mocks: loader -> LLM -> tool-bound LLM -> canned reply.
        tool_bound_llm = Mock()
        tool_bound_llm.invoke.return_value = AIMessage(content="Test response")
        mock_llm.bind_tools.return_value = tool_bound_llm
        load_model.return_value = mock_llm

        reply = ChemGraph(model_name="gpt-4o-mini").run(
            "What is the SMILES string for water?"
        )

        assert isinstance(reply, AIMessage)
        assert reply.content == "Test response"
        # Tools are bound exactly once and the bound LLM is invoked exactly once.
        mock_llm.bind_tools.assert_called_once()
        tool_bound_llm.invoke.assert_called_once()
Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -3.39341162 0.53753488 -1.16676706 4 | C -2.64858950 0.24585368 0.12713894 5 | O -1.28060367 -0.10710400 -0.27943644 6 | C -0.41118834 -0.41851446 0.70734394 7 | S -0.79121949 -0.47843632 2.32779723 8 | N 0.81379453 -0.67016100 0.19236523 9 | C 1.99751944 -0.93293789 0.97641690 10 | C 2.94752830 0.25475090 0.89960236 11 | C 4.14391390 0.06239887 0.33644359 12 | H -2.93768038 1.37688481 -1.71511847 13 | H -3.41550381 -0.34552904 -1.82431634 14 | H -4.42613714 0.80679724 -0.90439041 15 | H -3.07749169 -0.59535081 0.69225971 16 | H -2.59989473 1.11309930 0.80355507 17 | H 0.93764227 -0.55583905 -0.81486908 18 | H 1.69932610 -1.15639047 2.01357573 19 | H 2.54664319 -1.78246346 0.56297740 20 | H 2.69414250 1.26514757 1.25467221 21 | H 3.74623274 0.59392115 -1.91883556 22 | H 3.45523892 0.78567152 -2.59226925 23 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/COC1=C(C=C(C=C1)C(=O)Cl)OC.xyz: -------------------------------------------------------------------------------- 1 | 22 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 3.37740223 0.54684544 -0.07018300 4 | O 2.05480450 1.11300858 -0.04362760 5 | C 1.04444535 0.21032138 -0.00788364 6 | C -0.26044010 0.75858704 0.01843820 7 | C -1.35818667 -0.08155260 0.05564732 8 | C -1.21310052 -1.48843915 0.06829408 9 | C 0.08495637 -2.03172675 0.04208580 10 | C 1.18735401 -1.17666659 0.00459286 11 | C -2.46518744 -2.27070884 0.10869525 12 | O -3.57388155 -1.77356471 0.13072714 13 | Cl -2.27854677 -4.01877942 0.12420847 14 | O -0.30502424 2.11722340 0.00382685 15 | C -1.62545853 2.69424367 0.03019294 16 | H 3.52299145 -0.08267384 -0.96152693 17 | H 3.56777236 -0.06195562 0.82709370 18 | H 4.07464924 1.39279150 -0.09743876 19 | H -2.37593390 0.30726415 0.07663077 20 | H 0.22512270 -3.11020569 0.05105814 21 | H 2.18673797 -1.61944371 -0.01529997 22 | H -2.17128809 2.40735421 
def setup_logger(name=None, level=logging.INFO):
    """Return a logger that writes consistently formatted records to stdout.

    A stdout ``StreamHandler`` with the package's standard format is attached
    only when the logger has no handlers yet, so calling this function
    repeatedly for the same name does not duplicate output. The level is
    (re)applied on every call.

    Parameters
    ----------
    name : str, optional
        Logger name. ``None`` selects the root logger, by default None
    level : int, optional
        Logging level (e.g., logging.INFO, logging.DEBUG), by default logging.INFO

    Returns
    -------
    logging.Logger
        The logger registered under ``name``, set to ``level``.

    Notes
    -----
    Each record is rendered as: timestamp, logger name, log level, message.
    """
    log = logging.getLogger(name)
    already_has_handler = bool(log.handlers)

    if not already_has_handler:
        # First configuration of this logger: attach the single stdout handler.
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setFormatter(
            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        )
        log.addHandler(stdout_handler)

    log.setLevel(level)
    return log
7 | dockerfile: Dockerfile 8 | ports: 9 | - "8888:8888" 10 | volumes: 11 | - .:/app 12 | environment: 13 | - VLLM_BASE_URL=http://vllm_server:8000/v1 14 | - OPENAI_API_KEY=dummy-key 15 | depends_on: 16 | - vllm_server 17 | 18 | vllm_server: 19 | build: 20 | context: ./vllm 21 | dockerfile: docker/Dockerfile.arm 22 | command: ["--host", "0.0.0.0", "--port", "8000", "--model", "meta-llama/Llama-3.2-3B-Instruct", "--enable-auto-tool-choice","--tool-call-parser","llama3_json","--max-model-len", "10240","--tensor-parallel-size", "1","--max-num-seqs", "16"] 23 | ports: 24 | - "8001:8000" 25 | privileged: true 26 | shm_size: '8g' 27 | deploy: 28 | resources: 29 | limits: 30 | memory: 12G 31 | reservations: 32 | memory: 10G 33 | environment: 34 | - VLLM_LOG_LEVEL=debug 35 | - HF_TOKEN=${HF_TOKEN} 36 | - OMP_NUM_THREADS=8 37 | networks: 38 | default: 39 | aliases: 40 | - vllm_server_alias 41 | 42 | # Networks allow services to communicate with each other using their service names as hostnames. 43 | # A default network is created if not specified, but explicit definition can be useful. 
44 | networks: 45 | default: 46 | driver: bridge -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-thiophen-2-yl-3,1-benzoxazin-4-one.xyz: -------------------------------------------------------------------------------- 1 | 23 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -4.06752083 0.13006338 0.01315105 4 | C -3.51086393 1.39092744 0.28974183 5 | C -2.12383516 1.55881851 0.34132498 6 | C -1.27074009 0.47428239 0.11838257 7 | C -1.83936129 -0.80038599 -0.16130370 8 | C -3.22824435 -0.96411271 -0.21201573 9 | C -0.93769778 -1.92040845 -0.39130996 10 | O -1.23700688 -3.07576321 -0.64238879 11 | O 0.44531847 -1.63558237 -0.31469849 12 | C 0.87703335 -0.35008296 -0.03422954 13 | N 0.09768286 0.65957113 0.17348829 14 | C 2.30098693 -0.19408625 0.01521710 15 | C 3.02718893 0.95975335 0.27076680 16 | C 4.43744507 0.72494617 0.23627216 17 | C 4.76948510 -0.58285675 -0.04039612 18 | S 3.37783510 -1.55895869 -0.26531752 19 | H -5.15032389 -0.00564620 -0.02811293 20 | H -4.15089363 2.25843463 0.46854744 21 | H -1.66690699 2.52523897 0.55366450 22 | H -3.61407711 -1.96000847 -0.42987805 23 | H 2.53191842 1.91272794 0.46952253 24 | H 5.18459135 1.50157741 0.41118314 25 | H 5.74798636 -1.04844927 -0.12922444 26 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-[4-(hydroxymethyl)phenoxy]acetic acid.xyz: -------------------------------------------------------------------------------- 1 | 23 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -1.78228967 -0.75223556 1.10702194 4 | C -1.74492850 -0.25555097 -0.19536297 5 | C -0.53020607 0.19222813 -0.72144465 6 | C 0.61900232 0.20222132 0.07297350 7 | C 0.56324549 -0.30354175 1.37174734 8 | C -0.63649548 -0.78988066 1.91067415 9 | C -0.66688232 -1.31192533 3.32928594 10 | O 0.41442244 -2.20390563 3.59751162 11 | O -0.54451828 
0.64278466 -2.02706650 12 | C 0.68515029 0.52706420 -2.74097578 13 | C 1.41565263 1.85779203 -2.69751733 14 | O 2.32581429 2.13652964 -1.92935212 15 | O 0.93049709 2.72992011 -3.61503581 16 | H -2.73352582 -1.12489478 1.50428686 17 | H -2.63539304 -0.22368545 -0.82651100 18 | H 1.54426136 0.61315310 -0.33172193 19 | H 1.46479924 -0.32747775 1.99190520 20 | H -1.64183826 -1.79143380 3.52230154 21 | H -0.57784551 -0.47253349 4.03226710 22 | H 0.30594094 -2.93103087 2.95770420 23 | H 0.43359152 0.28207530 -3.77709473 24 | H 1.33733310 -0.24752212 -2.31684392 25 | H 1.45421224 3.55184965 -3.50636556 26 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC=C(C=C1)OC2=C(C(=O)C(C2(Cl)Cl)(Cl)Cl)Cl.xyz: -------------------------------------------------------------------------------- 1 | 23 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -3.56028835 0.65992908 0.65641427 4 | C -2.78692304 1.27890179 -0.33119018 5 | C -1.58956060 0.69864223 -0.75687624 6 | C -1.16918984 -0.48337974 -0.15724839 7 | C -1.93526879 -1.13831634 0.80180308 8 | C -3.13080049 -0.54992661 1.21629167 9 | O 0.02946156 -1.08870856 -0.57756017 10 | C 1.23999002 -0.60096298 -0.27072982 11 | C 2.32950230 -0.98651283 -0.98023353 12 | C 3.52645303 -0.35141525 -0.41805433 13 | O 4.63509796 -0.25740970 -0.89351721 14 | C 3.11164968 0.20600576 1.00342424 15 | C 1.56812921 0.43969637 0.80533755 16 | Cl 1.23560201 2.07203889 0.07449963 17 | Cl 0.64786200 0.28709380 2.29967967 18 | Cl 3.99822768 1.61859332 1.50013029 19 | Cl 3.45745649 -1.14248915 2.15706557 20 | Cl 2.32002809 -2.00143552 -2.31883786 21 | H -4.49724684 1.11378642 0.98562907 22 | H -3.11156362 2.21510464 -0.79413058 23 | H -0.99355451 1.15619636 -1.54846164 24 | H -1.59982095 -2.09353761 1.21393000 25 | H -3.72524302 -1.05189439 1.98458019 26 | -------------------------------------------------------------------------------- 
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/4-hydroxy-3-methyl-2-prop-2-enylcyclopent-2-en-1-one.xyz: -------------------------------------------------------------------------------- 1 | 23 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -0.92244947 1.97684007 0.60877263 4 | C -0.79756539 0.63783133 -0.02944534 5 | C 0.31358535 -0.07482890 -0.31142830 6 | C -0.06729455 -1.35748620 -0.97177334 7 | O 0.71555426 -2.11435733 -1.52932536 8 | C -1.58688029 -1.52417641 -0.80692148 9 | C -2.03614725 -0.06729797 -0.56677878 10 | O -2.36806189 0.59562618 -1.78598563 11 | C 1.75548110 0.26862115 -0.16188391 12 | C 2.37896899 -0.65464485 0.84778169 13 | C 2.81861778 -0.25022789 2.04591157 14 | H -1.43667702 1.91447720 1.58122917 15 | H 0.05405460 2.45486066 0.77317668 16 | H -1.53419308 2.63135234 -0.03333287 17 | H -1.76290013 -2.11918185 0.10404728 18 | H -2.06706157 -2.05384412 -1.63977647 19 | H -2.87903414 0.02395405 0.14059553 20 | H -3.11684305 0.14392181 -2.20995419 21 | H 2.23428497 0.10130341 -1.14089620 22 | H 1.88529751 1.31703000 0.13158503 23 | H 2.42615522 -1.70126022 0.54640042 24 | H 3.22722205 -0.94342530 2.77234941 25 | H 2.76588597 0.79491285 2.34803955 26 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-ethyl-4-phenyl-1,3-thiazole.xyz: -------------------------------------------------------------------------------- 1 | 24 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -4.14824094 -0.29450964 -0.15922779 4 | C -3.29146738 -0.14796810 1.10768600 5 | C -1.96763110 0.46371552 0.77350119 6 | N -0.88847843 -0.17617800 0.44271421 7 | C 0.16961172 0.68354696 0.12584331 8 | C -0.16090922 2.02146441 0.20358312 9 | S -1.79159508 2.24165718 0.67186174 10 | C 1.44541834 0.11027264 -0.26357949 11 | C 1.59384707 -1.28743380 -0.34360504 12 | C 2.81360354 -1.85261252 -0.72069918 13 | C 3.90868507 -1.03669609 -1.03342931 14 | C 3.77403588 
0.35589618 -0.95986411 15 | C 2.55702852 0.92384341 -0.57778207 16 | H -3.71555993 -1.02583215 -0.85498393 17 | H -5.15788058 -0.62554355 0.10932295 18 | H -4.23235517 0.67265295 -0.67938920 19 | H -3.13200699 -1.11776532 1.60017333 20 | H -3.80041613 0.51467108 1.81830900 21 | H 0.46303797 2.88983524 0.00787692 22 | H 0.72904555 -1.91145778 -0.10263084 23 | H 2.89910752 -2.94092802 -0.76930451 24 | H 4.85906592 -1.48269342 -1.33281346 25 | H 4.61617818 1.01004052 -1.19987556 26 | H 2.45787566 2.01202230 -0.51776280 27 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/6-pyridin-2-ylpyridine-3-sulfonic acid.xyz: -------------------------------------------------------------------------------- 1 | 24 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 3.79593614 -0.60949634 1.06970341 4 | C 4.48942328 -0.23981251 -0.08774576 5 | C 3.73258219 0.15344118 -1.20428098 6 | N 2.40096692 0.19139189 -1.20585929 7 | C 1.72952927 -0.16369064 -0.10869843 8 | C 2.40087325 -0.57475639 1.06410633 9 | C 0.24502031 -0.11519031 -0.15848658 10 | N -0.41717277 -0.56407022 0.91521668 11 | C -1.75346908 -0.54063571 0.92797473 12 | C -2.51089278 -0.04785012 -0.15178099 13 | C -1.82601022 0.42263858 -1.29632739 14 | C -0.43837594 0.37974143 -1.29862143 15 | S -4.25492746 0.02542083 -0.05691741 16 | O -4.83259927 -0.07646214 -1.38004319 17 | O -4.72015017 -0.81571577 1.01711055 18 | O -4.43604861 1.57640280 0.40533922 19 | H 4.32818796 -0.92212403 1.96880978 20 | H 5.57606247 -0.25081042 -0.13807716 21 | H 4.22390837 0.45304780 -2.13843362 22 | H 1.79435051 -0.85127914 1.92347927 23 | H -2.25622132 -0.92502685 1.81906926 24 | H -2.39322918 0.78910056 -2.15182275 25 | H 0.16301595 0.71785458 -2.14003609 26 | H -5.04075983 1.98788092 -0.24775374 27 | -------------------------------------------------------------------------------- 
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC(=C(N=C1)Cl)C(=O)NC2=NC=C(C=C2)Cl.xyz: -------------------------------------------------------------------------------- 1 | 24 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -4.02171952 -0.46021289 -0.56177786 4 | C -2.66335269 -0.19973692 -0.75307494 5 | C -1.94551166 0.51278673 0.23072664 6 | C -2.65980629 0.90829175 1.38076987 7 | N -3.95029163 0.65820536 1.56009862 8 | C -4.62382289 -0.00460828 0.61985273 9 | Cl -1.84840370 1.73219144 2.65689389 10 | C -0.49989603 0.89598562 0.03221158 11 | O -0.11313625 2.06044678 -0.06063592 12 | N 0.30041071 -0.21211083 -0.06865179 13 | C 1.69180402 -0.27615047 -0.27231777 14 | N 2.16145240 -1.51855584 -0.38140873 15 | C 3.45311444 -1.71892730 -0.60268825 16 | C 4.37468872 -0.65702058 -0.72597599 17 | C 3.88217326 0.65153612 -0.59042618 18 | C 2.52319550 0.84208545 -0.35598403 19 | Cl 6.02659929 -0.96715221 -1.02639818 20 | H -4.59837760 -0.99557761 -1.31408746 21 | H -2.14806725 -0.52620363 -1.66163700 22 | H -5.68812465 -0.17499253 0.81945498 23 | H -0.12842093 -1.13121179 0.03781445 24 | H 3.80716411 -2.74920171 -0.69599836 25 | H 4.57181691 1.49439049 -0.67523289 26 | H 2.09651174 1.83574284 -0.24560294 27 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:latest 2 | 3 | # Set the working directory in the container 4 | WORKDIR /app 5 | 6 | # Copy project files 7 | COPY . 
class GRASPAInputSchema(BaseModel):
    # Input schema for a gRASPA run. Documented with comments rather than a
    # class docstring so the generated schema description stays unchanged.

    # --- Locations -------------------------------------------------------
    # NOTE(review): output_path has no default, so it is a required field even
    # though its description mentions the "not provided" case - confirm intent.
    output_path: str = Field(
        description="Absolute or relative path to the directory where gRASPA output files will be stored. If not provided, the output will be stored in the current working directory.",
    )
    cif_path: str = Field(
        description="Absolute or relative path to the directory where the CIF file is stored.",
    )
    mof_name: str = Field(
        description="Name of the MOF excluding .cif extension",
    )

    # --- Simulation conditions -------------------------------------------
    adsorbate: str = Field(
        default="CO2",
        description="Name of the adsorbate molecule. Only support CO2, H2, CH4 and N2.",
    )
    temperature: float = Field(
        default=300,
        description="Simulation temperature in Kelvin.",
    )
    pressure: float = Field(
        default=1e5,
        description="Simulation pressure in Pascal.",
    )
    n_cycle: int = Field(
        default=100,
        description="Number of Monte Carlo steps to run in the GCMC simulation.",
    )
    cutoff: float = Field(
        default=12.8,
        description="The LJ and Coulomb cutoff in Angstrom",
    )

    # --- Executable selection --------------------------------------------
    # The default command is an absolute, site-specific path; as written it
    # redirects stdout to raspa.err and stderr to raspa.log.
    # NOTE(review): confirm that redirect order is intended.
    graspa_cmd: str = Field(
        default="/eagle/projects/HPCBot/thang/soft/gRASPA/src_clean/nvc_main.x > raspa.err 2> raspa.log",
        description="The command to run gRASPA. If not provided, the default command will be used.",
    )
    graspa_version: str = Field(
        default="cuda",
        description="The version of gRASPA to use. Only support 'cuda' and 'sycl'.",
    )
-5.24136675 1.20627674 -0.42242692 20 | H -3.12728453 2.53832603 -0.47417802 21 | H -0.08722373 2.19349083 -0.25305767 22 | H -2.96255947 -2.46028725 0.09500423 23 | H -5.15165654 -1.29085382 -0.13883039 24 | H 1.59614985 -2.16554744 0.35855318 25 | H 5.51086988 -0.30575986 0.38146332 26 | H 4.40954185 1.92921870 0.03604563 27 | H 1.96333430 2.11936289 -0.14191951 28 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/5-(5-fluoro-2-methoxyphenyl)-1H-pyrazol-3-amine.xyz: -------------------------------------------------------------------------------- 1 | 25 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -1.43088448 1.87061261 2.10546559 4 | O -0.60388531 1.15997605 1.15007001 5 | C -1.19176927 0.13467001 0.47587289 6 | C -0.35784511 -0.65638573 -0.35791118 7 | C -0.90961910 -1.77321863 -1.01696726 8 | C -2.26690539 -2.07040621 -0.88129079 9 | C -3.10320287 -1.26670718 -0.09565682 10 | C -2.54737068 -0.17371877 0.56911574 11 | F -2.76061124 -3.15960742 -1.54087026 12 | C 1.04395260 -0.31050151 -0.54556744 13 | C 1.79603667 0.84456074 -0.27828168 14 | C 3.11526289 0.54547210 -0.74974181 15 | N 3.17651823 -0.68059875 -1.27414623 16 | N 1.90601782 -1.17908142 -1.14080088 17 | N 4.20254044 1.36069175 -0.75194880 18 | H -2.25562082 2.39632493 1.60469240 19 | H -0.76603321 2.59452227 2.58878732 20 | H -1.84792260 1.18583300 2.85836734 21 | H -0.30741884 -2.42313699 -1.66312180 22 | H -4.16683341 -1.50140446 -0.00942188 23 | H -3.19050250 0.45556474 1.18537077 24 | H 1.43646116 1.75709618 0.17798504 25 | H 1.71253194 -2.14910709 -1.41084135 26 | H 5.09141651 0.89507494 -0.89009317 27 | H 4.22568655 2.14347486 -0.10906574 28 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC=C(C=C1)N2N=C(N=N2)C3=CN=CC=C3.xyz: 
def load_ollama_model(model_name: str, temperature: float) -> ChatOllama:
    """Create a LangChain ``ChatOllama`` client for a supported local model.

    The requested name is checked against ``supported_ollama_models`` before
    the client is constructed.

    Parameters
    ----------
    model_name : str
        Name of the Ollama model to load; must appear in
        ``supported_ollama_models``.
    temperature : float
        Sampling temperature passed to the model. Higher values (e.g., 0.8)
        give more random output, lower values (e.g., 0.2) more deterministic
        output.

    Returns
    -------
    ChatOllama
        The configured chat model instance.

    Raises
    ------
    ValueError
        If ``model_name`` is not in ``supported_ollama_models``.

    Notes
    -----
    The model must already be installed locally through Ollama.
    """
    is_supported = model_name in supported_ollama_models
    if not is_supported:
        message = (
            f"Unsupported model '{model_name}'. Supported models are: {supported_ollama_models}."
        )
        raise ValueError(message)

    chat_model = ChatOllama(model=model_name, temperature=temperature)
    print(f"Successfully loaded model: {model_name}")
    return chat_model
-0.45183738 1.97921507 22 | H -0.88907324 -1.69366220 0.72221507 23 | H 3.64998560 -0.40114698 -0.97907849 24 | H 4.31182508 1.89824821 1.84453749 25 | H 4.87454869 1.64183274 0.17738270 26 | H 3.73033022 2.94945922 0.55186358 27 | H 0.27677065 -1.80151411 -1.71582264 28 | H 2.00320649 -2.18020660 -1.87833920 29 | H 1.05421529 -2.87002640 -0.54643583 30 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-(5-chloropyridin-2-yl)-1H-quinolin-4-one.xyz: -------------------------------------------------------------------------------- 1 | 27 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -4.45283597 -0.24918104 0.67526354 4 | C -4.09197634 0.96804644 0.07001052 5 | C -2.75447487 1.23312608 -0.22030760 6 | C -1.76212117 0.28348657 0.09149315 7 | C -2.12183769 -0.93862154 0.69884765 8 | C -3.47064835 -1.18631545 0.98259463 9 | C -1.09125009 -1.95103078 1.03470372 10 | O -1.37591705 -3.03986318 1.57131260 11 | C 0.25720656 -1.56187711 0.68594190 12 | C 0.57409685 -0.35621451 0.09154223 13 | N -0.42676415 0.52775310 -0.18898551 14 | C 1.96623241 0.03194877 -0.26226534 15 | N 2.91229901 -0.86600499 0.03156717 16 | C 4.18550648 -0.62263723 -0.24067508 17 | C 4.62332840 0.57046685 -0.84450118 18 | C 3.65328814 1.52976976 -1.16352875 19 | C 2.32063787 1.25572104 -0.86969003 20 | Cl 6.27668953 0.84230861 -1.17770588 21 | H -5.49714619 -0.46604632 0.90649222 22 | H -4.84958944 1.71422872 -0.17777604 23 | H -2.47266814 2.18045158 -0.69147453 24 | H -3.70304593 -2.14277046 1.45174078 25 | H 1.06142067 -2.25361740 0.90282135 26 | H -0.19975567 1.41852127 -0.62725967 27 | H 4.91601273 -1.39479574 0.02244836 28 | H 3.95490258 2.46692406 -1.63249593 29 | H 1.56840983 2.00622290 -1.12011427 30 | -------------------------------------------------------------------------------- 
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CN1C2=C(C=C(C=C2)F)SC1=NC(=O)C3=C(SC(=C3)Cl)Cl.xyz: -------------------------------------------------------------------------------- 1 | 28 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -0.88810101 -2.05386180 0.20954546 4 | N -1.07406374 -0.62984432 -0.02471721 5 | C -2.33268645 -0.04054808 -0.10950492 6 | C -2.27396702 1.33460913 -0.37724971 7 | C -3.41484963 2.11920131 -0.49523359 8 | C -4.64881952 1.48583299 -0.32694931 9 | C -4.74533377 0.11205244 -0.05757523 10 | C -3.57668739 -0.64849400 0.05014505 11 | F -5.77932437 2.25104731 -0.43115528 12 | S -0.63002869 1.87897073 -0.52600771 13 | C -0.03744017 0.22967582 -0.19954787 14 | N 1.19580568 -0.19012975 -0.09472671 15 | C 2.27730851 0.63844275 -0.27479333 16 | O 2.22890617 1.83367752 -0.61013140 17 | C 3.58448388 -0.00514490 -0.04003410 18 | C 3.87953641 -1.29517132 0.37825122 19 | S 5.60087942 -1.54987262 0.49382495 20 | C 5.94195583 0.08164026 -0.01857810 21 | C 4.78738942 0.77961410 -0.25714462 22 | Cl 7.53428618 0.56882171 -0.14762264 23 | Cl 2.89695132 -2.59205533 0.78383952 24 | H 0.18046490 -2.26146426 0.32549742 25 | H -1.41382169 -2.34912791 1.12397106 26 | H -1.27785650 -2.62708468 -0.64022427 27 | H -3.35896528 3.18845300 -0.70863137 28 | H -5.72215961 -0.36579301 0.06753720 29 | H -3.67218274 -1.71550850 0.26257354 30 | H 4.73831984 1.82206142 -0.57889930 31 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-[difluoromethyl(propan-2-yloxy)phosphoryl]oxypropane.xyz: -------------------------------------------------------------------------------- 1 | 28 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -3.11656194 -0.30863386 -0.73427443 4 | C -2.33927007 0.92199733 -0.31605861 5 | C -3.13388231 1.83995672 0.59553404 6 | O -1.15184614 0.55239311 0.48426285 7 | P -0.05687130 -0.43414235 -0.09580116 8 | O -0.11439903 -0.71279925 
def load_alcf_model(model_name: str, base_url: str, api_key: str = None) -> ChatOpenAI:
    """Load a model served by the ALCF inference endpoints.

    See https://github.com/argonne-lcf/inference-endpoints for the service.

    Parameters
    ----------
    model_name : str
        The name of the model to load. See supported_alcf_models for list of
        supported models.
    base_url : str
        The base URL of the API endpoint.
    api_key : str, optional
        The API key (access token) for the endpoint. It must be supplied by
        the caller; no environment lookup is performed here.

    Returns
    -------
    ChatOpenAI
        An instance of LangChain's ChatOpenAI model bound to ``base_url``.

    Raises
    ------
    ValueError
        If ``api_key`` is None, or if ``model_name`` is not in
        ``supported_alcf_models``.
    Exception
        Any error raised while constructing the ``ChatOpenAI`` client is
        reported on stdout and then re-raised unchanged.
    """
    if api_key is None:
        raise ValueError("API key (access token) is not found")

    if model_name not in supported_alcf_models:
        raise ValueError(
            f"Model {model_name} is not supported on ALCF yet. Supported models are: {supported_alcf_models}"
        )

    try:
        llm = ChatOpenAI(
            model=model_name,
            base_url=base_url,
            api_key=api_key,
        )
    except Exception as e:
        print(f"Error with loading {model_name}")
        print(e)
        # Re-raise: falling through would hit `return llm` with `llm` unbound
        # and mask the real failure behind an UnboundLocalError.
        raise

    print(f"Successfully loaded model: {model_name} from {base_url}")
    return llm
"metadata": {}, 40 | "outputs": [], 41 | "source": [] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "chemgraph", 47 | "language": "python", 48 | "name": "python3" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 3 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython3", 60 | "version": "3.10.18" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 5 65 | } 66 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/12,16-dioxatetracyclo[11.2.1.02,11.03,8]hexadeca-2(11),3,5,7,9-pentaene.xyz: -------------------------------------------------------------------------------- 1 | 28 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 2.18649854 -0.27145213 -1.26134769 4 | C 3.11410383 -0.67645617 -0.09224732 5 | C 2.90503000 0.45667328 0.94572969 6 | O 2.41835999 1.55374675 0.17431488 7 | C 1.41764663 0.93187514 -0.66750549 8 | C 0.30052783 0.44906967 0.23105597 9 | C 0.63498550 0.07540467 1.52298316 10 | O 1.92965845 0.09884244 1.95372934 11 | C -0.34594372 -0.37651939 2.43515896 12 | C -1.64874923 -0.48279829 2.02364362 13 | C -2.03381836 -0.18344221 0.69126154 14 | C -3.36817871 -0.33199942 0.24856769 15 | C -3.71286788 -0.11089598 -1.06790333 16 | C -2.72581596 0.26913677 -2.00032332 17 | C -1.41799063 0.43427211 -1.58478402 18 | C -1.03073762 0.25210615 -0.23245660 19 | H 2.78309032 0.09828369 -2.10565705 20 | H 1.51802244 -1.06163873 -1.63243190 21 | H 2.92497658 -1.66806429 0.33862326 22 | H 4.16775612 -0.64641028 -0.40128872 23 | H 3.81860123 0.73572781 1.50474667 24 | H 1.08318543 1.65724571 -1.41852205 25 | H -0.01674861 -0.62844262 3.44065269 26 | H -2.41975619 -0.83049131 2.71562758 27 | H -4.11505406 -0.64036768 0.98361211 28 | H -4.74834180 -0.24132105 -1.38764473 29 | H 
-2.97448523 0.42973425 -3.05157470 30 | H -0.64395490 0.70818113 -2.30602023 31 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1CC(C1)(C2=CC=CC3=CC=CC=C32)O.xyz: -------------------------------------------------------------------------------- 1 | 29 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -3.32437796 0.29799744 1.07308272 4 | C -2.85672815 0.69184829 -0.34087939 5 | C -1.59468160 -0.17634537 -0.12646757 6 | C -1.87957021 -0.13327105 1.39927860 7 | C -0.25569700 0.31885007 -0.62376215 8 | C -0.16651635 1.34694612 -1.54330498 9 | C 1.08196512 1.90777099 -1.89044702 10 | C 2.23397873 1.41688652 -1.32446168 11 | C 2.19688356 0.32054255 -0.43151507 12 | C 3.38290655 -0.20112761 0.13618644 13 | C 3.35535183 -1.30654441 0.95905664 14 | C 2.13029875 -1.94860362 1.22965095 15 | C 0.95773796 -1.45443734 0.68773664 16 | C 0.94495485 -0.28920860 -0.11391632 17 | O -1.89222330 -1.48976961 -0.54129030 18 | H -4.00275674 -0.56455527 1.04627807 19 | H -3.73078310 1.10816832 1.68688585 20 | H -2.60526695 1.75853153 -0.38740162 21 | H -3.47751169 0.37931063 -1.18525871 22 | H -1.73969165 -1.08979042 1.91145637 23 | H -1.31002167 0.67395033 1.88229807 24 | H -1.07505402 1.74867022 -1.99700570 25 | H 1.11580820 2.73867471 -2.59813466 26 | H 3.20811307 1.84860119 -1.56079367 27 | H 4.32439228 0.29746311 -0.10382124 28 | H 4.28129027 -1.69303237 1.38802840 29 | H 2.09337507 -2.83858214 1.86188190 30 | H 0.01410155 -1.97058138 0.86778136 31 | H -1.41027742 -1.69836285 -1.36114196 32 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CN1C2=C(C(=O)NC1=O)N(C(=S)N2)CCOC.xyz: -------------------------------------------------------------------------------- 1 | 29 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 3.43125121 1.85659763 -0.04076604 4 | N 2.57854521 0.68275966 
0.06556276 5 | C 1.38078866 0.57964864 -0.55099334 6 | C 0.56547477 -0.52893030 -0.50170189 7 | C 0.92264548 -1.68367985 0.25387835 8 | O 0.28282563 -2.73318935 0.38041523 9 | N 2.15125973 -1.50240997 0.90469080 10 | C 3.01252388 -0.40595582 0.85231560 11 | O 4.08733659 -0.38207014 1.45119189 12 | N -0.54206437 -0.24236969 -1.27040320 13 | C -0.47657724 1.03841099 -1.76023834 14 | S -1.59210187 1.85674665 -2.63195526 15 | N 0.77372641 1.52016511 -1.32417641 16 | C -1.71480491 -1.09618954 -1.41574130 17 | C -2.50271925 -1.11372775 -0.11969700 18 | O -2.60406830 0.22939551 0.35357713 19 | C -3.57075603 0.31160638 1.41330681 20 | H 3.00171112 2.69005227 0.52850684 21 | H 3.53477905 2.13214006 -1.09645752 22 | H 4.40363324 1.58976566 0.37386014 23 | H 2.48400974 -2.28020567 1.48261451 24 | H 1.05877174 2.48570627 -1.49696687 25 | H -1.39519310 -2.10855111 -1.68387579 26 | H -2.30461643 -0.65847069 -2.22610965 27 | H -2.00357632 -1.75117368 0.62828846 28 | H -3.50315815 -1.53246666 -0.31157002 29 | H -4.58752741 0.07016517 1.05721689 30 | H -3.56358447 1.34535174 1.78534927 31 | H -3.30853462 -0.36912151 2.24412058 32 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC=C(C(=C1)C2=NC3=C(C=CC(=C3)F)NC2=O)N.xyz: -------------------------------------------------------------------------------- 1 | 29 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -4.02938231 1.28992441 0.05757502 4 | C -4.59162838 0.01158521 0.16071654 5 | C -3.76256907 -1.10292695 0.21176614 6 | C -2.35711773 -0.98818438 0.15324218 7 | C -1.76452036 0.33542259 0.09858286 8 | C -2.64310250 1.43122830 0.02079840 9 | C -0.31519789 0.50935286 0.06018861 10 | N 0.45932733 -0.52571509 -0.22370305 11 | C 1.81179981 -0.44622405 -0.33083766 12 | C 2.47397279 0.78395030 -0.09938841 13 | C 3.86203968 0.86678910 -0.21416942 14 | C 4.61698190 -0.25119629 -0.55568550 15 | C 3.95935402 -1.47601846 -0.77887828 16 | C 
2.57904411 -1.58443879 -0.66929225 17 | F 4.68119860 -2.58811219 -1.11127876 18 | N 1.67366150 1.84942752 0.24382517 19 | C 0.29350029 1.83069696 0.35360918 20 | O -0.29869235 2.86258787 0.69657243 21 | N -1.58672253 -2.09652470 0.20100952 22 | H -4.65789368 2.18106040 0.00751747 23 | H -5.67364570 -0.12626890 0.20357945 24 | H -4.17202297 -2.11231016 0.28961876 25 | H -2.19974833 2.42444786 -0.04423974 26 | H 4.35325829 1.82400881 -0.03337554 27 | H 5.70288785 -0.18155132 -0.64710621 28 | H 2.06892746 -2.53430042 -0.84003660 29 | H 2.10344700 2.75224137 0.45325047 30 | H -1.99827828 -3.00301402 0.01897851 31 | H -0.58887859 -1.93593788 -0.01480556 32 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/3-(4-methylphenyl)-5-pyridin-4-yl-1,2,4-oxadiazole.xyz: -------------------------------------------------------------------------------- 1 | 29 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 5.67830305 0.39194717 -0.30580249 4 | C 4.22060466 0.11294013 -0.09922305 5 | C 3.23599815 0.95391405 -0.63950495 6 | C 1.87587750 0.67776800 -0.47672575 7 | C 1.48319743 -0.46145916 0.23614962 8 | C 2.44723034 -1.31847103 0.78468861 9 | C 3.80303731 -1.02425094 0.61084484 10 | C 0.06475641 -0.77769424 0.41786970 11 | N -0.38792218 -1.82130028 1.06731845 12 | O -1.83822633 -1.65117563 0.94593610 13 | C -2.06570271 -0.51314260 0.22790574 14 | N -0.91665307 0.05304610 -0.11612726 15 | C -3.40385396 -0.03249089 -0.08662581 16 | C -3.55732927 1.14717769 -0.83490453 17 | C -4.85536963 1.59496803 -1.12836845 18 | N -5.96445142 0.95871171 -0.73315721 19 | C -5.81318078 -0.16158503 -0.02307638 20 | C -4.56690321 -0.70442933 0.32962179 21 | H 5.86680514 1.46351878 -0.43519839 22 | H 6.27364598 0.02577482 0.53839401 23 | H 6.04526216 -0.11863135 -1.20996784 24 | H 3.54334113 1.84474720 -1.19479344 25 | H 1.09724619 1.32339992 -0.89046868 26 | H 2.08654937 -2.19250234 1.33092356 27 | H 
4.56340877 -1.68547447 1.03701236 28 | H -2.67807499 1.69685754 -1.17708665 29 | H -4.99687599 2.51184978 -1.71071285 30 | H -6.73103650 -0.66881279 0.29102242 31 | H -4.50968353 -1.62520082 0.91209027 32 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/(E)-1-(5-bromo-2-hydroxyphenyl)-3-(4-fluorophenyl)prop-2-en-1-one.xyz: -------------------------------------------------------------------------------- 1 | 29 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -3.03211903 -1.06098584 -0.29219236 4 | C -4.35781944 -0.86956820 -0.67304482 5 | C -4.85365791 0.43343553 -0.76682192 6 | C -4.02924746 1.53270763 -0.48236520 7 | C -2.71153607 1.31306802 -0.11010990 8 | C -2.16710428 0.01501800 -0.00416667 9 | C -0.76996905 -0.17816120 0.34646871 10 | C -0.17537867 -1.37845180 0.57538141 11 | C 1.24364050 -1.59135243 0.93805734 12 | O 1.56944410 -2.56303586 1.62818402 13 | C 2.27469214 -0.60235124 0.47720606 14 | C 2.33890694 -0.08821277 -0.82853657 15 | C 3.33856397 0.83714956 -1.16271887 16 | C 4.32105892 1.19348998 -0.24714808 17 | C 4.32113775 0.59578101 1.02252265 18 | C 3.32400653 -0.31825307 1.36401233 19 | Br 5.67503933 1.00577762 2.26347482 20 | O 1.43668825 -0.52586862 -1.76419351 21 | F -6.15245147 0.64390340 -1.14334310 22 | H -2.65267619 -2.08218709 -0.22829337 23 | H -4.99886776 -1.72729476 -0.90100137 24 | H -4.42486342 2.55033256 -0.56170820 25 | H -2.07357044 2.17597832 0.10583139 26 | H -0.17004909 0.73674608 0.40642242 27 | H -0.76827345 -2.29264833 0.64060306 28 | H 3.34822731 1.26606706 -2.17219390 29 | H 5.11012009 1.90665238 -0.48706158 30 | H 3.35955653 -0.79563132 2.34633573 31 | H 1.67650135 -0.13210462 -2.62156677 32 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/1-benzyl-5-nitroindole.xyz: 
-------------------------------------------------------------------------------- 1 | 31 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -3.83550144 1.28694623 1.22766501 4 | C -3.69365463 -0.10407484 1.32784616 5 | C -2.88720063 -0.79241377 0.41469456 6 | C -2.20871392 -0.09183451 -0.58532509 7 | C -2.37374428 1.29252252 -0.70524484 8 | C -3.17342826 1.98420415 0.20931041 9 | C -1.26302740 -0.83047475 -1.50150844 10 | N -0.04399908 -1.26754106 -0.80759101 11 | C 0.13764305 -2.48108921 -0.18371592 12 | C 1.38042210 -2.49080314 0.43292571 13 | C 2.00418155 -1.22018501 0.18858192 14 | C 1.06851997 -0.47012255 -0.60406251 15 | C 1.32509304 0.83343025 -1.03402470 16 | C 2.53229622 1.42738362 -0.66121878 17 | C 3.46456795 0.69846190 0.12247404 18 | C 3.20785838 -0.62307824 0.54861454 19 | N 4.69541875 1.31731314 0.48259085 20 | O 4.91730373 2.49116363 0.09954673 21 | O 5.52143117 0.67096304 1.17097513 22 | H -4.46503284 1.82544697 1.93867607 23 | H -4.20865677 -0.65689427 2.11574637 24 | H -2.76812853 -1.87781596 0.47816516 25 | H -1.85862238 1.81610481 -1.51567920 26 | H -3.27639858 3.06767860 0.12168668 27 | H -0.95840885 -0.17834252 -2.32467112 28 | H -1.75125312 -1.73044736 -1.89599723 29 | H -0.62532814 -3.25114350 -0.23069386 30 | H 1.81772932 -3.29815387 1.00327688 31 | H 0.59746035 1.36745740 -1.64255700 32 | H 2.78001476 2.44230030 -0.95855363 33 | H 3.94115850 -1.15696198 1.14751591 34 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-[(3,4-dichlorophenyl)methyl-(2-hydroxyethyl)amino]ethanol.xyz: -------------------------------------------------------------------------------- 1 | 31 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C 1.41113719 0.30441220 1.00289002 4 | C 2.47059921 -0.27620603 1.70558428 5 | C 3.58473472 -0.77301007 1.01716525 6 | C 3.63079667 -0.66764851 -0.40067747 7 | C 2.56308069 -0.07352057 -1.08179085 8 | C 1.44483513 0.38733918 
-0.38442607 9 | C 0.26571228 1.00283839 -1.13060152 10 | N -0.99223124 0.37333679 -0.80620107 11 | C -2.15085300 1.26796039 -0.75997395 12 | C -2.15658672 2.02066522 0.56562004 13 | O -1.82677253 1.20433917 1.68481878 14 | C -1.22398160 -0.98664877 -1.26884511 15 | C -1.44614435 -1.94930691 -0.11662729 16 | O -2.39107692 -1.34489096 0.78707470 17 | Cl 4.96527505 -1.26105103 -1.28358624 18 | Cl 4.86713822 -1.49498966 1.87896628 19 | H 0.54137125 0.69589860 1.53784332 20 | H 2.43173712 -0.35089725 2.78774167 21 | H 2.59627398 0.01048808 -2.16756402 22 | H 0.46023354 0.94327311 -2.21726974 23 | H 0.17436071 2.06569061 -0.85824868 24 | H -3.05516676 0.65363025 -0.85187841 25 | H -2.08873661 1.99439674 -1.58638079 26 | H -3.15630082 2.45080623 0.73737625 27 | H -1.41713141 2.83256145 0.52818149 28 | H -2.05943980 0.28013971 1.41307976 29 | H -2.12490335 -0.99305252 -1.90506145 30 | H -0.35350126 -1.31206533 -1.85341286 31 | H -1.86694299 -2.88883540 -0.50074602 32 | H -0.50013162 -2.13608660 0.41333282 33 | H -2.59738478 -1.97956651 1.49306566 34 | -------------------------------------------------------------------------------- /scripts/evaluations/generate_evaluation_data/Exp6/manual_files/7-methoxy-1,2-dimethyl-9H-pyrido[3,4-b]indol-2-ium.xyz: -------------------------------------------------------------------------------- 1 | 32 2 | Properties=species:S:1:pos:R:3 pbc="F F F" 3 | C -2.91071297 1.93710299 0.39682845 4 | C -2.37610623 0.56400510 0.20288669 5 | N -3.23287414 -0.45124443 -0.09367959 6 | C -2.81445118 -1.73453628 -0.28198818 7 | C -1.47995456 -2.08785943 -0.19082162 8 | C -0.55787649 -1.08428071 0.09057440 9 | C -1.03098669 0.24163831 0.28348975 10 | N 0.06556436 1.05433635 0.51753142 11 | C 1.22862332 0.30072620 0.48241581 12 | C 0.86899480 -1.04671013 0.21917684 13 | C 1.86620396 -2.02586178 0.12268138 14 | C 3.19080140 -1.65624659 0.28499981 15 | C 3.54558618 -0.29765054 0.54254973 16 | C 2.54473066 0.70412448 0.64841113 17 | O 4.87161812 -0.02938629 
class EMTCalc(BaseModel):
    """Effective Medium Theory (EMT) calculator configuration.

    This class defines the configuration parameters for the EMT calculator,
    which is a simple empirical potential for metals. It provides a fast
    approximation for metallic systems.

    Parameters
    ----------
    calculator_type : str, optional
        Calculator type. Currently supports only 'emt', by default 'emt'
    asap_cutoff : bool, optional
        If True, the cutoff mimics how ASAP does it; the global cutoff is
        chosen from the largest atom present in the simulation, by default False

    Notes
    -----
    The EMT calculator is a simple empirical potential that works well for
    metallic systems. It is particularly useful for quick calculations and
    as a starting point for more accurate methods.
    """

    # NOTE(review): pydantic exposes the class docstring and the Field
    # descriptions through the model's JSON schema, so that text is kept
    # verbatim here.
    calculator_type: str = Field(
        description="Calculator type. Currently supports only 'emt'.",
        default="emt",
    )
    asap_cutoff: bool = Field(
        description="If True, the cutoff mimics how ASAP does it; the global cutoff is chosen from the largest atom present in the simulation.",
        default=False,
    )

    def get_calculator(self):
        """Build the ASE EMT calculator described by this configuration.

        Returns
        -------
        EMT
            ``ase.calculators.emt.EMT`` constructed with this model's
            ``asap_cutoff`` setting.

        Raises
        ------
        ValueError
            If ``calculator_type`` is anything other than ``'emt'``.
        """
        if self.calculator_type == "emt":
            # ASE is imported lazily, and only once the type has been accepted.
            from ase.calculators.emt import EMT

            use_asap_cutoff = self.asap_cutoff
            return EMT(asap_cutoff=use_asap_cutoff)

        raise ValueError("Invalid calculator_type. The only valid option is 'emt'.")
@tool
def calculator(expression: str) -> str:
    """Evaluate mathematical expressions safely.

    This function provides a safe way to evaluate mathematical expressions
    using numexpr. It supports basic mathematical operations and common
    mathematical functions.

    Parameters
    ----------
    expression : str
        Mathematical expression to evaluate (e.g., "2 * pi + 5")

    Returns
    -------
    str
        String result or error message

    Notes
    -----
    Supported mathematical functions:
    - Basic operations: +, -, *, /, **
    - Trigonometric: sin, cos, tan
    - Other: sqrt, abs
    - Constants: pi, e
    """
    # NOTE(review): with @tool the docstring above presumably doubles as the
    # tool description shown to the model, so its wording is left untouched.

    # Names made available to the expression in addition to numexpr's own
    # built-in operators and functions.
    local_dict = {
        "pi": math.pi,
        "e": math.e,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "sqrt": math.sqrt,
        "abs": abs,
    }

    try:
        cleaned_expression = expression.strip()
        if not cleaned_expression:
            return "Error: Empty expression"

        result = numexpr.evaluate(
            cleaned_expression,
            global_dict={},  # do not expose this module's globals
            local_dict=local_dict,
        )

        # numexpr.evaluate returns a NumPy array (0-d for scalar expressions),
        # never a plain int/float, so the former
        # `isinstance(result, (int, float))` branch could not run. It is
        # removed rather than revived: its fixed "%.6f" formatting would
        # have rendered small magnitudes such as 1e-8 as "0".
        return str(result)

    except Exception as e:
        # Errors are reported to the caller as text instead of being raised.
        return f"Error evaluating expression: {e!s}"


# SECURITY NOTE(review): PythonREPL executes whatever Python source it is
# given. Anything that can reach `repl_tool` can run arbitrary code in this
# process; confirm it is only exposed in trusted or sandboxed deployments.
python_repl = PythonREPL()
repl_tool = Tool(
    name="python_repl",
    description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
    func=python_repl.run,
)
class AIMNET2Calc(BaseModel):
    """AIMNET2 calculator configuration.

    This class defines the configuration parameters for AIMNET2 machine learning
    models used in molecular simulations.

    Parameters
    ----------
    calculator_type : str, optional
        Type of calculator to use. The only supported value is 'aimnet2' (default).
    model : str or Path, optional
        Name of, or path to, the model. Defaults to 'aimnet2'; None is treated
        the same as the default.
    """

    calculator_type: str = Field(
        default="aimnet2",
        description="Type of calculator. Options: 'aimnet2' (default) ",
    )
    model: Optional[Union[str, Path]] = Field(
        default='aimnet2',
        description="Path to the model. If None, it will use the default model for the selected calculator type. "
        "Options: 'aimnet2' ",
    )

    def get_calculator(self):
        """Build the AIMNet2 ASE calculator described by this configuration.

        Returns
        -------
        AIMNet2ASE
            An ``aimnet2calc.AIMNet2ASE`` instance created for ``model``.

        Raises
        ------
        ValueError
            If ``calculator_type`` is anything other than ``'aimnet2'``.
        """
        # Validate before importing so a bad configuration is reported even
        # when aimnet2calc is not installed.
        if self.calculator_type != "aimnet2":
            raise ValueError(
                "Invalid calculator_type. The only valid option is 'aimnet2'."
            )

        from aimnet2calc import AIMNet2ASE

        # The field is Optional: fall back to its declared default instead of
        # handing None to the calculator.
        model = "aimnet2" if self.model is None else self.model

        # Hand over a plain string for both model names and Path objects.
        # NOTE(review): presumably AIMNet2ASE only interprets `str` as a model
        # name/path - confirm against the installed aimnet2calc version.
        return AIMNet2ASE(os.fspath(model))
-1.94024275 -1.17814558 29 | H -2.32494874 -2.48183467 0.40907154 30 | H -1.97588196 -1.52887105 1.89525074 31 | H -3.29697117 -1.35656372 -1.58485082 32 | H -4.71748272 0.50794702 -2.43025201 33 | H -5.25313876 2.44752807 -0.93722407 34 | H -4.35395294 2.48879650 1.40190533 35 | H -2.88887937 0.63719673 2.19456849 36 | H 1.30524707 2.08390128 0.87161404 37 | H 5.23004490 0.78080685 -0.39383736 38 | H 2.10174878 -2.12092913 0.28224654 39 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "ChemGraph" 7 | version = "1.0.0" 8 | description = "A computational chemistry agent for molecular simulation tasks." 9 | authors = [ 10 | { name = "Thang Pham", email = "tpham@anl.gov" }, 11 | { name = "Murat Keçeli", email = "keceli@anl.gov" }, 12 | { name = "Aditya Tanikanti", email = "atanikanti@anl.gov" } 13 | ] 14 | requires-python = ">=3.10" 15 | dependencies = [ 16 | "ase==3.25.0", 17 | "rdkit==2025.3.3", 18 | "langgraph==0.4.7", 19 | "langchain-openai==0.3.27", 20 | "langchain-ollama==0.3.4", 21 | "langchain-anthropic==0.3.17", 22 | "langchain-google-genai==2.1.7", 23 | "langchain-groq", 24 | "langchain-experimental==0.3.4", 25 | "pydantic==2.11.7", 26 | "pandas==2.2.3", 27 | "pubchempy @ git+https://github.com/keceli/PubChemPy.git@main", 28 | "pyppeteer==2.0.0", 29 | "numpy==2.2.6", 30 | "numexpr==2.11.0", 31 | "tblite==0.4.0", 32 | "pytest==8.4.1", 33 | "deepdiff==8.5.0", 34 | "pymatgen==2025.3.10", 35 | "mace-torch==0.3.13", 36 | "streamlit==1.48.1", 37 | "stmol==0.0.9", 38 | "ipython-genutils==0.2.0", 39 | "langsmith==0.3.45", 40 | "rich==14.1.0", 41 | "toml==0.10.2" 42 | ] 43 | 44 | [project.optional-dependencies] 45 | uma = [ 46 | "fairchem-core==2.3.0", 47 | "e3nn>=0.5", 48 | ] 49 | ui = [ 50 | "streamlit", 51 | 
"stmol", 52 | "ipython-genutils", 53 | ] 54 | 55 | [project.urls] 56 | "Homepage" = "https://github.com/argonne-lcf/ChemGraph" 57 | "Repository" = "https://github.com/argonne-lcf/ChemGraph" 58 | 59 | [project.scripts] 60 | chemgraph = "ui.cli:main" 61 | 62 | [tool.setuptools.packages.find] 63 | where = ["src/"] 64 | 65 | [tool.ruff] 66 | line-length = 88 # Match Black's default (adjust as needed) 67 | target-version = "py310" # Adjust based on your Python version 68 | exclude = ["notebooks/"] # Add files/folders to ignore 69 | 70 | [tool.ruff.format] 71 | quote-style = "preserve" # Keep existing quote style 72 | indent-style = "space" # Use spaces for indentation 73 | skip-magic-trailing-comma = false # Ensure Black-style formatting 74 | 75 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: ChemGraph Docs 2 | repo_name: ChemGraph 3 | repo_url: https://github.com/argonne-lcf/ChemGraph/tree/ 4 | 5 | theme: 6 | name: material 7 | 8 | # Font settings 9 | font: 10 | text: Merriweather Sans 11 | code: Roboto Mono 12 | 13 | features: 14 | - content.code.copy 15 | - content.code.annotate 16 | - navigation.footer 17 | - search.suggest 18 | - header.autohide 19 | - navigation.tabs 20 | 21 | palette: 22 | # Automatic mode 23 | - media: "(prefers-color-scheme)" 24 | toggle: 25 | icon: material/brightness-auto 26 | name: Switch to light mode 27 | 28 | # Light Mode 29 | - media: "(prefers-color-scheme: light)" 30 | scheme: default 31 | toggle: 32 | icon: material/brightness-7 33 | name: Switch to dark mode 34 | primary: green 35 | accent: deep purple 36 | 37 | # Dark Mode 38 | - media: "(prefers-color-scheme: dark)" 39 | scheme: slate 40 | toggle: 41 | icon: material/brightness-4 42 | name: Switch to system preferences 43 | primary: indigo 44 | accent: deep orange 45 | 46 | plugins: 47 | - mkdocstrings 48 | - search 49 | 50 | nav: 51 | - 
def get_random_molecule_names(n=2, cid_range=(0, 10000000), seed=2025, max_natoms=20, min_natoms=6):
    """Get a list of random molecule names and SMILES sampled from PubChem.

    Random CIDs are drawn from ``cid_range``; each one is resolved to a name,
    the name to a SMILES string, and the SMILES to an atoms structure. A
    molecule is kept only when its atom count lies strictly between
    ``min_natoms`` and ``max_natoms``.

    Args:
        n (int): Number of molecules to retrieve.
        cid_range (tuple): Inclusive ``(low, high)`` range of PubChem CIDs to
            sample from.
        seed (int): Seed for the global ``random`` module, for reproducibility.
        max_natoms (int): Exclusive upper bound on the number of atoms.
        min_natoms (int): Exclusive lower bound on the number of atoms.

    Returns:
        list: One dict per accepted molecule with the keys ``index``,
        ``name``, ``number_of_atoms`` and ``smiles``. May hold fewer than
        ``n`` entries if every CID in ``cid_range`` has been tried.
    """
    random.seed(seed)
    output = []
    tried = set()
    count = 0

    low, high = cid_range
    total_cids = high - low + 1

    while len(output) < n:
        # Every CID in the range has been drawn once; without this guard the
        # loop would spin forever on already-tried CIDs.
        if len(tried) >= total_cids:
            print(f"Exhausted CID range {cid_range} after collecting {len(output)} molecules")
            break

        cid = random.randint(low, high)
        if cid in tried:
            continue
        tried.add(cid)

        try:
            compound = pcp.Compound.from_cid(cid)
            name = compound.iupac_name or (compound.synonyms[0] if compound.synonyms else None)
            if not name:
                continue

            smiles = molecule_name_to_smiles.invoke({"name": name})
            atomsdata = smiles_to_atomsdata.invoke({"smiles": smiles})
            natoms = len(atomsdata.numbers)

            if min_natoms < natoms < max_natoms:
                molecule_info = {
                    "index": count,
                    "name": name,
                    "number_of_atoms": natoms,
                    "smiles": smiles,
                }
                output.append(molecule_info)
                count += 1
                print(count)
            else:
                # Covers both too-small and too-large molecules.
                print(
                    f"{name} has {natoms} atoms, outside the allowed range "
                    f"({min_natoms}, {max_natoms}), skipping..."
                )

        except Exception as exc:
            # Best effort: a CID that cannot be resolved is skipped, but the
            # reason is reported instead of being silently discarded.
            print(f"Skipping CID {cid}: {exc}")
        finally:
            # Throttle after every attempted lookup, including failed or
            # skipped ones, which previously bypassed the delay.
            time.sleep(0.5)

    return output


def main():
    """Sample 60 molecules and write them to ``pubchempy_molecule_max.json``."""
    output = get_random_molecule_names(n=60, seed=2025)
    with open('pubchempy_molecule_max.json', 'w') as f:
        json.dump(output, f, indent=4)


if __name__ == "__main__":
    main()
@tool
def molecule_name_to_smiles(name: str) -> str:
    """Convert a molecule name to SMILES format.

    Parameters
    ----------
    name : str
        The name of the molecule to convert.

    Returns
    -------
    str
        The SMILES string representation of the molecule.

    Raises
    ------
    IndexError
        If the molecule name is not found in PubChem.
    """
    # NOTE: the docstring above is consumed by the @tool decorator, so its
    # wording is kept as-is.
    matches = pubchempy.get_compounds(str(name), "name")
    # Indexing an empty result list is what produces the documented IndexError.
    first_match = matches[0]
    return first_match.canonical_smiles


@tool
def smiles_to_atomsdata(smiles: str, randomSeed: int = 2025) -> AtomsData:
    """Convert a SMILES string to AtomsData format.

    Parameters
    ----------
    smiles : str
        SMILES string representation of the molecule.
    randomSeed : int, optional
        Random seed for RDKit 3D structure generation, by default 2025.

    Returns
    -------
    AtomsData
        AtomsData object containing the molecular structure.

    Raises
    ------
    ValueError
        If the SMILES string is invalid or if 3D structure generation fails.
    """
    # RDKit is imported lazily so the module can be loaded without it.
    from rdkit import Chem
    from rdkit.Chem import AllChem

    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        raise ValueError("Invalid SMILES string.")

    # Explicit hydrogens are required before embedding 3D coordinates.
    mol_with_h = Chem.AddHs(parsed)

    embed_status = AllChem.EmbedMolecule(mol_with_h, randomSeed=randomSeed)
    if embed_status != 0:
        raise ValueError("Failed to generate 3D coordinates.")

    optimize_status = AllChem.UFFOptimizeMolecule(mol_with_h)
    if optimize_status != 0:
        raise ValueError("Failed to optimize 3D geometry.")

    # Collect atomic numbers and Cartesian positions in atom-index order.
    conformer = mol_with_h.GetConformer()
    atomic_numbers = []
    for atom in mol_with_h.GetAtoms():
        atomic_numbers.append(atom.GetAtomicNum())
    coordinates = []
    for atom_index in range(mol_with_h.GetNumAtoms()):
        coordinates.append(list(conformer.GetAtomPosition(atom_index)))

    # Isolated molecule: zero cell and no periodic boundary conditions.
    return AtomsData(
        numbers=atomic_numbers,
        positions=coordinates,
        cell=[[0, 0, 0], [0, 0, 0], [0, 0, 0]],
        pbc=[False, False, False],
    )
2 | 3 | FROM ubuntu:22.04 AS cpu-test-arm 4 | 5 | ENV CCACHE_DIR=/root/.cache/ccache 6 | 7 | ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache 8 | 9 | RUN --mount=type=cache,target=/var/cache/apt \ 10 | apt-get update -y \ 11 | && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \ 12 | && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \ 13 | && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 14 | 15 | # tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access of commonly-used objects. 16 | RUN --mount=type=cache,target=/root/.cache/pip \ 17 | pip install py-cpuinfo # Use this to gather CPU info and optimize based on ARM Neoverse cores 18 | 19 | # Set LD_PRELOAD for tcmalloc on ARM 20 | ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4" 21 | 22 | RUN echo 'ulimit -c 0' >> ~/.bashrc 23 | 24 | WORKDIR /workspace 25 | 26 | ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" 27 | ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} 28 | RUN --mount=type=cache,target=/root/.cache/pip \ 29 | --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \ 30 | pip install --upgrade pip && \ 31 | pip install -r requirements/build.txt 32 | 33 | FROM cpu-test-arm AS build 34 | 35 | WORKDIR /workspace/vllm 36 | 37 | RUN --mount=type=cache,target=/root/.cache/pip \ 38 | --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \ 39 | --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \ 40 | pip install -v -r requirements/cpu.txt 41 | 42 | COPY . . 
43 | ARG GIT_REPO_CHECK=0 44 | RUN --mount=type=bind,source=.git,target=.git \ 45 | if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi 46 | 47 | # Disabling AVX512 specific optimizations for ARM 48 | ARG VLLM_CPU_DISABLE_AVX512="true" 49 | ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} 50 | ENV MAX_JOBS=1 51 | 52 | RUN --mount=type=cache,target=/root/.cache/pip \ 53 | --mount=type=cache,target=/root/.cache/ccache \ 54 | --mount=type=bind,source=.git,target=.git \ 55 | VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \ 56 | pip install dist/*.whl && \ 57 | rm -rf dist 58 | 59 | WORKDIR /workspace/ 60 | 61 | RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks 62 | 63 | ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] -------------------------------------------------------------------------------- /src/chemgraph/models/calculators/mopac_calc.py: -------------------------------------------------------------------------------- 1 | # Keywords and parameters obtained from QCEngine: https://github.com/MolSSI/QCEngine 2 | # MOPAC parameters for CompChemAgent 3 | 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class MopacCalc(BaseModel): 8 | """MOPAC calculator configuration. 9 | 10 | This class defines the configuration parameters for the MOPAC calculator, 11 | which is a semi-empirical quantum chemistry program. It provides various 12 | semi-empirical methods for molecular calculations. 13 | 14 | Parameters 15 | ---------- 16 | calculator_type : str, optional 17 | Type of calculator. Currently supports only 'mopac', by default 'mopac' 18 | method : str, optional 19 | Computational method to be used. 
Available methods include: 20 | ['mndo', 'am1', 'pm3', 'rm1', 'mndod', 'pm6', 'pm6-d3', 'pm6-dh+', 21 | 'pm6-dh2', 'pm6-dh2x', 'pm6-d3h4', 'pm6-3dh4x', 'pm7', 'pm7-ts'], 22 | by default 'am1' 23 | iter : int, optional 24 | Maximum number of self-consistent field (SCF) iterations allowed, 25 | by default 100 26 | pulay : bool, optional 27 | Enable Pulay's convergence acceleration for the SCF procedure, 28 | by default True 29 | 30 | Notes 31 | ----- 32 | MOPAC is a semi-empirical quantum chemistry program that provides 33 | various methods for molecular calculations. The available methods 34 | range from basic semi-empirical methods (MNDO, AM1, PM3) to more 35 | advanced ones with dispersion corrections (PM6-D3, PM6-DH+). 36 | """ 37 | 38 | calculator_type: str = Field( 39 | default="mopac", 40 | description="Type of calculator. Currently supports only 'mopac'.", 41 | ) 42 | method: str = Field( 43 | default="am1", 44 | description="Computational method to be used. Available methods include ['mndo', 'am1', 'pm3', 'rm1', 'mndod', 'pm6', 'pm6-d3', 'pm6-dh+', 'pm6-dh2', 'pm6-dh2x', 'pm6-d3h4', 'pm6-3dh4x', 'pm7', 'pm7-ts']", 45 | ) 46 | iter: int = Field( 47 | default=100, 48 | description="Maximum number of self-consistent field (SCF) iterations allowed.", 49 | ) 50 | pulay: bool = Field( 51 | default=True, 52 | description="Enable Pulay's convergence acceleration for the SCF procedure.", 53 | ) 54 | 55 | def get_calculator(self): 56 | """Get MOPAC calculator parameters. 
def _h2_molecule():
    """Return the two-atom H2 test system shared by all calculator tests."""
    return Atoms("H2", positions=[[0, 0, 0], [0, 0, 1]])


def _assert_energy_and_forces(atoms):
    """Check that energy is a float and forces form a (2, 3) array."""
    energy = atoms.get_potential_energy()
    assert isinstance(energy, float)

    forces = atoms.get_forces()
    assert isinstance(forces, np.ndarray)
    assert forces.shape == (2, 3)


def test_emt_calculator():
    """EMT calculator can be built and yields energy and forces for H2."""
    atoms = _h2_molecule()
    atoms.calc = EMTCalc().get_calculator()
    _assert_energy_and_forces(atoms)


def test_mace_calculator():
    """MACE calculator yields energy and forces for H2 (skipped without mace)."""
    # importorskip must run inside the test: used in a decorator it executes
    # at collection time and skips the whole module when the package is absent.
    pytest.importorskip("mace", reason="MACE not installed")

    atoms = _h2_molecule()
    atoms.calc = MaceCalc(model_type="medium").get_calculator()
    _assert_energy_and_forces(atoms)


def test_tblite_calculator():
    """TBLite calculator yields energy and forces for H2 (skipped without tblite)."""
    pytest.importorskip("tblite", reason="TBLite not installed")

    atoms = _h2_molecule()
    atoms.calc = TBLiteCalc().get_calculator()
    _assert_energy_and_forces(atoms)


def test_orca_calculator():
    """ORCA calculator can be constructed and attached (skipped if unconfigured)."""
    pytest.importorskip("ase.io.orca", reason="ORCA not installed")
    from ase.calculators.calculator import BadConfiguration

    try:
        ase_calc = OrcaCalc().get_calculator()
    except BadConfiguration:
        pytest.skip("ORCA calculator not configured in ASE.")

    atoms = _h2_molecule()
    atoms.calc = ase_calc

    # Only the calculator interface is checked; no ORCA run is triggered.
    assert hasattr(ase_calc, "calculate")
Carefully extract all relevant inputs from the user's request, including: 7 | - Molecule names, SMILES strings, structures 8 | - Methods, calculator types 9 | - Simulation conditions (temperature, pressure, etc.) 10 | 11 | 2. Before calling any tool: 12 | - Confirm the tool is clearly required to fulfill the user's request. 13 | - If the user's request can be answered without a tool call, **do not call any tool**. 14 | - Never call a tool just because data is available — only call it if **it is essential** to progress. 15 | 16 | 3. When calling a tool: 17 | - Do not nest tool calls. 18 | - Use **exact** Python dictionary format, following the tool’s schema strictly. 19 | - Do not include wrappers like `"type": "object"` or `"value": {...}"`. 20 | - Example (valid input for ASE run): 21 | ```python 22 | { 23 | "atomsdata": { "numbers": [...], "positions": [...], "cell": [...], "pbc": [...] }, 24 | "driver": "opt", 25 | "optimizer": "bfgs", 26 | "calculator": { "calculator_type": "mace_mp" }, 27 | "fmax": 0.01, 28 | "steps": 1000, 29 | "temperature": 298.15, 30 | "pressure": 101325.0 31 | } 32 | ``` 33 | 34 | 4. Always use outputs from tool responses. Never fabricate SMILES, molecular structures, or results. 35 | 36 | 5. Handle tool failures by explaining the issue or retrying only with corrected input. Otherwise, proceed to the next step. 37 | 38 | 6. When the user's task is fulfilled: 39 | - **Stop immediately.** 40 | - Return only the final result. 41 | - Do not reason further or call more tools unless explicitly instructed. 42 | 43 | 7. Do not call tools that are irrelevant or unrelated to the specific task described by the user. 44 | 45 | Summary: Use only the necessary tools. Stay focused on the user’s exact question. Avoid guessing, avoid unnecessary reasoning, and stop once the task is complete. 46 | """ 47 | 48 | 49 | formatter_prompt = """You are an agent that formats responses based on user intent. 
"""
Lists of supported models for different LLM providers.
"""
# OpenAI chat models.
supported_openai_models = ["gpt-4o-mini", "gpt-4o", "gpt-4.1", "gpt-3.5-turbo-0125"]

# Models served locally through Ollama.
supported_ollama_models = [
    "llama3.2",
    "llama3.1",
]

# Models available through ALCF's infrastructure.
supported_alcf_models = [
    "AuroraGPT-IT-v4-0125_2",
    "meta-llama/Meta-Llama-3.1-405B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen2.5-14B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/QwQ-32B-Preview",
    "Qwen/QwQ-32B",
    "Qwen/Qwen3-32B",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
]

# Anthropic Claude models.
supported_anthropic_models = [
    "claude-opus-4-20250514",
    "claude-sonnet-4-20250514",
    "claude-3-5-haiku-20241022",
    "claude-3-7-sonnet-20250219",
    "claude-3-5-sonnet-20241022",
    "claude-3-5-sonnet-20240620",
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "claude-3-haiku-20240307",
]

# Gemini models. gemini-2.0 doesn't work with toolcall in our last test.
supported_gemini_models = ["gemini-2.5-pro", "gemini-2.5-flash"]

# Models hosted on GROQ.
supported_groq_models = [
    "openai/gpt-oss-120b",
    "openai/gpt-oss-20b",
    "qwen/qwen3-32b",
    "deepseek-r1-distill-llama-70b",
    "gemma2-9b-it",
    "groq/compound",
    "groq/compound-mini",
    "llama-3.1-8b-instant",
    "llama-3.3-70b-versatile",
    "meta-llama/llama-4-maverick-17b-128e-instruct",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "meta-llama/llama-guard-4-12b",
    "meta-llama/llama-prompt-guard-2-22m",
    "meta-llama/llama-prompt-guard-2-86m",
    "moonshotai/kimi-k2-instruct-0905",
    "whisper-large-v3",
    "whisper-large-v3-turbo",
]

# Models reachable through Argo (all carry the "argo:" prefix).
supported_argo_models = [
    "argo:gpt-3.5-turbo",
    "argo:gpt-3.5-turbo-16k",
    "argo:gpt-4",
    "argo:gpt-4-32k",
    "argo:gpt-4-turbo",
    "argo:gpt-4o",
    "argo:gpt-4o-latest",
    "argo:gpt-o1-preview",
    "argo:o1-preview",
    "argo:gpt-o1-mini",
    "argo:o1-mini",
    "argo:gpt-o3-mini",
    "argo:o3-mini",
    "argo:gpt-o1",
    "argo:o1",
    "argo:gpt-o3",
    "argo:o3",
    "argo:gpt-o4-mini",
    "argo:o4-mini",
    "argo:gpt-4.1",
    "argo:gpt-4.1-mini",
    "argo:gpt-4.1-nano",
]

# Flat list of every supported model, concatenated provider by provider.
all_supported_models = [
    *supported_openai_models,
    *supported_ollama_models,
    *supported_alcf_models,
    *supported_anthropic_models,
    *supported_argo_models,
    *supported_gemini_models,
    *supported_groq_models,
]
It supports various quantum chemical methods, basis sets, and 9 | SCF convergence parameters. 10 | 11 | Parameters 12 | ---------- 13 | calculator_type : str, optional 14 | Type of calculator. Only 'psi4' is supported, by default 'psi4' 15 | method : str, optional 16 | Computational method to be used. Common methods include: 17 | ['hf', 'mp2', 'ccsd', 'ccsd(t)', 'df-mp2', 'b3lyp', 'pbe0', 'm06-2x'], 18 | by default 'b3lyp' 19 | basis : str, optional 20 | Basis set to be used. Common basis sets include: 21 | ['sto-3g', '6-31g', 'cc-pvdz', 'cc-pvtz', 'def2-svp', 'aug-cc-pvdz'], 22 | by default '6-31g' 23 | reference : str, optional 24 | Wavefunction reference type. Options: 'rhf' (default), 'uhf', 'rohf', 25 | by default 'rhf' 26 | scf_type : str, optional 27 | SCF solver type. Options: 'pk' (default), 'df' (Density-Fitted), 28 | 'cd' (Cholesky Decomposition), by default 'pk' 29 | maxiter : int, optional 30 | Maximum number of SCF iterations, by default 50 31 | """ 32 | 33 | calculator_type: str = Field( 34 | default="psi4", description="Type of calculator. Only 'psi4' is supported." 35 | ) 36 | method: str = Field( 37 | default="b3lyp", 38 | description=( 39 | "Computational method to be used. List of common methods: ['hf', 'mp2', 'ccsd', 'ccsd(t)', 'df-mp2', 'b3lyp', 'pbe0', 'm06-2x']" 40 | ), 41 | ) 42 | basis: str = Field( 43 | default="6-31g", 44 | description=( 45 | "Basis set to be used. List of common basis set: ['sto-3g', '6-31g', 'cc-pvdz', 'cc-pvtz', 'def2-svp', 'aug-cc-pvdz'] " 46 | ), 47 | ) 48 | reference: str = Field( 49 | default="rhf", 50 | description="Wavefunction reference type. Options: 'rhf' (default), 'uhf', 'rohf'.", 51 | ) 52 | 53 | scf_type: str = Field( 54 | default="pk", 55 | description="SCF solver type. Options: 'pk' (default), 'df' (Density-Fitted), 'cd' (Cholesky Decomposition).", 56 | ) 57 | 58 | maxiter: int = Field( 59 | default=50, description="Maximum number of SCF iterations. Default is 50." 
def load_anthropic_model(
    model_name: str, temperature: float, api_key: str = None, prompt: str = None
) -> ChatAnthropic:
    """
    Load an Anthropic chat model into LangChain.

    Parameters
    ----------
    model_name : str
        The name of the Anthropic chat model to load. See
        supported_anthropic_models for the list of supported models.
    temperature : float
        Controls the randomness of the generated text. A higher temperature results
        in more random outputs, while a lower temperature results in more
        deterministic outputs.
    api_key : str, optional
        The Anthropic API key. If not provided, the function reads the
        environment variable `ANTHROPIC_API_KEY` and, failing that, prompts
        for the key interactively.
    prompt : str, optional
        Not used when building the model; only forwarded on the retry path.
        Kept for interface compatibility.

    Returns
    -------
    ChatAnthropic
        An instance of LangChain's ChatAnthropic model.

    Raises
    ------
    ValueError
        If `model_name` is not a supported model, or if no API key is provided
        and none can be retrieved from the environment or the prompt.
    """
    # Validate first so an unsupported model fails before any interactive prompt.
    if model_name not in supported_anthropic_models:
        raise ValueError(
            f"Unsupported model '{model_name}'. Supported models are: {supported_anthropic_models}."
        )

    if api_key is None:
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            logger.info("Anthropic API key not found in environment variables.")
            api_key = getpass("Please enter your Anthropic API key: ")
            if api_key:
                os.environ["ANTHROPIC_API_KEY"] = api_key
    if not api_key:
        raise ValueError(
            "An Anthropic API key is required: pass `api_key` or set ANTHROPIC_API_KEY."
        )

    try:
        logger.info(f"Loading Anthropic model: {model_name}")
        llm = ChatAnthropic(
            model=model_name,
            temperature=temperature,
            api_key=api_key,
            max_tokens=6000,
        )
    except Exception as e:
        # Authentication normally happens only at invocation time, so this
        # branch is rarely hit; it is kept as a best-effort recovery path.
        if "AuthenticationError" in str(e) or "invalid_api_key" in str(e):
            logger.warning("Invalid Anthropic API key.")
            api_key = getpass("Please enter a valid Anthropic API key: ")
            # Store the key under the Anthropic variable (was OPENAI_API_KEY).
            os.environ["ANTHROPIC_API_KEY"] = api_key
            # Retry with the new API key.
            return load_anthropic_model(model_name, temperature, api_key, prompt)
        logger.error(f"Error loading Anthropic model: {str(e)}")
        raise

    # No guarantee that api_key is valid: authentication happens on first use.
    logger.info(f"Requested model: {model_name}")
    logger.info("Anthropic model loaded successfully")
    return llm
class FAIRChemCalc(BaseModel):
    """FAIRChem calculator configuration for ASE integration.

    Parameters
    ----------
    calculator_type : str, optional
        Calculator identifier. Default is 'FAIRChem'.
    task_name : str, optional
        Task name ('omol', 'omat', 'oc20', 'odac', or 'omc') for the prediction head.
        Must match available tasks in the model. Default is None.
    seed : int, optional
        Seed for model reproducibility. Default is 42.
    spin : int, optional
        Spin multiplicity. Default is 1.
    charge : int, optional
        System charge. Default is 0.
    model_name : str, optional
        Inference model name. Default is 'uma-s-1p1'.
    device : str, optional
        Device to run inference on. Default is 'cuda' if available, otherwise 'cpu'.
    inference_settings : str, optional
        Inference settings preset, 'default' or 'turbo'. Default is 'default'.

    Notes
    -----
    `spin` and `charge` are not passed to the calculator constructor; they are
    exposed through `get_atoms_properties` for injection into ``atoms.info``.
    """

    calculator_type: str = Field(
        default="FAIRChem", description="Calculator identifier. Must be 'FAIRChem'."
    )
    task_name: Optional[str] = Field(
        default=None,
        description="Prediction task. Options are 'omol', 'omat', 'oc20', 'odac', or 'omc'",
    )
    seed: int = Field(default=42, description="Random seed for inference reproducibility.")
    spin: Optional[int] = Field(default=1, description="Total spin multiplicity of the system.")
    charge: Optional[int] = Field(default=0, description="Total system charge.")
    model_name: str = Field(
        default="uma-s-1p1", description="Model names. Options are 'uma-s-1p1' and 'uma-m-1'"
    )
    # The default is resolved once, when the class body is evaluated.
    device: str = Field(
        default="cuda" if torch.cuda.is_available() else "cpu",
        description="Computation device to use, either 'cpu' or 'cuda'.",
    )
    inference_settings: str = Field(
        default="default", description="Settings for inference. Can be 'default' or 'turbo'"
    )

    def get_calculator(self) -> FAIRChemCalculator:
        """Return a configured FAIRChemCalculator.

        The pretrained predict unit is loaded here from `model_name`,
        `inference_settings` and `device`; callers do not supply one.

        Returns
        -------
        FAIRChemCalculator
            ASE-compatible calculator instance built with `task_name` and `seed`.
        """
        predict_unit = pretrained_mlip.get_predict_unit(
            model_name=self.model_name,
            inference_settings=self.inference_settings,
            device=self.device,
        )
        return FAIRChemCalculator(
            predict_unit=predict_unit,
            task_name=self.task_name,
            seed=self.seed,
        )

    def get_atoms_properties(self) -> Dict[str, Optional[int]]:
        """Return atom-level info keys to inject into atoms.info."""
        return {
            "spin": self.spin,
            "charge": self.charge,
        }
def ChemGraphAgent(state: State, llm: ChatOpenAI, system_prompt: str, tools=None):
    """Run a single LLM turn with tools bound and return its reply.

    Parameters
    ----------
    state : State
        Graph state; only its ``"messages"`` entry is read here.
    llm : ChatOpenAI
        Language model that the tools are bound to and that is invoked.
    system_prompt : str
        Text sent as the system message.
    tools : list, optional
        Tools to bind to the model. When None, the module's default tool
        set is used.

    Returns
    -------
    dict
        ``{"messages": [reply]}`` where ``reply`` is what the tool-bound
        model returned.
    """
    # Fall back to the built-in tool set when the caller supplies none.
    active_tools = tools
    if active_tools is None:
        active_tools = [
            file_to_atomsdata,
            smiles_to_atomsdata,
            run_ase,
            molecule_name_to_smiles,
            save_atomsdata_to_file,
            calculator,
        ]

    system_message = {"role": "system", "content": system_prompt}
    # The whole message history is embedded, in its string form, as the
    # content of one user message.
    user_message = {"role": "user", "content": f"{state['messages']}"}

    reply = llm.bind_tools(tools=active_tools).invoke([system_message, user_message])
    return {"messages": [reply]}
class NWChemCalc(BaseModel):
    """NWChem quantum chemistry calculator configuration.

    This class defines the configuration parameters for NWChem quantum chemistry
    calculations. It supports various quantum chemical methods, basis sets, and
    periodic calculations through the NWChem program.

    Parameters
    ----------
    calculator_type : str, optional
        Calculator type. Currently supports only 'nwchem', by default 'nwchem'
    theory : str, optional
        NWChem module to be used. Options: 'dft', 'scf', 'mp2', 'ccsd', 'tce',
        'tddft', 'pspw', 'band', 'paw', by default 'dft'
    xc : str, optional
        Exchange-correlation functional (only applicable for DFT calculations),
        by default 'PBE'
    basis : str or dict, optional
        Basis set to use. Can be a string for all elements or a dictionary
        mapping elements to basis sets, by default '6-31G'
    kpts : tuple or dict, optional
        K-point mesh for periodic calculations, by default None
    directory : str, optional
        Working directory for NWChem calculations, by default '.'
    command : str, optional
        Command to execute NWChem (e.g., 'nwchem PREFIX.nwi > PREFIX.nwo'),
        by default None
    """

    calculator_type: str = Field(
        default="nwchem", description="Calculator type. Currently supports only 'nwchem'."
    )
    theory: Optional[str] = Field(
        default="dft",
        description=(
            "NWChem module to be used. Options: 'dft', 'scf', 'mp2', 'ccsd', "
            "'tce', 'tddft', 'pspw', 'band', 'paw'."
        ),
    )
    xc: Optional[str] = Field(
        default="PBE",
        description=(
            "Exchange-correlation functional (only applicable for DFT calculations)."
        ),
    )
    basis: Optional[Union[str, Dict[str, str]]] = Field(
        default="6-31G",
        description=(
            "Basis set to use. Can be a string for all elements or a dictionary "
            "mapping elements to basis sets."
        ),
    )
    kpts: Optional[Union[tuple, Dict[str, Union[int, str]]]] = Field(
        default=None,
        description="K-point mesh for periodic calculations.",
    )
    directory: str = Field(
        default=".",
        description="Working directory for NWChem calculations.",
    )
    command: Optional[str] = Field(
        default=None,
        description=(
            "Command to execute NWChem (e.g., 'nwchem PREFIX.nwi > PREFIX.nwo')."
        ),
    )

    def get_calculator(self):
        """Build the ASE NWChem calculator described by this configuration.

        Returns
        -------
        NWChem
            Calculator constructed from the theory, xc, basis, kpts,
            directory and command fields.

        Raises
        ------
        ValueError
            If ``calculator_type`` is anything other than 'nwchem'.
        """
        if self.calculator_type == "nwchem":
            # Forward the configured fields to ASE under the same keyword names.
            forwarded = ("theory", "xc", "basis", "kpts", "directory", "command")
            settings = {field: getattr(self, field) for field in forwarded}
            return NWChem(**settings)

        raise ValueError(
            "Invalid calculator_type. The only valid option is 'nwchem'."
        )
Higher values (e.g., 0.8) 30 | make the output more random, while lower values (e.g., 0.2) make it more 31 | deterministic. 32 | api_key : str, optional 33 | The GROQ API key. If not provided, the function will attempt to retrieve it 34 | from the environment variable `GROQ_API_KEY`. 35 | prompt : str, optional 36 | Custom prompt to use when requesting the API key from the user. 37 | base_url : str, optional 38 | Custom base URL for the GROQ API (currently unused but included for consistency). 39 | Returns 40 | ------- 41 | ChatGroq 42 | An instance of LangChain's ChatGroq model. 43 | Raises 44 | ------ 45 | ValueError 46 | If the model name is not in the list of supported models. 47 | Exception 48 | If there is an error loading the model or if the API key is invalid. 49 | Notes 50 | ----- 51 | The function will: 52 | 1. Check for the API key in the environment variables 53 | 2. Prompt for the key if not found 54 | 3. Validate the model name against supported models 55 | 4. Attempt to load the model 56 | 5. Handle any authentication errors by prompting for a new key 57 | """ 58 | 59 | if api_key is None: 60 | api_key = os.getenv("GROQ_API_KEY") 61 | if not api_key: 62 | logger.info("GROQ API key not found in environment variables.") 63 | api_key = getpass("Please enter your GROQ API key: ") 64 | os.environ["GROQ_API_KEY"] = api_key 65 | 66 | if model_name not in supported_groq_models: 67 | raise ValueError( 68 | f"Unsupported model '{model_name}'. Supported models are: {supported_groq_models}." 
def load_gemini_model(
    model_name: str,
    temperature: float,
    api_key: str = None,
    prompt: str = None,
    base_url: str = None,
) -> ChatGoogleGenerativeAI:
    """Load a Gemini chat model into LangChain.

    This function loads a Gemini model and configures it for use with LangChain.
    It handles API key management, including prompting for the key if not provided
    or if the provided key is invalid.

    Parameters
    ----------
    model_name : str
        The name of the Gemini chat model to load. See supported_gemini_models for list
        of supported models.
    temperature : float
        Controls the randomness of the generated text. Higher values (e.g., 0.8)
        make the output more random, while lower values (e.g., 0.2) make it more
        deterministic.
    api_key : str, optional
        The Google API key. If not provided, the function will attempt to retrieve it
        from the environment variable `GEMINI_API_KEY`.
    prompt : str, optional
        Custom prompt to use when requesting the API key from the user. When
        omitted, a default prompt is shown.
    base_url : str, optional
        Accepted for signature consistency with the other loaders; it is not
        passed to the model constructor here.

    Returns
    -------
    ChatGoogleGenerativeAI
        An instance of LangChain's ChatGoogleGenerativeAI model.

    Raises
    ------
    ValueError
        If the model name is not in the list of supported models.
    Exception
        If there is an error loading the model that is not recognised as an
        authentication failure.

    Notes
    -----
    The function will:
    1. Check for the API key in the environment variables
    2. Prompt for the key if not found
    3. Validate the model name against supported models
    4. Attempt to load the model
    5. Handle any authentication errors by prompting for a new key
    """

    if api_key is None:
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            logger.info("Google API key not found in environment variables.")
            # Use the caller's prompt when one was given; otherwise the default text.
            api_key = getpass(prompt or "Please enter your Google API key: ")
            os.environ["GEMINI_API_KEY"] = api_key

    if model_name not in supported_gemini_models:
        raise ValueError(
            f"Unsupported model '{model_name}'. Supported models are: {supported_gemini_models}."
        )

    logger.info(f"Loading Gemini model: {model_name}")
    try:
        llm = ChatGoogleGenerativeAI(
            model=model_name,
            temperature=temperature,
            api_key=api_key,
            max_output_tokens=6000,
        )
    except Exception as e:
        # Authentication normally happens only at invocation time, so this
        # branch is a best-effort fallback for constructors that validate eagerly.
        if "AuthenticationError" in str(e) or "invalid_api_key" in str(e):
            logger.warning("Invalid Google API key.")
            api_key = getpass("Please enter a valid Google API key: ")
            os.environ["GEMINI_API_KEY"] = api_key
            # Retry with the new key, forwarding every original argument
            # (base_url included) so the retry matches the first call.
            return load_gemini_model(model_name, temperature, api_key, prompt, base_url)
        logger.error(f"Error loading Google model: {str(e)}")
        raise

    # No guarantee that api_key is valid, authentication happens only during invocation
    logger.info(f"Requested model: {model_name}")
    logger.info("Gemini model loaded successfully")
    return llm
def main(n_reactions: int):
    """Run the single-agent workflow on the first ``n_reactions`` reactions.

    Reads ``reaction_dataset.json`` from the current working directory, asks
    the agent for the reaction enthalpy of each reaction (query
    ``"enthalpy_method"`` with GFN2-xTB at 400 K) and writes the collected
    workflows and state to ``llm_workflow_<timestamp>.json``.

    A reaction whose agent run raises is reported and skipped, so that no
    entry is written for it.

    Parameters
    ----------
    n_reactions : int
        Number of reactions, taken from the start of the dataset, to process.
    """
    combined_data = {}

    cca = ChemGraph(
        model_name='gpt-4o-mini',
        workflow_type="single_agent",
        structured_output=True,
        return_option="state",
    )
    with open("reaction_dataset.json", "r") as rf:
        reactions = json.load(rf)

    # Iterate through the first n_reactions reactions
    for idx, reaction in enumerate(reactions[:n_reactions]):
        print("********************************************")
        print(
            f"REACTION INDEX {reaction['reaction_index']}: REACTION NAME: {reaction['reaction_name']}"
        )
        print("********************************************")

        name = reaction["reaction_name"]

        query = get_query(
            reaction, query_name="enthalpy_method", method="GFN2-xTB", temperature=400
        )

        try:
            state = cca.run(query, config={"configurable": {"thread_id": str(idx)}})
        except Exception as e:
            # Without this skip, `state` would be unbound (first iteration) or
            # still hold the previous reaction's result, which would then be
            # stored under this reaction's name.
            print(f"Run failed for reaction '{name}', skipping: {e}")
            continue

        llm_workflow = get_workflow_from_state(state)

        # Store results in a structured dictionary
        state_data = cca.write_state(config={"configurable": {"thread_id": str(idx)}})

        combined_data[name] = {"llm_workflow": llm_workflow}
        combined_data[name]["metadata"] = state_data

    # Save the results to a timestamped JSON file
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"llm_workflow_{timestamp}.json"

    with open(filename, "w") as f:
        json.dump(combined_data, f, indent=4)
def main(n_reactions: int):
    """Run the multi-agent workflow on the first ``n_reactions`` reactions.

    Reads ``reaction_dataset.json`` from the current working directory, asks
    the agent for the reaction enthalpy of each reaction (query
    ``"enthalpy_method"`` with GFN2-xTB at 400 K) and writes the collected
    workflows and state to ``llm_workflow_<timestamp>.json``.

    A reaction whose agent run raises is reported and skipped, so that no
    entry is written for it.

    Parameters
    ----------
    n_reactions : int
        Number of reactions, taken from the start of the dataset, to process.
    """
    combined_data = {}

    cca = ChemGraph(
        model_name='gpt-4o-mini',
        workflow_type="multi_agent",
        structured_output=True,
        return_option="state",
    )
    with open("reaction_dataset.json", "r") as rf:
        reactions = json.load(rf)

    # Iterate through the first n_reactions reactions
    for idx, reaction in enumerate(reactions[:n_reactions]):
        print("********************************************")
        print(
            f"REACTION INDEX {reaction['reaction_index']}: REACTION NAME: {reaction['reaction_name']}"
        )
        print("********************************************")

        name = reaction["reaction_name"]

        query = get_query(
            reaction, query_name="enthalpy_method", method="GFN2-xTB", temperature=400
        )

        try:
            state = cca.run(query, config={"configurable": {"thread_id": str(idx)}})
        except Exception as e:
            # Without this skip, `state` would be unbound (first iteration) or
            # still hold the previous reaction's result, which would then be
            # stored under this reaction's name.
            print(f"Run failed for reaction '{name}', skipping: {e}")
            continue

        llm_workflow = get_workflow_from_state(state)

        # Store results in a structured dictionary
        state_data = cca.write_state(config={"configurable": {"thread_id": str(idx)}})

        combined_data[name] = {"llm_workflow": llm_workflow}
        combined_data[name]["metadata"] = state_data

    # Save the results to a timestamped JSON file
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"llm_workflow_{timestamp}.json"

    with open(filename, "w") as f:
        json.dump(combined_data, f, indent=4)
def main(n_reactions: int):
    """Run the single-agent Gibbs free energy workflow on a reaction dataset.

    Loads ``reaction_dataset.json`` from the current working directory,
    queries the agent for each of the first ``n_reactions`` reactions
    (``"gibbs_free_energy_method_temperature"`` with mace_mp at 500 K) and
    dumps the gathered workflows and state to
    ``llm_workflow_<timestamp>.json``. Any exception raised by the agent
    propagates to the caller.

    Parameters
    ----------
    n_reactions : int
        Number of reactions, taken from the start of the dataset, to process.
    """
    banner = "********************************************"
    results = {}

    agent = ChemGraph(
        model_name='gpt-4o-mini',
        workflow_type="single_agent",
        structured_output=True,
        return_option="state",
    )

    with open("reaction_dataset.json", "r") as dataset_file:
        dataset = json.load(dataset_file)

    for position, rxn in enumerate(dataset[:n_reactions]):
        print(banner)
        print(
            f"REACTION INDEX {rxn['reaction_index']}: REACTION NAME: {rxn['reaction_name']}"
        )
        print(banner)

        rxn_name = rxn["reaction_name"]
        thread_id = str(position)

        prompt_text = get_query(
            rxn,
            query_name="gibbs_free_energy_method_temperature",
            method="mace_mp",
            temperature=500,
        )
        run_state = agent.run(prompt_text, config={"configurable": {"thread_id": thread_id}})

        # Extract the workflow first, then snapshot the agent state for this thread.
        workflow = get_workflow_from_state(run_state)
        metadata = agent.write_state(config={"configurable": {"thread_id": thread_id}})

        results[rxn_name] = {"llm_workflow": workflow, "metadata": metadata}

    # The output file name carries the time the run finished.
    stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    with open(f"llm_workflow_{stamp}.json", "w") as out_file:
        json.dump(results, out_file, indent=4)
0.06321 Biso 1.000 Zn 0.734237 28 | Zn3 1.0000 0.82412 0.94229 0.56321 Biso 1.000 Zn 0.734237 29 | Zn4 1.0000 0.17588 0.44229 0.93679 Biso 1.000 Zn 0.734237 30 | N1 1.0000 0.03080 0.88920 0.36830 Biso 1.000 N -0.225444 31 | N2 1.0000 0.96920 0.38920 0.13170 Biso 1.000 N -0.225444 32 | N3 1.0000 0.96920 0.11080 0.63170 Biso 1.000 N -0.225444 33 | N4 1.0000 0.03080 0.61080 0.86830 Biso 1.000 N -0.225444 34 | N5 1.0000 0.90780 0.85250 0.41000 Biso 1.000 N -0.218757 35 | N6 1.0000 0.09220 0.35250 0.09000 Biso 1.000 N -0.218757 36 | N7 1.0000 0.09220 0.14750 0.59000 Biso 1.000 N -0.218757 37 | N8 1.0000 0.90780 0.64750 0.91000 Biso 1.000 N -0.218757 38 | N9 1.0000 0.90080 0.70860 0.22590 Biso 1.000 N -0.303066 39 | N10 1.0000 0.09920 0.20860 0.27410 Biso 1.000 N -0.303066 40 | N11 1.0000 0.09920 0.29140 0.77410 Biso 1.000 N -0.303066 41 | N12 1.0000 0.90080 0.79140 0.72590 Biso 1.000 N -0.303066 42 | O1 1.0000 0.40980 0.07610 0.61020 Biso 1.000 O -0.532130 43 | O2 1.0000 0.59020 0.57610 0.88980 Biso 1.000 O -0.532130 44 | O3 1.0000 0.59020 0.92390 0.38980 Biso 1.000 O -0.532130 45 | O4 1.0000 0.40980 0.42390 0.11020 Biso 1.000 O -0.532130 46 | O5 1.0000 0.67530 0.03070 0.67320 Biso 1.000 O -0.530638 47 | O6 1.0000 0.32470 0.53070 0.82680 Biso 1.000 O -0.530638 48 | O7 1.0000 0.32470 0.96930 0.32680 Biso 1.000 O -0.530638 49 | O8 1.0000 0.67530 0.46930 0.17320 Biso 1.000 O -0.530638 50 | C1 1.0000 0.02150 0.80170 0.25880 Biso 1.000 C 0.170903 51 | C2 1.0000 0.97850 0.30170 0.24120 Biso 1.000 C 0.170903 52 | C3 1.0000 0.97850 0.19830 0.74120 Biso 1.000 C 0.170903 53 | C4 1.0000 0.02150 0.69830 0.75880 Biso 1.000 C 0.170903 54 | H1 1.0000 0.09320 0.80450 0.20860 Biso 1.000 H 0.108210 55 | H2 1.0000 0.90680 0.30450 0.29140 Biso 1.000 H 0.108210 56 | H3 1.0000 0.90680 0.19550 0.79140 Biso 1.000 H 0.108210 57 | H4 1.0000 0.09320 0.69550 0.70860 Biso 1.000 H 0.108210 58 | C5 1.0000 0.83450 0.74460 0.32320 Biso 1.000 C 0.172635 59 | C6 1.0000 0.16550 0.24460 0.17680 Biso 1.000 
def get_query(
    reaction: dict,
    query_name: str = "enthalpy",
    temperature: float = 298,
    pressure: float = 101325,
    method: str = "mace_mp",
) -> str:
    """Build the natural-language query sent to the LLM for one reaction.

    Parameters
    ----------
    reaction : dict
        Must provide ``"reactants"`` and ``"products"``, each an iterable of
        mappings with ``"coefficient"`` and ``"name"`` entries.
    query_name : str, optional
        Which query wording to produce: ``"enthalpy"``, ``"enthalpy_method"``,
        ``"gibbs_free_energy"``, ``"gibbs_free_energy_method"`` or
        ``"gibbs_free_energy_method_temperature"``.
    temperature : float, optional
        Temperature inserted (followed by ``K``) into the wordings that mention it.
    pressure : float, optional
        Accepted but not used by any of the wordings.
    method : str, optional
        Method name inserted into the wordings that mention it.

    Returns
    -------
    str
        The query text, or ``"Query not found"`` for an unknown ``query_name``.
    """

    def _side(species):
        # "coefficient (name)" terms joined with " + ".
        return " + ".join(f"{item['coefficient']} ({item['name']})" for item in species)

    equation = f"{_side(reaction['reactants'])} -> {_side(reaction['products'])}"

    templates = {
        "enthalpy": "Calculate the reaction enthalpy for this reaction: {eq}",
        "enthalpy_method": (
            "You are given a chemical reaction: {eq}. "
            "Calculate the enthalpy for this reaction using {method} at {temperature}K."
        ),
        "gibbs_free_energy": "What is the Gibbs free energy of reaction for {eq}?",
        "gibbs_free_energy_method": (
            "What is the Gibbs free energy of reaction for {eq} using {method}?"
        ),
        "gibbs_free_energy_method_temperature": (
            "You are given a chemical reaction: {eq}. "
            "Calculate the Gibbs free energy change for this reaction using {method} at {temperature}K."
        ),
    }

    chosen = templates.get(query_name)
    if chosen is None:
        # Unknown query names yield a fixed fallback message instead of raising.
        return "Query not found"
    return chosen.format(eq=equation, method=method, temperature=temperature)
class VibrationalFrequency(BaseModel):
    """
    Schema holding the vibrational frequencies obtained from a simulation.

    Attributes
    ----------
    frequency_cm1 : list[str]
        Vibrational frequencies in inverse centimeters (cm⁻¹), each entry
        being the string representation of one frequency value.
    """

    # Required field: the ellipsis default marks it as having no default value.
    frequency_cm1: list[str] = Field(
        default=...,
        description="List of vibrational frequencies in cm-1.",
    )
class InfraredSpectrum(BaseModel):
    """
    Schema for an infrared spectrum (frequency and intensity values for plotting).

    Both fields are required and store their values as strings.

    Attributes
    ----------
    frequency_spec_cm1 : list[str]
        Frequency values of the spectrum in inverse centimeters (cm⁻¹).
        Each entry is a string representation of the frequency value.
    intensity_spec_D2A2amu1 : list[str]
        Intensity values of the spectrum in (D/Å)^2 amu⁻¹.
        Each entry is a string representation of the intensity value.
        NOTE(review): presumably aligned index-by-index with
        ``frequency_spec_cm1``; this model does not enforce equal lengths.
    """
    frequency_spec_cm1: list[str] = Field(
        ...,
        description="Range of frequencies for plotting spectrum in cm-1.",
    )

    intensity_spec_D2A2amu1: list[str] = Field(
        ...,
        description="Values of intensities for plotting spectrum in (D/Å)^2 amu^-1.",
    )
class ResponseFormatter(BaseModel):
    """Structured output returned to the user.

    The single ``answer`` field holds one of the supported result schemas.
    The field description is the guidance used to choose among them, so every
    type named there must also be a member of the ``Union`` (and vice versa).
    """

    # `InfraredSpectrum` is included because the description instructs the
    # model to use it; without it such an answer cannot validate.
    answer: Union[
        str,
        ScalarResult,
        VibrationalFrequency,
        IRSpectrum,
        InfraredSpectrum,
        AtomsData,
    ] = Field(
        description=(
            "Structured answer to the user's query. Use:\n"
            "1. `str` for general or explanatory responses or SMILES string.\n"
            "2. `VibrationalFrequency` for vibrational frequencies.\n"
            "3. `ScalarResult` for single numerical properties (e.g. enthalpy).\n"
            "4. `AtomsData` for atomic geometries (XYZ coordinate, etc.) and optimized structures.\n"
            "5. `IRSpectrum` for vibrational frequencies with intensities and an optional IR spectrum plot.\n"
            "6. `InfraredSpectrum` for calculating infrared spectra."
        )
    )
Use `AtomsData` for molecular structures or atomic geometries (e.g., atomic positions, element lists, or 3D coordinates). 16 | 3. Use `VibrationalFrequency` for vibrational frequency data. This includes one or more vibrational modes, typically expressed in units like cm⁻¹. 17 | - IMPORTANT: Do NOT use `ScalarResult` for vibrational frequencies. Vibrational data is a list or array of values and requires `VibrationalFrequency`. 18 | 4. Use `IRSpectrum` for vibrational frequency and intensities data and IR spectrum plot. 19 | 5. Use `ScalarResult` (float) only for scalar thermodynamic or energetic quantities such as: 20 | - Enthalpy 21 | - Entropy 22 | - Gibbs free energy 23 | 5. Use `InfraredSpectrum` for infrared (also known as IR) spectrum data. This includes a range of frequencies, typically expressed in units like cm⁻¹, and a range of intensities, typically expressed in units like (D/Å)^2 amu^-1. 24 | - IMPORTANT: Do NOT use `ScalarResult` for frequencies and intensities. Spectral data is a list or array of values and requires `InfraredSpectrum`. 25 | 26 | Additional guidance: 27 | - Always read the user’s intent carefully to determine whether the requested quantity is a **list of values** (frequencies) or a **single scalar**. 28 | """ 29 | 30 | formatter_prompt = """You are an agent responsible for formatting the final output based on both the user’s intent and the actual results from prior agents. Your top priority is to accurately extract and interpret **the correct values from previous agent outputs** — do not fabricate or infer values beyond what has been explicitly provided. 31 | 32 | Follow these rules for selecting the output type: 33 | 34 | 1. Use `str` for: 35 | - SMILES strings 36 | - Yes/No questions 37 | - General explanatory or descriptive responses 38 | 39 | 2. 
Use `AtomsData` if the result contains: 40 | - Atomic positions 41 | - Element numbers or symbols 42 | - Cell dimensions 43 | - Any representation of molecular structure or geometry 44 | 45 | 3. Use `VibrationalFrequency` for vibrational mode outputs: 46 | - Must contain a list or array of frequencies (typically in cm⁻¹) 47 | - Do **not** use `ScalarResult` for these — frequencies are not single-valued 48 | 49 | 4. Use `ScalarResult` only for a single numeric value representing: 50 | - Enthalpy 51 | - Entropy 52 | - Gibbs free energy 53 | - Any other scalar thermodynamic or energetic quantity 54 | 55 | Additional instructions: 56 | - Carefully check that the values you format are present in the **actual output of prior tools or agents**. 57 | - Pay close attention to whether the desired result is a **list vs. a scalar**, and choose the correct format accordingly. 58 | """ 59 | 60 | report_prompt = """You are an agent responsible for generating an html report based on the results of a computational chemistry simulation. 61 | 62 | Instructions: 63 | - Use generate_html tool to generate the report. 64 | - Make sure the input to the generate_html tool is a valid ASEOutputSchema object. 65 | - Include all the information from the ASEOutputSchema object when invoking the generate_html tool. 66 | """ 67 | -------------------------------------------------------------------------------- /docs/example_usage.md: -------------------------------------------------------------------------------- 1 | !!! note 2 | Before exploring example usage in the `notebooks/` directory, ensure you have specified the necessary API tokens in your environment. 3 | 4 | === "OpenAI API Key" 5 | 1. Log in to your OpenAI account at the OpenAI Platform website. If you don't have an account, you'll need to create one first. 6 | 7 | 2. Navigate to the API keys section. You can find this by clicking on your profile icon in the top-right corner and selecting "API keys." 8 | 9 | 3. 
Click the + Create new secret key button. 10 | 11 | 4. Give your key a descriptive name (e.g., "ChemGraph"). 12 | 13 | 5. Click Create secret key. A new key will be generated. 14 | 15 | 6. Copy the key and save it in a secure location. You will not be able to see it again after this step. 16 | 17 | 7. Set the key in your environment using the following command: 18 | ```bash 19 | export OPENAI_API_KEY="your_api_key_here" # On Unix or macOS 20 | setx OPENAI_API_KEY "your_api_key_here" # On Windows 21 | ``` 22 | 8. Restart your terminal or IDE to ensure the environment variable is loaded. 23 | 24 | === "Anthropic API Key" 25 | 1. Sign up or log in to your Anthropic account at the [Anthropic console](https://console.anthropic.com/). 26 | 27 | 2. In the left-hand navigation menu, select API Keys. 28 | 29 | 3. Click on the option to create a new API key. 30 | 31 | 4. Provide a name for your API key (e.g., "ChemGraph"). 32 | 33 | 5. Click Create Key again. 34 | 35 | 6. Copy the generated key and store it securely, as you may not be able to view it again. 36 | 37 | 7. Set the key in your environment using the following command: 38 | ```bash 39 | export ANTHROPIC_API_KEY="your_api_key_here" # On Unix or macOS 40 | setx ANTHROPIC_API_KEY "your_api_key_here" # On Windows 41 | ``` 42 | 8. Restart your terminal or IDE to ensure the environment variable is loaded. 43 | 44 | === "Google AI Studio (Gemini) API Key" 45 | 1. Go to the Google AI Studio website at [Google AI Studio](https://aistudio.google.com/) and sign in with your Google account. 46 | 47 | 2. In the left-hand menu, select Get API key. 48 | 49 | 3. Click the Create API key in new project button. A new key will be instantly generated. 50 | 51 | 4. Copy the API key by clicking the copy icon next to it. 52 | 53 | 5. 
Set the key as an environment variable: 54 | ```bash 55 | export GOOGLE_API_KEY="your_api_key_here" # On Unix or macOS 56 | setx GOOGLE_API_KEY "your_api_key_here" # On Windows 57 | ``` 58 | 6. Restart your terminal or IDE to ensure the environment variable is loaded. 59 | 60 | ???+ info "**Explore Example Notebooks**" 61 | Navigate to the `notebooks/` directory to explore various example notebooks demonstrating different capabilities of ChemGraph. 62 | 63 | - **[Single-Agent System with MACE](https://github.com/argonne-lcf/ChemGraph/blob/main/notebooks/Demo-multi_agent.ipynb)**: This notebook demonstrates how a single agent can utilize multiple tools with MACE/xTB support. 64 | 65 | - **[Single-Agent System with UMA](https://github.com/argonne-lcf/ChemGraph/blob/main/notebooks/Demo_single_agent_UMA.ipynb)**: This notebook demonstrates how a single agent can utilize multiple tools with UMA support. 66 | 67 | - **[Multi-Agent System](https://github.com/argonne-lcf/ChemGraph/blob/main/notebooks/Demo-multi_agent.ipynb)**: This notebook demonstrates a multi-agent setup where different agents (Planner, Executor and Aggregator) handle various tasks exemplifying the collaborative potential of ChemGraph. 68 | 69 | - **[Single-Agent System with gRASPA](https://github.com/argonne-lcf/ChemGraph/blob/main/notebooks/Demo_graspa_agent.ipynb)**: This notebook provides a sample guide on executing a gRASPA simulation using a single agent. For gRASPA-related installation instructions, visit the [gRASPA GitHub repository](https://github.com/snurr-group/gRASPA). The notebook's functionality has been validated on a single compute node at ALCF Polaris. 70 | -------------------------------------------------------------------------------- /src/ui/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration management for ChemGraph Streamlit app. 
def _merge_defaults(config: Dict[str, Any], defaults: Dict[str, Any]) -> None:
    """Recursively copy keys that are missing from ``config`` out of ``defaults``."""
    for key, default_value in defaults.items():
        if key not in config:
            config[key] = default_value
        elif isinstance(config[key], dict) and isinstance(default_value, dict):
            # Descend so partially specified tables (e.g. [api.openai] without
            # "timeout") still receive their missing defaults.
            _merge_defaults(config[key], default_value)


def load_config(config_path: str = "config.toml") -> Dict[str, Any]:
    """Load configuration from a TOML file, filling gaps with defaults.

    If the file does not exist, the default configuration is written to
    ``config_path`` and returned. If the file exists, every section or key
    missing from it (at any nesting depth) is taken from the defaults; values
    present in the file are never overwritten.

    Args:
        config_path: Path of the TOML configuration file.

    Returns:
        The loaded (and completed) configuration. On any error the error is
        printed and the default configuration is returned instead.
    """
    try:
        if not os.path.exists(config_path):
            # Create default configuration file if it doesn't exist
            default_config = get_default_config()
            save_config(default_config, config_path)
            return default_config

        with open(config_path, "r", encoding="utf-8") as f:
            config = toml.load(f)

        # Ensure all required sections and keys exist
        _merge_defaults(config, get_default_config())
        return config
    except Exception as e:
        # Deliberately best-effort: the app must still start with defaults.
        print(f"Error loading configuration: {e}")
        return get_default_config()


def save_config(config: Dict[str, Any], config_path: str = "config.toml") -> bool:
    """Save configuration to a TOML file.

    Args:
        config: Configuration mapping to serialize.
        config_path: Destination path of the TOML file.

    Returns:
        True on success; False (after printing the error) on any failure.
    """
    try:
        with open(config_path, "w", encoding="utf-8") as f:
            toml.dump(config, f)
        return True
    except Exception as e:
        print(f"Error saving configuration: {e}")
        return False


def get_default_config() -> Dict[str, Any]:
    """Return a freshly built default configuration.

    A new nested dictionary is created on every call, so callers may mutate
    the result without affecting later calls.
    """
    return {
        "general": {
            "model": "gpt-4o-mini",
            "workflow": "single_agent",
            "output": "state",
            "structured": False,
            "report": True,
            "thread": 1,
            "recursion_limit": 20,
            "verbose": False,
        },
        "api": {
            "openai": {"base_url": "https://api.openai.com/v1", "timeout": 30},
            "anthropic": {"base_url": "https://api.anthropic.com", "timeout": 30},
            "google": {
                "base_url": "https://generativelanguage.googleapis.com/v1beta",
                "timeout": 30,
            },
            "local": {"base_url": "http://localhost:11434", "timeout": 60},
        },
        "chemistry": {
            "optimization": {"method": "BFGS", "fmax": 0.05, "steps": 200},
            "calculators": {"default": "mace_mp", "fallback": "emt"},
        },
        "output": {
            "files": {
                "directory": "./chemgraph_output",
                "formats": ["xyz", "json", "html"],
            },
            "visualization": {"enable_3d": True, "viewer": "py3dmol"},
        },
    }


def flatten_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten nested configuration for easier access.

    Keys of the ``general`` section are copied unchanged. For ``api``,
    ``chemistry`` and ``output``, a nested table becomes
    ``<section>_<key>_<subkey>`` entries and any other value becomes a
    ``<section>_<key>`` entry. Only these two levels are flattened.

    Args:
        config: Nested configuration as returned by ``load_config``.

    Returns:
        A new flat dictionary; ``config`` itself is not modified.
    """
    flattened = {}

    # Handle general settings
    if "general" in config:
        flattened.update(config["general"])

    # Handle other sections
    for section in ["api", "chemistry", "output"]:
        if section in config:
            for key, value in config[section].items():
                if isinstance(value, dict):
                    for subkey, subvalue in value.items():
                        flattened[f"{section}_{key}_{subkey}"] = subvalue
                else:
                    flattened[f"{section}_{key}"] = value

    return flattened
8 | 9 | **Example 1: Using conda** 10 | ```bash 11 | conda create -n vllm-env python=3.11 -y 12 | conda activate vllm-env 13 | ``` 14 | 15 | **Example 2: Using python venv** 16 | ```bash 17 | python3.11 -m venv vllm-env 18 | source vllm-env/bin/activate # On Windows use `vllm-env\\Scripts\\activate` 19 | ``` 20 | 21 | #### Install Inference Server (vLLM) 22 | vLLM is recommended for serving many transformer models efficiently. 23 | 24 | **Basic vLLM installation from source:** 25 | Make sure your virtual environment is activated. 26 | ```bash 27 | # Ensure git is installed 28 | git clone https://github.com/vllm-project/vllm.git 29 | cd vllm 30 | pip install -e . 31 | ``` 32 | For specific hardware acceleration (e.g., CUDA, ROCm), refer to the [official vLLM installation documentation](https://docs.vllm.ai/en/latest/getting_started/installation.html). 33 | 34 | #### Running the vLLM Server (Standalone) 35 | 36 | A script is provided at `scripts/run_vllm_server.sh` to help start a vLLM server with features like logging, retry attempts, and timeout. This is useful for running vLLM outside of Docker Compose, for example, directly on a machine with GPU access. 37 | 38 | **Before running the script:** 39 | 1. Ensure your vLLM Python virtual environment is activated. 40 | ```bash 41 | # Example: if you used conda 42 | # conda activate vllm-env 43 | # Example: if you used python venv 44 | # source path/to/your/vllm-env/bin/activate 45 | ``` 46 | 2. Make the script executable: 47 | ```bash 48 | chmod +x scripts/run_vllm_server.sh 49 | ``` 50 | 51 | **To run the script:** 52 | 53 | ```bash 54 | ./scripts/run_vllm_server.sh [MODEL_IDENTIFIER] [PORT] [MAX_MODEL_LENGTH] 55 | ``` 56 | 57 | - `[MODEL_IDENTIFIER]` (optional): The Hugging Face model identifier. Defaults to `facebook/opt-125m`. 58 | - `[PORT]` (optional): The port for the vLLM server. Defaults to `8001`. 59 | - `[MAX_MODEL_LENGTH]` (optional): The maximum model length. Defaults to `4096`. 
60 | 61 | **Example:** 62 | ```bash 63 | ./scripts/run_vllm_server.sh meta-llama/Meta-Llama-3-8B-Instruct 8001 8192 64 | ``` 65 | 66 | ???+ info "**Important Note on Gated Models (e.g., Llama 3):**" 67 | - Many models, such as those from the Llama family by Meta, are gated and require you to accept their terms of use on Hugging Face and use an access token for download. 68 | 69 | - To use such models with vLLM (either via the script or Docker Compose): 70 | 1. **Hugging Face Account and Token**: Ensure you have a Hugging Face account and have generated an access token with `read` permissions. You can find this in your Hugging Face account settings under "Access Tokens". 71 | 2. **Accept Model License**: Navigate to the Hugging Face page of the specific model you want to use (e.g., `meta-llama/Meta-Llama-3-8B-Instruct`) and accept its license/terms if prompted. 72 | 3. **Environment Variables**: Before running the vLLM server (either via the script or `docker-compose up`), you need to set the following environment variables in your terminal session or within your environment configuration (e.g., `.bashrc`, `.zshrc`, or by passing them to Docker Compose if applicable): 73 | ```bash 74 | export HF_TOKEN="your_hugging_face_token_here" 75 | # Optional: Specify a directory for Hugging Face to download models and cache. 76 | # export HF_HOME="/path/to/your/huggingface_cache_directory" 77 | ``` 78 | vLLM will use these environment variables to authenticate with Hugging Face and download the model weights. 79 | 80 | - The script will: 81 | - Attempt to start the vLLM OpenAI-compatible API server. 82 | - Log output to a file in the `logs/` directory (created if it doesn't exist at the project root). 83 | - The server runs in the background via `nohup`. 84 | 85 | - This standalone script is an alternative to running vLLM via Docker Compose and is primarily for users who manage their vLLM instances directly. 
-------------------------------------------------------------------------------- /docs/streamlit_web_interface.md: -------------------------------------------------------------------------------- 1 | !!! note 2 | ChemGraph includes a **Streamlit web interface** that provides an intuitive, chat-based UI for interacting with computational chemistry agents. The interface supports 3D molecular visualization, conversation history, and easy access to various ChemGraph workflows. 3 | 4 | ### Features 5 | 6 | - **🧪 Interactive Chat Interface**: Natural language queries for computational chemistry tasks 7 | - **🧬 3D Molecular Visualization**: Interactive molecular structure display using `stmol` and `py3Dmol` 8 | - **📊 Report Integration**: Embedded HTML reports from computational calculations 9 | - **💾 Data Export**: Download molecular structures as XYZ or JSON files 10 | - **🔧 Multiple Workflows**: Support for single-agent, multi-agent, Python REPL, and gRASPA workflows 11 | - **🎨 Modern UI**: Clean, responsive interface with conversation bubbles and molecular properties display 12 | 13 | ### Installation Requirements 14 | 15 | The Streamlit UI dependencies are included by default when you install ChemGraph: 16 | 17 | ```bash 18 | # Install ChemGraph (includes UI dependencies) 19 | pip install -e . 20 | ``` 21 | 22 | **Alternative Installation Options:** 23 | ```bash 24 | # Install only UI dependencies separately (if needed) 25 | pip install -e ".[ui]" 26 | 27 | # Install with UMA support (separate environment recommended) 28 | pip install -e ".[uma]" 29 | ``` 30 | 31 | ### Running the Streamlit Interface 32 | 33 | 1. **Set up your API keys** (same as for notebooks): 34 | ```bash 35 | export OPENAI_API_KEY="your_openai_api_key_here" 36 | export ANTHROPIC_API_KEY="your_anthropic_api_key_here" 37 | ``` 38 | 39 | 2. **Launch the Streamlit app**: 40 | ```bash 41 | streamlit run ui/app.py 42 | ``` 43 | 44 | 3. 
**Access the interface**: Open your browser to `http://localhost:8501` 45 | 46 | ### Using the Interface 47 | 48 | #### Configuration 49 | - **Model Selection**: Choose from GPT-4o, GPT-4o-mini, or Claude models 50 | - **Workflow Type**: Select single-agent, multi-agent, Python REPL, or gRASPA workflows 51 | 52 | 53 | #### Interaction 54 | 1. **Initialize Agent**: Click "Initialize Agent" in the sidebar to set up your ChemGraph instance 55 | 2. **Ask Questions**: Use the text area to enter computational chemistry queries 56 | 3. **View Results**: See responses in chat bubbles with automatic structure detection 57 | 4. **3D Visualization**: When molecular structures are detected, they're automatically displayed in 3D 58 | 5. **Download Data**: Export structures and calculation results directly from the interface 59 | 60 | #### Example Queries 61 | - "What is the SMILES string for caffeine?" 62 | - "Optimize the geometry of water molecule using DFT" 63 | - "Calculate the single point energy of methane and show the structure" 64 | - "Generate the structure of aspirin and calculate its vibrational frequencies" 65 | 66 | #### Molecular Visualization 67 | The interface automatically detects molecular structure data in agent responses and provides: 68 | - **Interactive 3D Models**: Multiple visualization styles (ball & stick, sphere, stick, wireframe) 69 | - **Structure Information**: Chemical formula, composition, mass, center of mass 70 | - **Export Options**: Download as XYZ files or JSON data 71 | - **Fallback Display**: Table view when 3D visualization is unavailable 72 | 73 | #### Conversation Management 74 | - **History Display**: All queries and responses are preserved in conversation bubbles 75 | - **Structure Detection**: Molecular structures are automatically extracted and visualized 76 | - **Report Integration**: HTML reports from calculations are embedded directly in the interface 77 | - **Debug Information**: Expandable sections show detailed message 
processing information 78 | 79 | ### Troubleshooting 80 | 81 | **3D Visualization Issues:** 82 | - Ensure `stmol` is installed: `pip install stmol` 83 | - If 3D display fails, the interface falls back to table/text display 84 | - Check browser compatibility for WebGL support 85 | 86 | **Agent Initialization:** 87 | - Verify API keys are set correctly 88 | - Check that ChemGraph package is installed: `pip install -e .` 89 | - Ensure all dependencies are available in your environment 90 | 91 | **Performance:** 92 | - For large molecular systems, visualization may take longer to load 93 | - Use the refresh button if the interface becomes unresponsive 94 | - Clear conversation history to improve performance with many queries 95 | -------------------------------------------------------------------------------- /.github/workflows/conda-tests.yml: -------------------------------------------------------------------------------- 1 | name: Conda Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test-conda-mace: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ["3.10", "3.11", "3.12"] 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | 24 | - name: Set up Conda 25 | uses: conda-incubator/setup-miniconda@v3 26 | with: 27 | miniconda-version: "latest" 28 | python-version: ${{ matrix.python-version }} 29 | activate-environment: chemgraph-mace 30 | auto-activate-base: false 31 | 32 | - name: Install Conda dependencies for MACE 33 | shell: bash -l {0} 34 | run: | 35 | conda install -c conda-forge nwchem -y 36 | conda install -c conda-forge mamba -y 37 | 38 | - name: Install ChemGraph with MACE support 39 | shell: bash -l {0} 40 | run: | 41 | pip install -e . 
42 | pip install pytest 43 | 44 | - name: Run tests with MACE 45 | shell: bash -l {0} 46 | run: | 47 | python -m pytest tests/ -v 48 | 49 | # test-conda-uma: 50 | # runs-on: ubuntu-latest 51 | # strategy: 52 | # matrix: 53 | # python-version: ["3.10", "3.11", "3.12"] 54 | 55 | # steps: 56 | # - uses: actions/checkout@v4 57 | 58 | # - name: Set up Python ${{ matrix.python-version }} 59 | # uses: actions/setup-python@v5 60 | # with: 61 | # python-version: ${{ matrix.python-version }} 62 | 63 | # - name: Set up Conda 64 | # uses: conda-incubator/setup-miniconda@v3 65 | # with: 66 | # miniconda-version: "latest" 67 | # python-version: ${{ matrix.python-version }} 68 | # activate-environment: chemgraph-uma 69 | # auto-activate-base: false 70 | 71 | # - name: Install Conda dependencies for UMA 72 | # shell: bash -l {0} 73 | # run: | 74 | # conda install -c conda-forge nwchem -y 75 | 76 | # - name: Temporarily modify pyproject.toml for UMA e3nn compatibility 77 | # shell: bash -l {0} 78 | # run: | 79 | # echo "Backing up pyproject.toml to pyproject.toml.original..." 80 | # cp pyproject.toml pyproject.toml.original 81 | # echo "Commenting out mace-torch from pyproject.toml..." 82 | # # This sed command finds lines starting with optional whitespace, 83 | # # then "mace-torch>=0.3.13", and prepends a '#' to the matched line. 84 | # sed -i 's/^[[:space:]]*"mace-torch>=0.3.13",/#&/' pyproject.toml 85 | # echo "pyproject.toml after modification:" 86 | # cat pyproject.toml 87 | 88 | # - name: Install ChemGraph with UMA support 89 | # shell: bash -l {0} 90 | # env: 91 | # HF_TOKEN: ${{ secrets.HF_TOKEN }} 92 | # run: | 93 | # pip install -e ".[uma]" 94 | # pip install pytest 95 | # pip install huggingface-hub # Ensure huggingface-cli is available 96 | 97 | # - name: Authenticate with Hugging Face CLI 98 | # shell: bash -l {0} 99 | # env: 100 | # HF_TOKEN: ${{ secrets.HF_TOKEN }} 101 | # run: | 102 | # echo "Attempting Hugging Face CLI login..." 
def evaluate_model(
    model_name: str,
    input_file: str = "ground_truth_sample.json",
):
    """
    Evaluate the tool-calling behavior of an LLM given a list of queries.

    Every query is run through a ``mock_agent`` ChemGraph workflow, the
    resulting tool calls are saved to ``<model_name>_<timestamp>_tool_call.json``
    and compared against the ground truth; the per-query comparison is written
    to ``<model_name>_<timestamp>_eval.txt``.

    Parameters
    ----------
    model_name : str
        Name of the LLM model to use in ChemGraph.
    input_file : str
        Path to the ground truth sample JSON file.

    Returns
    -------
    float
        Percentage of queries whose tool calls all matched the ground truth
        (0.0 when the input file contains no queries).
    """
    with open(input_file, "r", encoding="utf-8") as f:
        list_of_queries = json.load(f)

    workflow_type = "mock_agent"
    cg = ChemGraph(
        model_name=model_name,
        workflow_type=workflow_type,
        structured_output=True,
        return_option="state",
    )

    llm_tool_calls = []
    for idx, item in enumerate(list_of_queries):
        query = item["query"]
        state = cg.run(query, {"configurable": {"thread_id": str(idx)}})
        llm_tool_calls.append(get_workflow_from_state(state))

    # Save tool call results
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_file = f"{model_name}_{timestamp}_tool_call.json"
    with open(output_file, "w", encoding="utf-8") as wf:
        json.dump(llm_tool_calls, wf, indent=4)
    print(f"Saved tool calls to {output_file}")

    # Evaluation
    toolsets = [
        molecule_name_to_smiles,
        run_ase,
        smiles_to_atomsdata,
        file_to_atomsdata,
        save_atomsdata_to_file,
    ]

    func_descriptions = [convert_to_openai_function(tool) for tool in toolsets]
    accurate_tool_call = 0
    eval_details = {}
    # llm_tool_calls was built one entry per query, so the two lists align.
    for toolcall, item in zip(llm_tool_calls, list_of_queries):
        model_outputs = toolcall.get("tool_calls", {})
        answers = item.get("answer", {}).get("tool_calls", {})
        eval_result = multi_function_checker_with_order(
            func_descriptions=func_descriptions,
            model_outputs=model_outputs,
            answers=answers,
        )
        if eval_result["acc_n_toolcalls"] == eval_result["n_toolcalls"]:
            accurate_tool_call += 1
        eval_details[item["query"]] = eval_result
        print(eval_result)

    # Report against the real number of queries and guard the empty case,
    # which would otherwise raise ZeroDivisionError.
    total = len(llm_tool_calls)
    accuracy = accurate_tool_call / total * 100 if total else 0.0

    print(
        f"Accuracy of {model_name}: {accuracy}% "
        f"({accurate_tool_call}/{total} accurate tool calls)"
    )

    output_eval_file = f"{model_name}_{timestamp}_eval.txt"

    # Cannot do json.dump() due to DeepDiff output not serializable.
    with open(output_eval_file, "w", encoding="utf-8") as wf:
        pprint.pprint(eval_details, stream=wf, width=120)
    print(f"Saved evaluation results to {output_eval_file}")

    return accuracy
def main(fname: str, n_structures: int):
    """
    Run an LLM geometry optimization workflow on a subset of molecules
    from the input SMILES dataset.

    For each of the first ``n_structures`` entries, a "smiles_to_opt" query
    is built with ``get_query``, run through a single-agent ``ChemGraph``
    instance, and the extracted workflow plus the written state are stored
    under the molecule's SMILES string. Results are dumped to
    ``llm_workflow_<timestamp>.json`` in the current working directory.

    Args:
        fname (str): Path to the JSON file containing SMILES data. Each entry
            is read through its "smiles" and "name" keys.
        n_structures (int): Number of molecules to process from the dataset.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(fname, "r", encoding="utf-8") as f:
        smiles_data = json.load(f)

    combined_data = {}

    cca = ChemGraph(
        model_name='gpt-4o-mini',
        workflow_type="single_agent",
        structured_output=True,
        return_option="state",
    )

    # Iterate through the first n_structures molecules
    for idx, molecule in enumerate(smiles_data[:n_structures]):
        print("********************************************")
        print(
            f"MOLECULE SMILES: {molecule['smiles']} MOLECULE NAME: {molecule['name']}"
        )
        print("********************************************")

        smiles = molecule["smiles"]

        # One thread id per molecule; the same config is used to run the
        # query and to write the resulting state.
        thread_config = {"configurable": {"thread_id": str(idx)}}

        query = get_query(smiles, query_name="smiles_to_opt")
        state = cca.run(query, config=thread_config)

        llm_workflow = get_workflow_from_state(state)
        state_data = cca.write_state(config=thread_config)

        # Store results in a structured dictionary (a repeated SMILES string
        # overwrites the earlier entry).
        combined_data[smiles] = {
            "llm_workflow": llm_workflow,
            "metadata": state_data,
        }

    # Save the results to a timestamped JSON file
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"llm_workflow_{timestamp}.json"

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, indent=4)
def get_query(
    name: str,
    query_name: str = "atomsdata",
    method: str = "mace_mp",
) -> str:
    """Get the query text for a molecule-name task for CompChemAgent.

    Args:
        name (str): Molecule name.
        query_name (str, optional): Type of query. Recognised values are
            "name_to_smiles", "name_to_coord", "name_to_opt", "name_to_vib",
            "name_to_enthalpy", "name_to_gibbs" and "name_to_opt_file".
            Defaults to "atomsdata", which is NOT one of the recognised
            values, so calling without ``query_name`` yields
            "Query not found" (default kept for backward compatibility).
        method (str, optional): The method/level of theory for CompChemAgent
            to run the simulation. Not used by the "name_to_smiles" and
            "name_to_coord" queries. Defaults to "mace_mp".

    Returns:
        str: The formatted query, or the literal string "Query not found"
        when ``query_name`` is not recognised.
    """
    query_dict = {
        "name_to_smiles": f"Provide the SMILES string corresponding to this molecule: {name}",
        "name_to_coord": f"Provide the XYZ coordinates corresponding to this molecule: {name}",
        "name_to_opt": f"Perform geometry optimization for a molecule {name} using {method}",
        "name_to_vib": f"Run vibrational frequency calculation for a molecule {name} using {method}",
        "name_to_enthalpy": f"Calculate the enthalpy of a molecule {name} using {method}",
        "name_to_gibbs": f"Calculate the Gibbs free energy of a molecule {name} using {method} potential at a temperature of 400K",
        "name_to_opt_file": f"Perform geometry optimization for a molecule {name} using {method}. Save the optimized coordinate in an XYZ file.",
    }

    # Unknown query types fall back to a fixed message instead of raising.
    return query_dict.get(query_name, "Query not found")


def main(fname: str, n_structures: int):
    """
    Run an LLM name-to-SMILES workflow on a subset of molecules from the
    input dataset.

    For each of the first ``n_structures`` entries, a "name_to_smiles" query
    is built with ``get_query``, run through a single-agent ``ChemGraph``
    instance, and the extracted workflow plus the written state are stored
    under the molecule's name. Results are dumped to
    ``llm_workflow_<timestamp>.json`` in the current working directory.

    Args:
        fname (str): Path to the JSON file containing the molecule data. Each
            entry is read through its "smiles" and "name" keys.
        n_structures (int): Number of molecules to process from the dataset.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(fname, "r", encoding="utf-8") as f:
        smiles_data = json.load(f)

    combined_data = {}

    cca = ChemGraph(
        model_name='gpt-4o-mini',
        workflow_type="single_agent",
        structured_output=True,
        return_option="state",
    )

    # Iterate through the first n_structures molecules
    for idx, molecule in enumerate(smiles_data[:n_structures]):
        print("********************************************")
        print(
            f"MOLECULE SMILES: {molecule['smiles']} MOLECULE NAME: {molecule['name']}"
        )
        print("********************************************")

        name = molecule["name"]

        # One thread id per molecule; the same config is used to run the
        # query and to write the resulting state.
        thread_config = {"configurable": {"thread_id": str(idx)}}

        query = get_query(name, query_name="name_to_smiles", method="mace_mp")
        state = cca.run(query, config=thread_config)

        llm_workflow = get_workflow_from_state(state)
        state_data = cca.write_state(config=thread_config)

        # Store results in a structured dictionary (a repeated name
        # overwrites the earlier entry).
        combined_data[name] = {
            "llm_workflow": llm_workflow,
            "metadata": state_data,
        }

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"llm_workflow_{timestamp}.json"

    # Save the results to a JSON file
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, indent=4)
def get_query(
    smiles: str,
    query_name: str = "smiles_to_coord",
    method: str = "mace_mp",
) -> str:
    """Get the query text for a SMILES-related task for CompChemAgent.

    Args:
        smiles (str): SMILES string.
        query_name (str, optional): Type of query. Recognised values are
            "smiles_to_coord", "smiles_to_opt", "smiles_to_vib",
            "smiles_to_enthalpy", "smiles_to_gibbs" (fixed at T=800K) and
            "smiles_to_opt_file". Defaults to "smiles_to_coord".
        method (str, optional): The method/level of theory for CompChemAgent
            to run the simulation. Not used by the "smiles_to_coord" query.
            Defaults to "mace_mp".

    Returns:
        str: The formatted query, or the literal string "Query not found"
        when ``query_name`` is not recognised.
    """
    query_dict = {
        "smiles_to_coord": f"Provide the XYZ coordinates corresponding to this SMILES string: {smiles}",
        "smiles_to_opt": f"Perform geometry optimization for this SMILES string {smiles} using {method}",
        "smiles_to_vib": f"Run vibrational frequency calculation for this SMILES string {smiles} using {method}",
        "smiles_to_enthalpy": f"Calculate the enthalpy of this SMILES string {smiles} using {method}",
        "smiles_to_gibbs": f"Calculate the Gibbs free energy of this SMILES string {smiles} using {method} at T=800K",
        "smiles_to_opt_file": f"Perform geometry optimization for this SMILES string {smiles} using {method}. Save the optimized coordinate in an XYZ file.",
    }

    # Unknown query types fall back to a fixed message instead of raising.
    return query_dict.get(query_name, "Query not found")


def main(fname: str, n_structures: int):
    """
    Run an LLM Gibbs free energy workflow on a subset of molecules from the
    input SMILES dataset.

    For each of the first ``n_structures`` entries, a "smiles_to_gibbs" query
    is built with ``get_query``, run through a single-agent ``ChemGraph``
    instance, and the extracted workflow plus the written state are stored
    under the molecule's SMILES string. Results are dumped to
    ``llm_workflow_<timestamp>.json`` in the current working directory.

    Args:
        fname (str): Path to the JSON file containing SMILES data. Each entry
            is read through its "smiles" and "name" keys.
        n_structures (int): Number of molecules to process from the dataset.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(fname, "r", encoding="utf-8") as f:
        smiles_data = json.load(f)

    combined_data = {}

    cca = ChemGraph(
        model_name='gpt-4o-mini',
        workflow_type="single_agent",
        structured_output=True,
        return_option="state",
    )

    # Iterate through the first n_structures molecules
    for idx, molecule in enumerate(smiles_data[:n_structures]):
        print("********************************************")
        print(
            f"MOLECULE SMILES: {molecule['smiles']} MOLECULE NAME: {molecule['name']}"
        )
        print("********************************************")

        smiles = molecule["smiles"]

        # One thread id per molecule; the same config is used to run the
        # query and to write the resulting state.
        thread_config = {"configurable": {"thread_id": str(idx)}}

        query = get_query(smiles, query_name="smiles_to_gibbs", method="mace_mp")
        state = cca.run(query, config=thread_config)

        llm_workflow = get_workflow_from_state(state)
        state_data = cca.write_state(config=thread_config)

        # Store results in a structured dictionary (a repeated SMILES string
        # overwrites the earlier entry).
        combined_data[smiles] = {
            "llm_workflow": llm_workflow,
            "metadata": state_data,
        }

    # Save the results to a timestamped JSON file
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"llm_workflow_{timestamp}.json"

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, indent=4)
def get_query(
    smiles: str,
    query_name: str = "smiles_to_coord",
    method: str = "mace_mp",
) -> str:
    """Get the query text for a SMILES-related task for CompChemAgent.

    Args:
        smiles (str): SMILES string.
        query_name (str, optional): Type of query. Recognised values are
            "smiles_to_coord", "smiles_to_opt", "smiles_to_vib",
            "smiles_to_enthalpy", "smiles_to_gibbs" (fixed at T=400K) and
            "smiles_to_opt_file". Defaults to "smiles_to_coord".
        method (str, optional): The method/level of theory for CompChemAgent
            to run the simulation. Not used by the "smiles_to_coord" query.
            Defaults to "mace_mp".

    Returns:
        str: The formatted query, or the literal string "Query not found"
        when ``query_name`` is not recognised.
    """
    query_dict = {
        "smiles_to_coord": f"Provide the XYZ coordinates corresponding to this SMILES string: {smiles}",
        "smiles_to_opt": f"Perform geometry optimization for this SMILES string {smiles} using {method}",
        "smiles_to_vib": f"Run vibrational frequency calculation for this SMILES string {smiles} using {method}",
        "smiles_to_enthalpy": f"Calculate the enthalpy of this SMILES string {smiles} using {method}",
        "smiles_to_gibbs": f"Calculate the Gibbs free energy of this SMILES string {smiles} using {method} at T=400K",
        "smiles_to_opt_file": f"Perform geometry optimization for this SMILES string {smiles} using {method}. Save the optimized coordinate in an XYZ file.",
    }

    # Unknown query types fall back to a fixed message instead of raising.
    return query_dict.get(query_name, "Query not found")


def main(fname: str, n_structures: int):
    """
    Run an LLM SMILES-to-coordinates workflow on a subset of molecules from
    the input SMILES dataset.

    For each of the first ``n_structures`` entries, a "smiles_to_coord" query
    is built with ``get_query``, run through a single-agent ``ChemGraph``
    instance, and the extracted workflow plus the written state are stored
    under the molecule's SMILES string. Results are dumped to
    ``llm_workflow_<timestamp>.json`` in the current working directory.

    Args:
        fname (str): Path to the JSON file containing SMILES data. Each entry
            is read through its "smiles" and "name" keys.
        n_structures (int): Number of molecules to process from the dataset.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(fname, "r", encoding="utf-8") as f:
        smiles_data = json.load(f)

    combined_data = {}

    cca = ChemGraph(
        model_name='gpt-4o-mini',
        workflow_type="single_agent",
        structured_output=True,
        return_option="state",
    )

    # Iterate through the first n_structures molecules
    for idx, molecule in enumerate(smiles_data[:n_structures]):
        print("********************************************")
        print(
            f"MOLECULE SMILES: {molecule['smiles']} MOLECULE NAME: {molecule['name']}"
        )
        print("********************************************")

        smiles = molecule["smiles"]

        # One thread id per molecule; the same config is used to run the
        # query and to write the resulting state.
        thread_config = {"configurable": {"thread_id": str(idx)}}

        query = get_query(smiles, query_name="smiles_to_coord", method="mace_mp")
        state = cca.run(query, config=thread_config)

        llm_workflow = get_workflow_from_state(state)
        state_data = cca.write_state(config=thread_config)

        # Store results in a structured dictionary (a repeated SMILES string
        # overwrites the earlier entry).
        combined_data[smiles] = {
            "llm_workflow": llm_workflow,
            "metadata": state_data,
        }

    # Save the results to a timestamped JSON file
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"llm_workflow_{timestamp}.json"

    with open(filename, "w", encoding="utf-8") as f:
        json.dump(combined_data, f, indent=4)
def get_workflow_from_log(file_path: str) -> dict:
    """Convert a run_logs file to a workflow dictionary for evaluations.

    This function reads a JSON log file containing tool calls and their
    results, and converts it into a standardized workflow dictionary format.

    Parameters
    ----------
    file_path : str
        Path to the run logs file in JSON format

    Returns
    -------
    dict
        A dictionary containing:
        - tool_calls: List of tool call arguments (the "args" of every call,
          in log order)
        - result: The final result or answer from the workflow. This key is
          always present; it is None when the log holds no state entries.

    Notes
    -----
    The function expects the log file to contain:
    - A 'state' list with tool calls and their arguments
    - A final message whose 'content' is either a JSON object string with an
      'answer' field (the answer is returned) or anything else (the content
      is returned unchanged)
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    states = data.get("state") or []

    # NOTE(review): only the arguments are recorded here, whereas
    # get_workflow_from_state records {name: args}. The arguments-only shape
    # is what this function has always produced, so it is kept; confirm with
    # the evaluation code whether the tool name should be included.
    workflow_dict = {
        "tool_calls": [
            call.get("args")
            for state in states
            for call in state.get("tool_calls") or []
        ]
    }

    # An empty or missing 'state' list has no final message to inspect.
    if not states:
        workflow_dict["result"] = None
        return workflow_dict

    content = states[-1].get("content")

    # Default to the raw content so 'result' is set on every path.
    result = content
    if isinstance(content, str) and "answer" in content:
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            # Plain text that merely mentions "answer": keep it verbatim.
            logging.debug("Exception thrown while parsing result: %s", e)
        else:
            if isinstance(parsed, dict):
                result = parsed.get("answer")
    workflow_dict["result"] = result

    return workflow_dict


def get_workflow_from_state(state) -> dict:
    """Convert a state object to a workflow dictionary.

    This function walks a state made of plain dicts and lists, collects the
    tool calls of every AI message, and extracts the final result.

    Parameters
    ----------
    state : dict or list
        Either a dictionary with a "messages" list, or the list of messages
        itself. Messages are dictionaries; AI messages are recognised by
        ``"type" == "ai"`` and may carry a "tool_calls" list.

    Returns
    -------
    dict
        A dictionary containing:
        - tool_calls: List of dictionaries mapping tool names to their
          arguments, in traversal order
        - result: The final result or answer from the workflow, or None when
          there are no messages

    Notes
    -----
    The final message's content may be:
    - A JSON string with an 'answer' field (the answer is returned)
    - Any other JSON string (the decoded value is returned)
    - A plain string (returned unchanged)
    - Any other content type (returned unchanged)
    """
    workflow_dict = {"tool_calls": []}

    def recurse(obj):
        if isinstance(obj, dict):
            # Extract tool_calls if it's an AI message
            if obj.get("type") == "ai":
                for call in obj.get("tool_calls") or []:
                    workflow_dict["tool_calls"].append(
                        {call.get("name"): call.get("args", {})}
                    )
            # Recurse into all values so nested message lists are found too
            for v in obj.values():
                recurse(v)
        elif isinstance(obj, list):
            for item in obj:
                recurse(item)

    recurse(state)

    # Accept both the {"messages": [...]} mapping and a bare message list.
    messages = state.get("messages") if isinstance(state, dict) else state
    if not messages:
        workflow_dict["result"] = None
        return workflow_dict

    content = messages[-1].get("content", {})

    if isinstance(content, str):
        try:
            content = json.loads(content)
        except json.JSONDecodeError:
            pass  # keep content as-is if it's not valid JSON

    # Extract result (just the value of the "answer" key if it exists)
    if isinstance(content, dict) and "answer" in content:
        workflow_dict["result"] = content["answer"]
    else:
        workflow_dict["result"] = content

    return workflow_dict