├── src
    ├── chemgraph
    │   ├── __init__.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── atomsdata.py
    │   │   ├── graspa_input.py
    │   │   ├── calculators
    │   │   │   ├── emt_calc.py
    │   │   │   ├── aimnet2_calc.py
    │   │   │   ├── mopac_calc.py
    │   │   │   ├── psi4_calc.py
    │   │   │   ├── fairchem_calc.py
    │   │   │   └── nwchem_calc.py
    │   │   ├── supported_models.py
    │   │   └── agent_response.py
    │   ├── tools
    │   │   ├── __init__.py
    │   │   ├── files
    │   │   │   ├── __init__.py
    │   │   │   └── template
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── force_field.def
    │   │   │   │   ├── methane.def
    │   │   │   │   ├── N2.def
    │   │   │   │   ├── CO2.def
    │   │   │   │   ├── TIP4P.def
    │   │   │   │   └── simulation.input
    │   │   ├── local_model_loader.py
    │   │   ├── alcf_loader.py
    │   │   ├── generic_tools.py
    │   │   ├── cheminformatics_tools.py
    │   │   ├── anthropic_loader.py
    │   │   ├── groq_loader.py
    │   │   └── gemini_loader.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── logging_config.py
    │   │   └── get_workflow_from_llm.py
    │   ├── state
    │   │   ├── multi_agent_state.py
    │   │   └── state.py
    │   ├── prompt
    │   │   ├── llama_prompt.py
    │   │   └── single_agent_prompt.py
    │   └── graphs
    │   │   └── mock_agent.py
    └── ui
    │   ├── __init__.py
    │   └── config.py
├── tests
    ├── __init__.py
    ├── conftest.py
    ├── test_llm_agent.py
    └── test_calculators.py
├── docs
    ├── license.md
    ├── code_formatting_and_linting.md
    ├── citation.md
    ├── acknowledgements.md
    ├── index.md
    ├── project_structure.md
    ├── example_usage.md
    ├── running_local_models.md
    └── streamlit_web_interface.md
├── .pre-commit-config.yaml
├── scripts
    └── evaluations
    │   ├── generate_evaluation_data
    │       ├── Exp12
    │       │   └── find_error.py
    │       ├── Exp6
    │       │   └── manual_files
    │       │   │   ├── 2,3,3,3-tetrafluoropropanoic acid.xyz
    │       │   │   ├── (2E,4Z)-3-chlorohexa-2,4-dienedioate.xyz
    │       │   │   ├── 4-bromo-6,8-dioxabicyclo[3.2.1]octane.xyz
    │       │   │   ├── O-ethyl N-prop-2-enylcarbamothioate.xyz
    │       │   │   ├── 2-thiophen-2-yl-3,1-benzoxazin-4-one.xyz
    │       │   │   ├── 2-[4-(hydroxymethyl)phenoxy]acetic acid.xyz
    │       │   │   ├── 4-hydroxy-3-methyl-2-prop-2-enylcyclopent-2-en-1-one.xyz
    │       │   │   ├── 2-ethyl-4-phenyl-1,3-thiazole.xyz
    │       │   │   ├── 6-pyridin-2-ylpyridine-3-sulfonic acid.xyz
    │       │   │   ├── 5-(5-fluoro-2-methoxyphenyl)-1H-pyrazol-3-amine.xyz
    │       │   │   ├── 2-(5-chloropyridin-2-yl)-1H-quinolin-4-one.xyz
    │       │   │   ├── 2-[difluoromethyl(propan-2-yloxy)phosphoryl]oxypropane.xyz
    │       │   │   ├── 12,16-dioxatetracyclo[11.2.1.02,11.03,8]hexadeca-2(11),3,5,7,9-pentaene.xyz
    │       │   │   ├── 3-(4-methylphenyl)-5-pyridin-4-yl-1,2,4-oxadiazole.xyz
    │       │   │   ├── (E)-1-(5-bromo-2-hydroxyphenyl)-3-(4-fluorophenyl)prop-2-en-1-one.xyz
    │       │   │   ├── 1-benzyl-5-nitroindole.xyz
    │       │   │   ├── 2-[(3,4-dichlorophenyl)methyl-(2-hydroxyethyl)amino]ethanol.xyz
    │       │   │   ├── 7-methoxy-1,2-dimethyl-9H-pyrido[3,4-b]indol-2-ium.xyz
    │       │   │   ├── [(E)-(2-chloro-1-methylindol-3-yl)methylideneamino] 3-chlorobenzoate.xyz
    │       │   │   ├── N-benzyl-N-methyl-3,5-dinitrobenzamide.xyz
    │       │   │   └── N-butyl-N-ethyl-3-methyl-2-nitrobenzamide.xyz
    │       └── Exp11
    │       │   └── manual_files
    │       │       ├── C(C(C(=O)O)O)S.xyz
    │       │       ├── CC(COC)O.xyz
    │       │       ├── COC(=O)NS(=O)(=O)OC.xyz
    │       │       ├── CC1=C(C=C(C=C1Cl)[N+](=O)[O-])Cl.xyz
    │       │       ├── CCOC(C(F)(F)F)(C(F)(F)F)O.xyz
    │       │       ├── COC1=C(C=C(C=C1)C(=O)Cl)OC.xyz
    │       │       ├── C1=CC=C(C=C1)OC2=C(C(=O)C(C2(Cl)Cl)(Cl)Cl)Cl.xyz
    │       │       ├── C1=CC(=C(N=C1)Cl)C(=O)NC2=NC=C(C=C2)Cl.xyz
    │       │       ├── C1=CC2=CN(N=C2C=C1)C3=CC(=CC=C3)F.xyz
    │       │       ├── C1=CC=C(C=C1)N2N=C(N=N2)C3=CN=CC=C3.xyz
    │       │       ├── CCCCC1=C(C=C(S1)C)C.xyz
    │       │       ├── CN1C2=C(C=C(C=C2)F)SC1=NC(=O)C3=C(SC(=C3)Cl)Cl.xyz
    │       │       ├── C1CC(C1)(C2=CC=CC3=CC=CC=C32)O.xyz
    │       │       ├── CN1C2=C(C(=O)NC1=O)N(C(=S)N2)CCOC.xyz
    │       │       └── C1=CC=C(C(=C1)C2=NC3=C(C=CC(=C3)F)NC2=O)N.xyz
    │   ├── pubchempy
    │       └── get_molecule_from_pubchempy.py
    │   ├── run_llm_workflow
    │       ├── Exp12_from_reaction_to_enthalpy
    │       │   └── run_llm_workflow.py
    │       ├── Exp14_from_reaction_to_enthalpy_multiagent
    │       │   └── run_llm_workflow.py
    │       ├── Exp13_from_reaction_to_gibbs
    │       │   └── run_llm_workflow.py
    │       ├── Exp15_from_reaction_to_gibbs_multi_agent
    │       │   └── run_llm_workflow.py
    │       ├── Exp8_from_smiles_to_opt
    │       │   └── run_llm_workflow.py
    │       ├── Exp1_from_name_to_smiles
    │       │   └── run_llm_workflow.py
    │       ├── Exp10_from_smiles_to_gibbs
    │       │   └── run_llm_workflow.py
    │       └── Exp7_from_smiles_to_coords
    │       │   └── run_llm_workflow.py
    │   └── mock_llm
    │       └── mock_eval.py
├── .github
    └── workflows
    │   ├── tests.yml
    │   ├── ci.yml
    │   ├── doc_ci.yml
    │   └── conda-tests.yml
├── .gitignore
├── environment.yml
├── docker-compose.yml
├── Dockerfile
├── notebooks
    ├── Demo_infrared_spectrum.ipynb
    └── cif_files
    │   └── calf-20_pacmof.cif
├── config.toml
├── pyproject.toml
├── mkdocs.yml
└── Dockerfile.arm


/src/chemgraph/__init__.py:
--------------------------------------------------------------------------------
1 |  


--------------------------------------------------------------------------------
/src/chemgraph/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/__init__.py:
--------------------------------------------------------------------------------
1 |  


--------------------------------------------------------------------------------
/src/chemgraph/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/files/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/files/template/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # Empty file to make tests a package
2 | 


--------------------------------------------------------------------------------
/docs/license.md:
--------------------------------------------------------------------------------
1 | !!! info
2 |     This project is licensed under the Apache 2.0 License.


--------------------------------------------------------------------------------
/src/chemgraph/tools/files/template/force_field.def:
--------------------------------------------------------------------------------
1 | # rules to overwrite
2 | 0
3 | # number of defined interactions
4 | 0
5 | # mixing rules to overwrite
6 | 0
7 | 


--------------------------------------------------------------------------------
/src/ui/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | ChemGraph UI Package
3 | 
4 | This package contains the user interface components for ChemGraph including
5 | the Streamlit web app and command-line interface.
6 | """
7 | 
8 | __version__ = "0.1.0"
9 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://github.com/astral-sh/ruff-pre-commit
 3 |   # Ruff version.
 4 |   rev: v0.9.8
 5 |   hooks:
 6 |     # Run the linter.
 7 |     - id: ruff
 8 |     # Run the formatter.
 9 |     - id: ruff-format
10 | 


--------------------------------------------------------------------------------
/docs/code_formatting_and_linting.md:
--------------------------------------------------------------------------------
1 | This project uses [Ruff](https://github.com/astral-sh/ruff) for **both formatting and linting**. To ensure all code follows our style guidelines, install the pre-commit hook:
2 | 
3 | ```sh
4 | pip install pre-commit
5 | pre-commit install
6 | ```


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp12/find_error.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import sys
 3 | 
 4 | with open(sys.argv[1], "r") as rf:
 5 |     data = json.load(rf)
 6 | 
 7 | for item in data:
 8 |     if "ERROR" in data[item]["manual_workflow"]["result"]['value']:
 9 |         print(item)
10 | 


--------------------------------------------------------------------------------
/src/chemgraph/state/multi_agent_state.py:
--------------------------------------------------------------------------------
 1 | from typing import TypedDict, Annotated
 2 | from langgraph.graph import add_messages
 3 | 
 4 | 
 5 | class ManagerWorkerState(TypedDict):  # typed state dict for a manager/worker agent graph (going by the class name)
 6 |     messages: Annotated[list, add_messages]  # list annotated with langgraph's add_messages
 7 |     worker_result: Annotated[list, add_messages]  # same annotation as messages, kept as a separate key
 8 |     current_task_index: int  # presumably an index into task_list; confirm against the graph code
 9 |     task_list: list
10 |     worker_channel: dict[str, Annotated[list[str], add_messages]]  # NOTE(review): add_messages is nested inside dict[...] here rather than annotating the key itself; confirm it is applied as intended
11 |     current_worker: str  # presumably a key of worker_channel; verify against callers
12 | 


--------------------------------------------------------------------------------
/docs/citation.md:
--------------------------------------------------------------------------------
 1 | If you use ChemGraph in your research, please cite our work:
 2 |     
 3 |     ```bibtex
 4 |     @article{pham2025chemgraph,
 5 |     title={ChemGraph: An Agentic Framework for Computational Chemistry Workflows},
 6 |     author={Pham, Thang D and Tanikanti, Aditya and Keçeli, Murat},
 7 |     journal={arXiv preprint arXiv:2506.06363},
 8 |     year={2025},
 9 |     url={https://arxiv.org/abs/2506.06363}
10 |     }
11 |     ```


--------------------------------------------------------------------------------
/docs/acknowledgements.md:
--------------------------------------------------------------------------------
1 | !!! info
2 |     This research used resources of the Argonne Leadership Computing Facility, a U.S.
3 |     Department of Energy (DOE) Office of Science user facility at Argonne National
4 |     Laboratory and is based on research supported by the U.S. DOE Office of
5 |     Science-Advanced Scientific Computing Research Program, under Contract No.
6 |     DE-AC02-06CH11357. Our work leverages ALCF Inference Endpoints, which provide a robust API
7 |     for LLM inference on ALCF HPC clusters via Globus Compute. We are thankful to Serkan
8 |     Altuntaş for his contributions to the user interface of ChemGraph and for insightful
9 |     discussions on AIOps.


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | !!! info "ChemGraph"
2 | 
3 |     **ChemGraph** is an agentic framework that can automate molecular simulation workflows using large language models (LLMs). Built on top of `LangGraph` and `ASE`, ChemGraph allows users to perform complex computational chemistry tasks, from structure generation to thermochemistry calculations, with a natural language interface. 
4 | 
5 | !!! info "ChemGraph"
6 | 
7 |     ChemGraph supports diverse simulation backends, including ab initio quantum chemistry methods (e.g., coupled-cluster, DFT via NWChem, ORCA), semi-empirical methods (e.g., XTB via TBLite), and machine learning potentials (e.g., MACE, UMA) through a modular integration with `ASE`.


--------------------------------------------------------------------------------
/docs/project_structure.md:
--------------------------------------------------------------------------------
 1 | ```
 2 | chemgraph/
 3 | │
 4 | ├── src/                       # Source code
 5 | │   ├── chemgraph/             # Top-level package
 6 | │   │   ├── agent/             # Agent-based task management
 7 | │   │   ├── graphs/            # Workflow graph utilities
 8 | │   │   ├── models/            # Different Pydantic models
 9 | │   │   ├── prompt/            # Agent prompt
10 | │   │   ├── state/             # Agent state
11 | │   │   ├── tools/             # Tools for molecular simulations
12 | │   │   ├── utils/             # Other utility functions
13 | │
14 | ├── pyproject.toml             # Project configuration
15 | └── README.md                  # Project documentation
16 | ```


--------------------------------------------------------------------------------
/src/chemgraph/tools/files/template/methane.def:
--------------------------------------------------------------------------------
 1 | # critical constants: Temperature [K], Pressure [Pa], and Acentric factor [-]
 2 | 190.564
 3 | 4599200.0
 4 | 0.01142
 5 | # Number Of Atoms
 6 | 1
 7 | # Number Of Groups
 8 | 1
 9 | # Alkane-group
10 | rigid
11 | # number of atoms
12 | 1
13 | # atomic positions
14 | 0 CH4_sp3 0.0 0.0 0.0
15 | # Chiral centers Bond  BondDipoles Bend  UrayBradley InvBend  Torsion Imp. Torsion Bond/Bond Bond/Bend Bend/Bend Bond/Torsion Bend/Torsion IntraVDW Intra ch-ch Intra ch-bd Intra bd-bd
16 |                0    0            0    0            0       0        0            0         0         0         0            0            0        0           0           0           0
17 | # Number of config moves
18 | 0


--------------------------------------------------------------------------------
/src/chemgraph/state/state.py:
--------------------------------------------------------------------------------
 1 | from typing import TypedDict, Annotated
 2 | from langgraph.graph import add_messages
 3 | from langgraph.managed.is_last_step import RemainingSteps
 4 | 
 5 | 
 6 | class State(TypedDict):  # typed state dict: a message list plus langgraph's RemainingSteps
 7 |     messages: Annotated[list, add_messages]  # list annotated with langgraph's add_messages
 8 |     remaining_steps: RemainingSteps  # type imported from langgraph.managed.is_last_step
 9 | 
10 | 
11 | class MultiAgentState(TypedDict):  # typed state dict: one question string plus seven add_messages-annotated lists
12 |     question: str  # the only field without an add_messages annotation
13 |     first_router_response: Annotated[list, add_messages]  # every *_response field is a list annotated with langgraph's add_messages
14 |     regular_response: Annotated[list, add_messages]
15 |     feedback_response: Annotated[list, add_messages]
16 |     geometry_response: Annotated[list, add_messages]
17 |     parameter_response: Annotated[list, add_messages]
18 |     opt_response: Annotated[list, add_messages]
19 |     end_response: Annotated[list, add_messages]
20 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import warnings
 3 | from ase import Atoms
 4 | 
 5 | # Configure pytest-asyncio
 6 | #pytest_plugins = ("pytest_asyncio",)
 7 | 
 8 | 
 9 | @pytest.fixture(autouse=True)
10 | def setup_test_env():
11 |     """Setup any test environment variables or configurations needed"""
12 |     # Filter numpy deprecation warnings
13 |     warnings.filterwarnings(
14 |         "ignore",
15 |         message="In future, it will be an error for 'np.bool_' scalars to be interpreted as an index",
16 |         category=DeprecationWarning,
17 |     )
18 |     pass
19 | 
20 | 
21 | @pytest.fixture
22 | def simple_h2_molecule() -> Atoms:
23 |     """Fixture returning an ASE Atoms object for H2: one atom at the origin, the other at z = 1."""
24 |     return Atoms("H2", positions=[[0, 0, 0], [0, 0, 1]])
25 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: Tests
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         python-version: ["3.10", "3.11", "3.12"]
15 | 
16 |     steps:
17 |     - uses: actions/checkout@v4
18 |     
19 |     - name: Set up Python ${{ matrix.python-version }}
20 |       uses: actions/setup-python@v5
21 |       with:
22 |         python-version: ${{ matrix.python-version }}
23 |     
24 |     - name: Install dependencies
25 |       run: |
26 |         python -m pip install --upgrade pip
27 |         pip install -e .
28 |             
29 |     - name: Run tests
30 |       run: |
31 |         python -m pytest tests/ -v 


--------------------------------------------------------------------------------
/src/chemgraph/models/atomsdata.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | from typing import List, Optional, Union
 3 | 
 4 | 
 5 | class AtomsData(BaseModel):
 6 |     """AtomsData object inherited from Pydantic BaseModel. Used to store atomic data (from ASE Atoms object or QCElemental Molecule object) that cannot be parsed via LLM Schema."""
 7 | 
 8 |     numbers: List[int] = Field(..., description="Atomic numbers")  # required: Field(...) declares no default
 9 |     positions: List[List[float]] = Field(..., description="Atomic positions")  # required: Field(...) declares no default
10 |     cell: Optional[Union[List[List[float]], None]] = Field(  # Optional[...] already admits None, so the inner Union[..., None] is redundant but harmless
11 |         default=None, description="Cell vectors or None"
12 |     )
13 |     pbc: Optional[Union[List[bool], None]] = Field(  # same redundant Optional[Union[..., None]] form as cell
14 |         default=None, description="Periodic boundary conditions or None"
15 |     )
16 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2,3,3,3-tetrafluoropropanoic acid.xyz:
--------------------------------------------------------------------------------
 1 | 11
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -0.42921437       0.54576328      -0.18097090
 4 | C        0.65590010       0.39948988       0.88313998
 5 | O        0.44556271       0.05142040       2.02654375
 6 | O        1.87921107       0.70368394       0.39568866
 7 | C       -0.76626418      -0.82752519      -0.77327532
 8 | F       -1.67797985      -0.69202319      -1.77310340
 9 | F        0.36255084      -1.39606172      -1.30338990
10 | F       -1.26672222      -1.66521933       0.16955693
11 | F       -1.59824360       1.09397592       0.37648418
12 | H       -0.09445923       1.21703144      -0.97603048
13 | H        2.48965873       0.56946458       1.15535650
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / cache files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *.pyo
 5 | *.pyd
 6 | *.pyc
 7 | **/__pycache__/
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | eggs/
15 | *.egg-info/
16 | .installed.cfg
17 | *.egg
18 | 
19 | # IDEs / editors
20 | .vscode/
21 | .idea/
22 | *.iml
23 | 
24 | # Pytest cache
25 | .pytest_cache/
26 | 
27 | # MacOS
28 | .DS_Store
29 | 
30 | # Jupyter Notebook checkpoints and modified notebooks
31 | .ipynb_checkpoints/
32 | 
33 | # Generated molecular structure files
34 | *.xyz
35 | # env
36 | chemgraph-env/
37 | # env
38 | .env
39 | # Log files
40 | *run_logs/
41 | *vib/
42 | plots/
43 | initial_evaluations/
44 | test/
45 | test_outputs/
46 | *ir/
47 | 
48 | .venv
49 | combine*
50 | vllm/
51 | logs/
52 | error_log.txt
53 | .env
54 | test.csv
55 | nwchem/
56 | nwchem.nwi
57 | nwchem.nwo
58 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C(C(C(=O)O)O)S.xyz:
--------------------------------------------------------------------------------
 1 | 13
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        1.00384778      -0.38523592      -0.54797190
 4 | C        0.04914690       0.28959097       0.44763385
 5 | C       -1.39897450      -0.09286661       0.11887822
 6 | O       -2.01916183      -0.89337859       0.80273504
 7 | O       -1.89856412       0.52232032      -0.96933914
 8 | O        0.34120627      -0.10532630       1.76844762
 9 | S        2.72560723       0.06029128      -0.26856786
10 | H        0.68866902      -0.13608385      -1.57227673
11 | H        0.95454275      -1.47788978      -0.43084357
12 | H        0.16681909       1.37609294       0.32082756
13 | H       -2.80171964       0.16101959      -1.09142179
14 | H       -0.41176814      -0.67262040       2.02275157
15 | H        2.60034919       1.35408635      -0.60085285
16 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/files/template/N2.def:
--------------------------------------------------------------------------------
 1 | # critical constants: Temperature [K], Pressure [Pa], and Acentric factor [-]
 2 | 126.192
 3 | 3395800.0
 4 | 0.0372
 5 | #Number Of Atoms
 6 | 3
 7 | # Number of groups
 8 | 1
 9 | # N2-group
10 | rigid
11 | # number of atoms
12 | 3
13 | # atomic positions
14 | 0 N_n2    0.0           0.0           0.55
15 | 1 N_com   0.0           0.0           0.0
16 | 2 N_n2    0.0           0.0          -0.55
17 | # Chiral centers Bond  BondDipoles Bend  UrayBradley InvBend  Torsion Imp. Torsion Bond/Bond Stretch/Bend Bend/Bend Stretch/Torsion Bend/Torsion IntraVDW IntraCoulomb
18 |                0    2            0    0            0       0        0            0         0            0         0               0            0        0            0
19 | # Bond stretch: atom n1-n2, type, parameters
20 | 0 1 RIGID_BOND
21 | 1 2 RIGID_BOND
22 | # Number of config moves
23 | 0
24 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: chemgraph
 2 | channels:
 3 |   - conda-forge
 4 |   - defaults
 5 | dependencies:
 6 |   - python=3.10
 7 |   - pip
 8 |   - numpy=2.2.6
 9 |   - pandas=2.2.3
10 |   - pytest=8.4.1
11 |   - rich=14.1.0
12 |   - toml=0.10.2
13 |   - tblite
14 |   - nwchem
15 |   - pip:
16 |     - ase==3.25.0
17 |     - rdkit==2025.3.3
18 |     - langgraph==0.4.7
19 |     - langchain-openai==0.3.27
20 |     - langchain-ollama==0.3.4
21 |     - langchain-anthropic==0.3.17
22 |     - langchain-google-genai==2.1.7
23 |     - langchain-experimental==0.3.4
24 |     - pydantic==2.11.7
25 |     - pubchempy @ git+https://github.com/keceli/PubChemPy.git@main
26 |     - pyppeteer==2.0.0
27 |     - numexpr==2.11.0
28 |     - deepdiff==8.5.0
29 |     - pymatgen==2025.3.10
30 |     - mace-torch==0.3.13
31 |     - streamlit==1.48.1
32 |     - stmol==0.0.9
33 |     - ipython-genutils==0.2.0
34 |     - langsmith==0.3.45
35 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/files/template/CO2.def:
--------------------------------------------------------------------------------
 1 | # critical constants: Temperature [K], Pressure [Pa], and Acentric factor [-]
 2 | 304.1282
 3 | 7377300.0
 4 | 0.22394
 5 | #Number Of Atoms
 6 |  3
 7 | # Number of groups
 8 | 1
 9 | # CO2-group
10 | rigid
11 | # number of atoms
12 | 3
13 | # atomic positions
14 | 0 O_co2     0.0           0.0           1.16
15 | 1 C_co2     0.0           0.0           0.0
16 | 2 O_co2     0.0           0.0          -1.16
17 | # Chiral centers Bond  BondDipoles Bend  UrayBradley InvBend  Torsion Imp. Torsion Bond/Bond Stretch/Bend Bend/Bend Stretch/Torsion Bend/Torsion IntraVDW IntraCoulomb
18 |                0    2            0    0            0       0        0            0         0            0         0               0            0        0            0
19 | # Bond stretch: atom n1-n2, type, parameters
20 | 0 1 RIGID_BOND
21 | 1 2 RIGID_BOND
22 | # Number of config moves
23 | 0
24 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: ci 
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master 
 6 |       - main
 7 | permissions:
 8 |   contents: write
 9 | jobs:
10 |   deploy:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v4
14 |       - name: Configure Git Credentials
15 |         run: |
16 |           git config user.name github-actions[bot]
17 |           git config user.email 41898282+github-actions[bot]@users.noreply.github.com
18 |       - uses: actions/setup-python@v5
19 |         with:
20 |           python-version: 3.x
21 |       - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 
22 |       - uses: actions/cache@v4
23 |         with:
24 |           key: mkdocs-material-${{ env.cache_id }}
25 |           path: ~/.cache 
26 |           restore-keys: |
27 |             mkdocs-material-
28 |       - run: pip install mkdocs-material mkdocstrings 
29 |       - run: mkdocs gh-deploy --force


--------------------------------------------------------------------------------
/.github/workflows/doc_ci.yml:
--------------------------------------------------------------------------------
 1 | name: ci 
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master 
 6 |       - main
 7 | permissions:
 8 |   contents: write
 9 | jobs:
10 |   deploy:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - uses: actions/checkout@v4
14 |       - name: Configure Git Credentials
15 |         run: |
16 |           git config user.name github-actions[bot]
17 |           git config user.email 41898282+github-actions[bot]@users.noreply.github.com
18 |       - uses: actions/setup-python@v5
19 |         with:
20 |           python-version: 3.x
21 |       - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 
22 |       - uses: actions/cache@v4
23 |         with:
24 |           key: mkdocs-material-${{ env.cache_id }}
25 |           path: ~/.cache 
26 |           restore-keys: |
27 |             mkdocs-material-
28 |       - run: pip install mkdocs-material mkdocstrings 
29 |       - run: mkdocs gh-deploy --force
30 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/(2E,4Z)-3-chlorohexa-2,4-dienedioate.xyz:
--------------------------------------------------------------------------------
 1 | 14
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -0.64730736       0.09619728      -0.33420021
 4 | C       -1.80821817      -0.58266614      -0.15480842
 5 | C       -3.09305511       0.05431106      -0.62691524
 6 | O       -3.14716006       1.17966897      -1.17746243
 7 | O       -4.13916989      -0.63191938      -0.42927409
 8 | C        0.69181070      -0.33036490       0.04320689
 9 | C        1.74013497       0.50736498      -0.22733803
10 | C        3.22920399       0.21442251       0.10470721
11 | O        3.86137097       1.21920583      -0.29979015
12 | O        3.58026368      -0.82311811       0.65083607
13 | Cl       0.86979259      -1.86063274       0.80520889
14 | H       -0.72356606       1.07414240      -0.81637298
15 | H       -1.87870796      -1.56170131       0.30911345
16 | H        1.46460771       1.44508954      -0.71558121
17 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CC(COC)O.xyz:
--------------------------------------------------------------------------------
 1 | 16
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -0.81962116       1.70764166      -0.06452879
 4 | C       -0.80183077       0.41645332      -0.86626935
 5 | C       -0.04037893      -0.71141845      -0.17098697
 6 | O        1.32912270      -0.33050277      -0.02459644
 7 | C        1.93364947      -1.01377730       1.10128185
 8 | O       -2.17574572      -0.00834329      -0.98496038
 9 | H       -1.38285981       2.48233917      -0.60525242
10 | H        0.21382505       2.04932411       0.10318951
11 | H       -1.31441058       1.53469647       0.90262650
12 | H       -0.35878195       0.58491092      -1.86857514
13 | H       -0.11901067      -1.63098879      -0.77714979
14 | H       -0.51420646      -0.91312095       0.80831435
15 | H        1.83881233      -2.11001251       1.00967694
16 | H        1.46716848      -0.69092466       2.04797261
17 | H        2.99702259      -0.74404813       1.12365620
18 | H       -2.25275458      -0.62222881      -1.73439870
19 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/files/template/TIP4P.def:
--------------------------------------------------------------------------------
 1 | # critical constants: Temperature [K], Pressure [Pa], and Acentric factor [-]
 2 | 647.14
 3 | 22064000.0
 4 | -0.217000
 5 | # total number Of atoms
 6 | 4
 7 | # Number of groups
 8 | 1
 9 | # water-group
10 | rigid
11 | # number of atoms
12 | 4
13 | # atomic positions
14 | 0 Ow       0.0                     0.0                     0.0
15 | 1 Lw       0.0                     0.15                    0.0
16 | 2 Hw       0.75695                 0.58588                 0.0
17 | 3 Hw      -0.75695                 0.58588                 0.0
18 | # Chiral centers Bond  BondDipoles Bend  UrayBradley InvBend  Torsion Imp. Torsion Bond/Bond Stretch/Bend Bend/Bend Stretch/Torsion Bend/Torsion IntraVDW IntraCoulomb
19 |                0    3            0    0            0       0        0            0         0            0         0               0            0        0            0
20 | # Bond stretch: atom n1-n2, type, parameters
21 | 0 1 RIGID_BOND
22 | 0 2 RIGID_BOND
23 | 0 3 RIGID_BOND
24 | # Number of config moves
25 | 0
26 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/COC(=O)NS(=O)(=O)OC.xyz:
--------------------------------------------------------------------------------
 1 | 17
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        3.40533405       0.62715737      -0.92584574
 4 | O        1.97621695       0.51194174      -1.22767368
 5 | C        1.30129897      -0.09239765      -0.22027639
 6 | O        1.77688841      -0.47267031       0.83971654
 7 | N       -0.03609888      -0.21259915      -0.55390420
 8 | S       -1.14799451      -1.00638160       0.41687490
 9 | O       -1.37768557      -0.23607091       1.61254663
10 | O       -0.81675854      -2.39929729       0.49060642
11 | O       -2.39449186      -0.89522648      -0.57161233
12 | C       -3.23563589       0.32307626      -0.50934639
13 | H        3.85126829       1.13355478      -1.78904861
14 | H        3.83844326      -0.37145746      -0.78043720
15 | H        3.54542981       1.20628117      -0.00401067
16 | H       -0.36123434       0.08341704      -1.47605076
17 | H       -4.08454324       0.11950718      -1.17156706
18 | H       -2.67593481       1.19401016      -0.88183713
19 | H       -3.56450209       0.48715514       0.52186590
20 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CC1=C(C=C(C=C1Cl)[N+](=O)[O-])Cl.xyz:
--------------------------------------------------------------------------------
 1 | 17
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -2.68647667       0.17089214       0.22470915
 4 | C       -1.20242586       0.07862890       0.09273598
 5 | C       -0.37191167       1.20048682       0.26911239
 6 | C        1.01788531       1.14102607       0.12475770
 7 | C        1.59285018      -0.09457504      -0.16774733
 8 | C        0.82815472      -1.25066055      -0.31580521
 9 | C       -0.55791260      -1.14548511      -0.16308530
10 | Cl      -1.49399421      -2.59111101      -0.27856442
11 | N        3.02655750      -0.18440015      -0.30170633
12 | O        3.51466410      -1.29832641      -0.55202879
13 | O        3.68754379       0.85648266      -0.15552081
14 | Cl      -1.07273434       2.71781299       0.69938171
15 | H       -2.98894010       0.00301689       1.27265224
16 | H       -3.03903940       1.16740477      -0.06948307
17 | H       -3.17485341      -0.59394289      -0.39213375
18 | H        1.62857409       2.03027778       0.24665059
19 | H        1.29205856      -2.20752785      -0.53392476
20 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/files/template/simulation.input:
--------------------------------------------------------------------------------
 1 | NumberOfInitializationCycles NCYCLE
 2 | NumberOfEquilibrationCycles  0
 3 | NumberOfProductionCycles     NCYCLE
 4 | UseMaxStep no
 5 | MaxStepPerCycle 1
 6 | 
 7 | RestartFile no
 8 | BMCBiasingMethod LJ_Biasing
 9 | NumberOfTrialPositions 10
10 | NumberOfTrialOrientations 10
11 | NumberOfBlocks 1
12 | AdsorbateAllocateSpace 30240
13 | 
14 | NumberOfSimulations 1
15 | SingleSimulation yes
16 | DifferentFrameworks yes
17 | 
18 | UseChargesFromCIFFile         yes
19 | InputFileType cif
20 | FrameworkName CIFFILE
21 | UnitCells 0 UC_X UC_Y UC_Z
22 | ChargeMethod Ewald
23 | Temperature TEMPERATURE
24 | Pressure PRESSURE
25 | OverlapCriteria 1e5
26 | CutOffVDW CUTOFF
27 | CutOffCoulomb CUTOFF
28 | EwaldPrecision 1e-6
29 | 
30 | Component 0 MoleculeName              ADSORBATE
31 |              IdealGasRosenbluthWeight  1.0
32 |              FugacityCoefficient       PR-EOS
33 |              TranslationProbability    1.0
34 |              RotationProbability       1.0
35 |              ReinsertionProbability    1.0
36 |              SwapProbability           2.0
37 |              CreateNumberOfMolecules   0
38 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/4-bromo-6,8-dioxabicyclo[3.2.1]octane.xyz:
--------------------------------------------------------------------------------
 1 | 18
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        0.05053435       1.35046328       0.29006857
 4 | C        1.41008147       0.74682489      -0.09417775
 5 | C        1.38471943      -0.79256398      -0.01996791
 6 | C       -0.01594470      -1.32155878      -0.44930532
 7 | O       -0.86557039      -1.40848065       0.71071331
 8 | C       -1.81522497      -0.34973720       0.61502010
 9 | C       -1.13229767       0.60354391      -0.37612824
10 | O       -0.61117015      -0.36591474      -1.32002558
11 | Br       2.79794226      -1.58316052      -1.12649141
12 | H        0.01291449       2.41044487      -0.00392254
13 | H       -0.08046942       1.35504892       1.38240538
14 | H        2.20569322       1.14622396       0.55641724
15 | H        1.64740958       1.06407165      -1.11889638
16 | H        1.59244953      -1.14933620       1.00162077
17 | H        0.03020595      -2.32154432      -0.92546482
18 | H       -2.03724324       0.05624007       1.61311610
19 | H       -2.75467334      -0.71895227       0.17056087
20 | H       -1.81935639       1.27838711      -0.90554236
21 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CCOC(C(F)(F)F)(C(F)(F)F)O.xyz:
--------------------------------------------------------------------------------
 1 | 19
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        2.70030713       0.26420240       0.21550443
 4 | C        1.43675465       0.15520309      -0.62108370
 5 | O        0.42525697      -0.35554679       0.27728375
 6 | C       -0.88743996       0.07463389       0.05375973
 7 | C       -1.50096126      -0.62150599      -1.19550431
 8 | F       -2.74150900      -0.15316444      -1.46849014
 9 | F       -1.57039007      -1.96403342      -0.99365374
10 | F       -0.73365277      -0.40794227      -2.29786761
11 | C       -1.62030256      -0.35394994       1.36869014
12 | F       -1.31014302      -1.61845503       1.73425300
13 | F       -1.23012067       0.47423038       2.38899338
14 | F       -2.96650396      -0.24735492       1.25306464
15 | O       -1.03733050       1.45014855      -0.14122354
16 | H        2.95266276      -0.70786968       0.65999632
17 | H        3.52521910       0.58885142      -0.43363079
18 | H        2.58048812       0.99973563       1.02316216
19 | H        1.56514840      -0.55661283      -1.45192659
20 | H        1.11882015       1.11321011      -1.05041468
21 | H       -0.70630350       1.86621984       0.67908754
22 | 


--------------------------------------------------------------------------------
/tests/test_llm_agent.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from chemgraph.agent.llm_agent import ChemGraph
 3 | from unittest.mock import Mock, patch
 4 | from langchain_core.messages import HumanMessage, AIMessage
 5 | 
 6 | 
 7 | @pytest.fixture
 8 | def mock_llm():  # fixture: a bare Mock that test_agent_query returns from the patched model loader
 9 |     return Mock()
10 | 
11 | 
12 | def test_chemgraph_initialization():
13 |     with patch("chemgraph.agent.llm_agent.load_openai_model") as mock_load:
14 |         mock_load.return_value = Mock()
15 |         agent = ChemGraph(model_name="gpt-4o-mini")
16 |         assert hasattr(agent, "workflow")
17 | 
18 | 
19 | def test_agent_query(mock_llm):
20 |     with patch("chemgraph.agent.llm_agent.load_openai_model") as mock_load:
21 |         # Set up the mock chain
22 |         mock_chain = Mock()
23 |         mock_chain.invoke.return_value = AIMessage(content="Test response")
24 |         mock_llm.bind_tools.return_value = mock_chain
25 |         mock_load.return_value = mock_llm
26 | 
27 |         agent = ChemGraph(model_name="gpt-4o-mini")
28 |         response = agent.run("What is the SMILES string for water?")
29 |         assert isinstance(response, AIMessage)
30 |         assert response.content == "Test response"
31 |         mock_llm.bind_tools.assert_called_once()
32 |         mock_chain.invoke.assert_called_once()
33 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/O-ethyl N-prop-2-enylcarbamothioate.xyz:
--------------------------------------------------------------------------------
 1 | 20
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -3.39341162       0.53753488      -1.16676706
 4 | C       -2.64858950       0.24585368       0.12713894
 5 | O       -1.28060367      -0.10710400      -0.27943644
 6 | C       -0.41118834      -0.41851446       0.70734394
 7 | S       -0.79121949      -0.47843632       2.32779723
 8 | N        0.81379453      -0.67016100       0.19236523
 9 | C        1.99751944      -0.93293789       0.97641690
10 | C        2.94752830       0.25475090       0.89960236
11 | C        4.14391390       0.06239887       0.33644359
12 | H       -2.93768038       1.37688481      -1.71511847
13 | H       -3.41550381      -0.34552904      -1.82431634
14 | H       -4.42613714       0.80679724      -0.90439041
15 | H       -3.07749169      -0.59535081       0.69225971
16 | H       -2.59989473       1.11309930       0.80355507
17 | H        0.93764227      -0.55583905      -0.81486908
18 | H        1.69932610      -1.15639047       2.01357573
19 | H        2.54664319      -1.78246346       0.56297740
20 | H        2.69414250       1.26514757       1.25467221
21 | H        3.74623274       0.59392115      -1.91883556
22 | H        3.45523892       0.78567152      -2.59226925
23 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/COC1=C(C=C(C=C1)C(=O)Cl)OC.xyz:
--------------------------------------------------------------------------------
 1 | 22
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        3.37740223       0.54684544      -0.07018300
 4 | O        2.05480450       1.11300858      -0.04362760
 5 | C        1.04444535       0.21032138      -0.00788364
 6 | C       -0.26044010       0.75858704       0.01843820
 7 | C       -1.35818667      -0.08155260       0.05564732
 8 | C       -1.21310052      -1.48843915       0.06829408
 9 | C        0.08495637      -2.03172675       0.04208580
10 | C        1.18735401      -1.17666659       0.00459286
11 | C       -2.46518744      -2.27070884       0.10869525
12 | O       -3.57388155      -1.77356471       0.13072714
13 | Cl      -2.27854677      -4.01877942       0.12420847
14 | O       -0.30502424       2.11722340       0.00382685
15 | C       -1.62545853       2.69424367       0.03019294
16 | H        3.52299145      -0.08267384      -0.96152693
17 | H        3.56777236      -0.06195562       0.82709370
18 | H        4.07464924       1.39279150      -0.09743876
19 | H       -2.37593390       0.30726415       0.07663077
20 | H        0.22512270      -3.11020569       0.05105814
21 | H        2.18673797      -1.61944371      -0.01529997
22 | H       -2.17128809       2.40735421       0.94264359
23 | H       -1.48311612       3.78143391       0.01404878
24 | H       -2.21607225       2.38664361      -0.84693860
25 | 


--------------------------------------------------------------------------------
/src/chemgraph/utils/logging_config.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import sys
 3 | 
 4 | 
 5 | def setup_logger(name=None, level=logging.INFO):
 6 |     """Set up a logger with consistent formatting.
 7 | 
 8 |     This function configures a logger with a standard format that includes
 9 |     timestamp, logger name, log level, and message. It ensures that handlers
10 |     are not duplicated if the logger already exists.
11 | 
12 |     Parameters
13 |     ----------
14 |     name : str, optional
15 |         Logger name. If None, returns the root logger, by default None
16 |     level : int, optional
17 |         Logging level (e.g., logging.INFO, logging.DEBUG), by default logging.INFO
18 | 
19 |     Returns
20 |     -------
21 |     logging.Logger
22 |         Configured logger instance with the specified name and level
23 | 
24 |     Notes
25 |     -----
26 |     The logger format includes:
27 |     - Timestamp
28 |     - Logger name
29 |     - Log level
30 |     - Message
31 |     """
32 |     logger = logging.getLogger(name)
33 | 
34 |     if not logger.handlers:  # Only add handler if none exists
35 |         handler = logging.StreamHandler(sys.stdout)
36 |         formatter = logging.Formatter(
37 |             "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
38 |         )
39 |         handler.setFormatter(formatter)
40 |         logger.addHandler(handler)
41 | 
42 |     logger.setLevel(level)
43 |     return logger
44 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.8'
 2 | 
 3 | services:
 4 |   jupyter_lab:
 5 |     build:
 6 |       context: .
 7 |       dockerfile: Dockerfile
 8 |     ports:
 9 |       - "8888:8888"
10 |     volumes:
11 |       - .:/app
12 |     environment:
13 |       - VLLM_BASE_URL=http://vllm_server:8000/v1
14 |       - OPENAI_API_KEY=dummy-key
15 |     depends_on:
16 |       - vllm_server
17 | 
18 |   vllm_server:
19 |     build:
20 |       context: ./vllm
21 |       dockerfile: docker/Dockerfile.arm
22 |     command: ["--host", "0.0.0.0", "--port", "8000", "--model", "meta-llama/Llama-3.2-3B-Instruct", "--enable-auto-tool-choice","--tool-call-parser","llama3_json","--max-model-len", "10240","--tensor-parallel-size", "1","--max-num-seqs", "16"]
23 |     ports:
24 |       - "8001:8000"
25 |     privileged: true
26 |     shm_size: '8g'
27 |     deploy:
28 |       resources:
29 |         limits:
30 |           memory: 12G
31 |         reservations:
32 |           memory: 10G
33 |     environment:
34 |       - VLLM_LOG_LEVEL=debug
35 |       - HF_TOKEN=${HF_TOKEN}
36 |       - OMP_NUM_THREADS=8
37 |     networks:
38 |       default:
39 |         aliases:
40 |           - vllm_server_alias
41 | 
42 | # Networks allow services to communicate with each other using their service names as hostnames.
43 | # A default network is created if not specified, but explicit definition can be useful.
44 | networks:
45 |   default:
46 |     driver: bridge 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-thiophen-2-yl-3,1-benzoxazin-4-one.xyz:
--------------------------------------------------------------------------------
 1 | 23
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -4.06752083       0.13006338       0.01315105
 4 | C       -3.51086393       1.39092744       0.28974183
 5 | C       -2.12383516       1.55881851       0.34132498
 6 | C       -1.27074009       0.47428239       0.11838257
 7 | C       -1.83936129      -0.80038599      -0.16130370
 8 | C       -3.22824435      -0.96411271      -0.21201573
 9 | C       -0.93769778      -1.92040845      -0.39130996
10 | O       -1.23700688      -3.07576321      -0.64238879
11 | O        0.44531847      -1.63558237      -0.31469849
12 | C        0.87703335      -0.35008296      -0.03422954
13 | N        0.09768286       0.65957113       0.17348829
14 | C        2.30098693      -0.19408625       0.01521710
15 | C        3.02718893       0.95975335       0.27076680
16 | C        4.43744507       0.72494617       0.23627216
17 | C        4.76948510      -0.58285675      -0.04039612
18 | S        3.37783510      -1.55895869      -0.26531752
19 | H       -5.15032389      -0.00564620      -0.02811293
20 | H       -4.15089363       2.25843463       0.46854744
21 | H       -1.66690699       2.52523897       0.55366450
22 | H       -3.61407711      -1.96000847      -0.42987805
23 | H        2.53191842       1.91272794       0.46952253
24 | H        5.18459135       1.50157741       0.41118314
25 | H        5.74798636      -1.04844927      -0.12922444
26 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-[4-(hydroxymethyl)phenoxy]acetic acid.xyz:
--------------------------------------------------------------------------------
 1 | 23
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -1.78228967      -0.75223556       1.10702194
 4 | C       -1.74492850      -0.25555097      -0.19536297
 5 | C       -0.53020607       0.19222813      -0.72144465
 6 | C        0.61900232       0.20222132       0.07297350
 7 | C        0.56324549      -0.30354175       1.37174734
 8 | C       -0.63649548      -0.78988066       1.91067415
 9 | C       -0.66688232      -1.31192533       3.32928594
10 | O        0.41442244      -2.20390563       3.59751162
11 | O       -0.54451828       0.64278466      -2.02706650
12 | C        0.68515029       0.52706420      -2.74097578
13 | C        1.41565263       1.85779203      -2.69751733
14 | O        2.32581429       2.13652964      -1.92935212
15 | O        0.93049709       2.72992011      -3.61503581
16 | H       -2.73352582      -1.12489478       1.50428686
17 | H       -2.63539304      -0.22368545      -0.82651100
18 | H        1.54426136       0.61315310      -0.33172193
19 | H        1.46479924      -0.32747775       1.99190520
20 | H       -1.64183826      -1.79143380       3.52230154
21 | H       -0.57784551      -0.47253349       4.03226710
22 | H        0.30594094      -2.93103087       2.95770420
23 | H        0.43359152       0.28207530      -3.77709473
24 | H        1.33733310      -0.24752212      -2.31684392
25 | H        1.45421224       3.55184965      -3.50636556
26 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC=C(C=C1)OC2=C(C(=O)C(C2(Cl)Cl)(Cl)Cl)Cl.xyz:
--------------------------------------------------------------------------------
 1 | 23
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -3.56028835       0.65992908       0.65641427
 4 | C       -2.78692304       1.27890179      -0.33119018
 5 | C       -1.58956060       0.69864223      -0.75687624
 6 | C       -1.16918984      -0.48337974      -0.15724839
 7 | C       -1.93526879      -1.13831634       0.80180308
 8 | C       -3.13080049      -0.54992661       1.21629167
 9 | O        0.02946156      -1.08870856      -0.57756017
10 | C        1.23999002      -0.60096298      -0.27072982
11 | C        2.32950230      -0.98651283      -0.98023353
12 | C        3.52645303      -0.35141525      -0.41805433
13 | O        4.63509796      -0.25740970      -0.89351721
14 | C        3.11164968       0.20600576       1.00342424
15 | C        1.56812921       0.43969637       0.80533755
16 | Cl       1.23560201       2.07203889       0.07449963
17 | Cl       0.64786200       0.28709380       2.29967967
18 | Cl       3.99822768       1.61859332       1.50013029
19 | Cl       3.45745649      -1.14248915       2.15706557
20 | Cl       2.32002809      -2.00143552      -2.31883786
21 | H       -4.49724684       1.11378642       0.98562907
22 | H       -3.11156362       2.21510464      -0.79413058
23 | H       -0.99355451       1.15619636      -1.54846164
24 | H       -1.59982095      -2.09353761       1.21393000
25 | H       -3.72524302      -1.05189439       1.98458019
26 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/4-hydroxy-3-methyl-2-prop-2-enylcyclopent-2-en-1-one.xyz:
--------------------------------------------------------------------------------
 1 | 23
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -0.92244947       1.97684007       0.60877263
 4 | C       -0.79756539       0.63783133      -0.02944534
 5 | C        0.31358535      -0.07482890      -0.31142830
 6 | C       -0.06729455      -1.35748620      -0.97177334
 7 | O        0.71555426      -2.11435733      -1.52932536
 8 | C       -1.58688029      -1.52417641      -0.80692148
 9 | C       -2.03614725      -0.06729797      -0.56677878
10 | O       -2.36806189       0.59562618      -1.78598563
11 | C        1.75548110       0.26862115      -0.16188391
12 | C        2.37896899      -0.65464485       0.84778169
13 | C        2.81861778      -0.25022789       2.04591157
14 | H       -1.43667702       1.91447720       1.58122917
15 | H        0.05405460       2.45486066       0.77317668
16 | H       -1.53419308       2.63135234      -0.03333287
17 | H       -1.76290013      -2.11918185       0.10404728
18 | H       -2.06706157      -2.05384412      -1.63977647
19 | H       -2.87903414       0.02395405       0.14059553
20 | H       -3.11684305       0.14392181      -2.20995419
21 | H        2.23428497       0.10130341      -1.14089620
22 | H        1.88529751       1.31703000       0.13158503
23 | H        2.42615522      -1.70126022       0.54640042
24 | H        3.22722205      -0.94342530       2.77234941
25 | H        2.76588597       0.79491285       2.34803955
26 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-ethyl-4-phenyl-1,3-thiazole.xyz:
--------------------------------------------------------------------------------
 1 | 24
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -4.14824094      -0.29450964      -0.15922779
 4 | C       -3.29146738      -0.14796810       1.10768600
 5 | C       -1.96763110       0.46371552       0.77350119
 6 | N       -0.88847843      -0.17617800       0.44271421
 7 | C        0.16961172       0.68354696       0.12584331
 8 | C       -0.16090922       2.02146441       0.20358312
 9 | S       -1.79159508       2.24165718       0.67186174
10 | C        1.44541834       0.11027264      -0.26357949
11 | C        1.59384707      -1.28743380      -0.34360504
12 | C        2.81360354      -1.85261252      -0.72069918
13 | C        3.90868507      -1.03669609      -1.03342931
14 | C        3.77403588       0.35589618      -0.95986411
15 | C        2.55702852       0.92384341      -0.57778207
16 | H       -3.71555993      -1.02583215      -0.85498393
17 | H       -5.15788058      -0.62554355       0.10932295
18 | H       -4.23235517       0.67265295      -0.67938920
19 | H       -3.13200699      -1.11776532       1.60017333
20 | H       -3.80041613       0.51467108       1.81830900
21 | H        0.46303797       2.88983524       0.00787692
22 | H        0.72904555      -1.91145778      -0.10263084
23 | H        2.89910752      -2.94092802      -0.76930451
24 | H        4.85906592      -1.48269342      -1.33281346
25 | H        4.61617818       1.01004052      -1.19987556
26 | H        2.45787566       2.01202230      -0.51776280
27 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/6-pyridin-2-ylpyridine-3-sulfonic acid.xyz:
--------------------------------------------------------------------------------
 1 | 24
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        3.79593614      -0.60949634       1.06970341
 4 | C        4.48942328      -0.23981251      -0.08774576
 5 | C        3.73258219       0.15344118      -1.20428098
 6 | N        2.40096692       0.19139189      -1.20585929
 7 | C        1.72952927      -0.16369064      -0.10869843
 8 | C        2.40087325      -0.57475639       1.06410633
 9 | C        0.24502031      -0.11519031      -0.15848658
10 | N       -0.41717277      -0.56407022       0.91521668
11 | C       -1.75346908      -0.54063571       0.92797473
12 | C       -2.51089278      -0.04785012      -0.15178099
13 | C       -1.82601022       0.42263858      -1.29632739
14 | C       -0.43837594       0.37974143      -1.29862143
15 | S       -4.25492746       0.02542083      -0.05691741
16 | O       -4.83259927      -0.07646214      -1.38004319
17 | O       -4.72015017      -0.81571577       1.01711055
18 | O       -4.43604861       1.57640280       0.40533922
19 | H        4.32818796      -0.92212403       1.96880978
20 | H        5.57606247      -0.25081042      -0.13807716
21 | H        4.22390837       0.45304780      -2.13843362
22 | H        1.79435051      -0.85127914       1.92347927
23 | H       -2.25622132      -0.92502685       1.81906926
24 | H       -2.39322918       0.78910056      -2.15182275
25 | H        0.16301595       0.71785458      -2.14003609
26 | H       -5.04075983       1.98788092      -0.24775374
27 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC(=C(N=C1)Cl)C(=O)NC2=NC=C(C=C2)Cl.xyz:
--------------------------------------------------------------------------------
 1 | 24
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -4.02171952      -0.46021289      -0.56177786
 4 | C       -2.66335269      -0.19973692      -0.75307494
 5 | C       -1.94551166       0.51278673       0.23072664
 6 | C       -2.65980629       0.90829175       1.38076987
 7 | N       -3.95029163       0.65820536       1.56009862
 8 | C       -4.62382289      -0.00460828       0.61985273
 9 | Cl      -1.84840370       1.73219144       2.65689389
10 | C       -0.49989603       0.89598562       0.03221158
11 | O       -0.11313625       2.06044678      -0.06063592
12 | N        0.30041071      -0.21211083      -0.06865179
13 | C        1.69180402      -0.27615047      -0.27231777
14 | N        2.16145240      -1.51855584      -0.38140873
15 | C        3.45311444      -1.71892730      -0.60268825
16 | C        4.37468872      -0.65702058      -0.72597599
17 | C        3.88217326       0.65153612      -0.59042618
18 | C        2.52319550       0.84208545      -0.35598403
19 | Cl       6.02659929      -0.96715221      -1.02639818
20 | H       -4.59837760      -0.99557761      -1.31408746
21 | H       -2.14806725      -0.52620363      -1.66163700
22 | H       -5.68812465      -0.17499253       0.81945498
23 | H       -0.12842093      -1.13121179       0.03781445
24 | H        3.80716411      -2.74920171      -0.69599836
25 | H        4.57181691       1.49439049      -0.67523289
26 | H        2.09651174       1.83574284      -0.24560294
27 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM continuumio/miniconda3:latest
 2 | 
 3 | # Set the working directory in the container
 4 | WORKDIR /app
 5 | 
 6 | # Copy project files
 7 | COPY . /app
 8 | 
 9 | # Install system dependencies
10 | RUN apt-get update && apt-get install -y --no-install-recommends \
11 |     build-essential \
12 |     git \
13 |     gfortran \
14 |     liblapack-dev \
15 |     pkg-config \
16 |     cmake \
17 |     # Dependencies for headless Chrome (pyppeteer)
18 |     libx11-xcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 libgbm1 libasound2 \
19 |     && rm -rf /var/lib/apt/lists/*
20 | 
21 | # Install tblite, rdkit and other dependencies that are hard to install with pip into the active conda environment (no new environment is created)
22 | RUN conda install -c conda-forge -c rdkit -c pytorch \
23 |     python=3.11 \
24 |     "pytorch<2.6" \
25 |     cpuonly \
26 |     tblite=0.4.0 \
27 |     rdkit \
28 |     -y
29 | 
30 | # Rewrite pyproject.toml without the tblite, rdkit and torch<2.6 lines (installed via conda above) so the pip install below skips them
31 | RUN grep -v "tblite\|rdkit\|torch<2.6" pyproject.toml > temp_pyproject.toml && \
32 |     mv temp_pyproject.toml pyproject.toml
33 | 
34 | # Install packages using pip
35 | RUN pip install --no-cache-dir .
36 | 
37 | # Install JupyterLab
38 | RUN pip install --no-cache-dir jupyterlab
39 | 
40 | # Expose JupyterLab port
41 | EXPOSE 8888
42 | 
43 | # Command to run JupyterLab
44 | CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--LabApp.token=''"]


--------------------------------------------------------------------------------
/src/chemgraph/models/graspa_input.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | 
 3 | 
 4 | class GRASPAInputSchema(BaseModel):
 5 |     output_path: str = Field(
 6 |         description="Absolute or relative path to the directory where gRASPA output files will be stored. If not provided, the output will be stored in the current working directory."
 7 |     )
 8 |     cif_path: str = Field(
 9 |         description="Absolute or relative path to the directory where the CIF file is stored."
10 |     )
11 |     mof_name: str = Field(description="Name of the MOF excluding .cif extension")
12 |     adsorbate: str = Field(
13 |         default='CO2', description="Name of the adsorbate molecule. Only support CO2, H2, CH4 and N2."
14 |     )
15 |     temperature: float = Field(default=300, description="Simulation temperature in Kelvin.")
16 |     pressure: float = Field(default=1e5, description="Simulation pressure in Pascal.")
17 |     n_cycle: int = Field(
18 |         default=100, description="Number of Monte Carlo steps to run in the GCMC simulation."
19 |     )
20 |     cutoff: float = Field(default=12.8, description="The LJ and Coulomb cutoff in Angstrom")
21 |     graspa_cmd: str= Field(
22 |         default="/eagle/projects/HPCBot/thang/soft/gRASPA/src_clean/nvc_main.x > raspa.err 2> raspa.log",
23 |         description="The command to run gRASPA. If not provided, the default command will be used."
24 |     )
25 |     graspa_version: str = Field(
26 |         default="cuda",
27 |         description="The version of gRASPA to use. Only support 'cuda' and 'sycl'."
28 |     )


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC2=CN(N=C2C=C1)C3=CC(=CC=C3)F.xyz:
--------------------------------------------------------------------------------
 1 | 25
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -4.28024087       0.69776109      -0.31950683
 4 | C       -3.10706483       1.45628322      -0.34819174
 5 | C       -1.87705954       0.80532049      -0.20737098
 6 | C       -0.53420916       1.20684142      -0.17232760
 7 | N        0.22390118       0.08775505      -0.00244734
 8 | N       -0.55824779      -1.05826166       0.07980126
 9 | C       -1.82915870      -0.62892868      -0.04405301
10 | C       -3.00961072      -1.38029928      -0.02473540
11 | C       -4.22892055      -0.70553360      -0.15923964
12 | C        1.62974213      -0.01837770       0.09661175
13 | C        2.22024037      -1.27322520       0.28756368
14 | C        3.61130271      -1.34983551       0.38686495
15 | C        4.42679233      -0.21059977       0.29890806
16 | C        3.80113682       1.02453253       0.10685678
17 | C        2.41851145       1.13886220       0.00503758
18 | F        4.18308033      -2.57491101       0.57557461
19 | H       -5.24136675       1.20627674      -0.42242692
20 | H       -3.12728453       2.53832603      -0.47417802
21 | H       -0.08722373       2.19349083      -0.25305767
22 | H       -2.96255947      -2.46028725       0.09500423
23 | H       -5.15165654      -1.29085382      -0.13883039
24 | H        1.59614985      -2.16554744       0.35855318
25 | H        5.51086988      -0.30575986       0.38146332
26 | H        4.40954185       1.92921870       0.03604563
27 | H        1.96333430       2.11936289      -0.14191951
28 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/5-(5-fluoro-2-methoxyphenyl)-1H-pyrazol-3-amine.xyz:
--------------------------------------------------------------------------------
 1 | 25
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -1.43088448       1.87061261       2.10546559
 4 | O       -0.60388531       1.15997605       1.15007001
 5 | C       -1.19176927       0.13467001       0.47587289
 6 | C       -0.35784511      -0.65638573      -0.35791118
 7 | C       -0.90961910      -1.77321863      -1.01696726
 8 | C       -2.26690539      -2.07040621      -0.88129079
 9 | C       -3.10320287      -1.26670718      -0.09565682
10 | C       -2.54737068      -0.17371877       0.56911574
11 | F       -2.76061124      -3.15960742      -1.54087026
12 | C        1.04395260      -0.31050151      -0.54556744
13 | C        1.79603667       0.84456074      -0.27828168
14 | C        3.11526289       0.54547210      -0.74974181
15 | N        3.17651823      -0.68059875      -1.27414623
16 | N        1.90601782      -1.17908142      -1.14080088
17 | N        4.20254044       1.36069175      -0.75194880
18 | H       -2.25562082       2.39632493       1.60469240
19 | H       -0.76603321       2.59452227       2.58878732
20 | H       -1.84792260       1.18583300       2.85836734
21 | H       -0.30741884      -2.42313699      -1.66312180
22 | H       -4.16683341      -1.50140446      -0.00942188
23 | H       -3.19050250       0.45556474       1.18537077
24 | H        1.43646116       1.75709618       0.17798504
25 | H        1.71253194      -2.14910709      -1.41084135
26 | H        5.09141651       0.89507494      -0.89009317
27 | H        4.22568655       2.14347486      -0.10906574
28 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC=C(C=C1)N2N=C(N=N2)C3=CN=CC=C3.xyz:
--------------------------------------------------------------------------------
 1 | 26
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        4.92321522       0.30533439       0.09541241
 4 | C        4.44503916       0.73203908      -1.15611121
 5 | C        3.07553148       0.73581239      -1.44227193
 6 | C        2.17567214       0.30547875      -0.45859628
 7 | C        2.62656914      -0.12425991       0.79564493
 8 | C        4.00070894      -0.12110427       1.06398536
 9 | N        0.83037803       0.31224885      -0.74799986
10 | N       -0.11041664      -0.07819680       0.11943749
11 | C       -1.26687139       0.08811860      -0.56382656
12 | N       -1.00834222       0.57307727      -1.82460026
13 | N        0.27986723       0.71250636      -1.94249039
14 | C       -2.57968852      -0.21371161      -0.01125863
15 | C       -2.66403517      -0.70510182       1.30198882
16 | N       -3.82509036      -1.00566381       1.88165416
17 | C       -4.95280515      -0.83777366       1.19989667
18 | C       -4.97775287      -0.34994079      -0.12572729
19 | C       -3.76807939      -0.03103853      -0.74524477
20 | H        5.99149423       0.30626592       0.30814470
21 | H        5.13568023       1.06910317      -1.92945063
22 | H        2.68885827       1.06382088      -2.40816172
23 | H        1.89015526      -0.45131404       1.53266139
24 | H        4.34079728      -0.45794899       2.04441783
25 | H       -1.74362497      -0.85049217       1.88105950
26 | H       -5.87871524      -1.09931619       1.72131342
27 | H       -5.92549049      -0.22864822      -0.64506508
28 | H       -3.70305418       0.35070516      -1.76554779
29 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/local_model_loader.py:
--------------------------------------------------------------------------------
 1 | from langchain_ollama import ChatOllama
 2 | from chemgraph.models.supported_models import supported_ollama_models
 3 | 
 4 | 
 5 | def load_ollama_model(model_name: str, temperature: float) -> ChatOllama:
 6 |     """Load an Ollama chat model into LangChain.
 7 | 
 8 |     This function loads a local Ollama model and configures it for use with
 9 |     LangChain. It verifies that the requested model is supported before
10 |     attempting to load it.
11 | 
12 |     Parameters
13 |     ----------
14 |     model_name : str
15 |         The name of the Ollama model to load. See supported_ollama_models for list
16 |         of supported models.
17 |     temperature : float
18 |         Controls the randomness of the generated text. Higher values (e.g., 0.8)
19 |         make the output more random, while lower values (e.g., 0.2) make it more
20 |         deterministic.
21 | 
22 |     Returns
23 |     -------
24 |     ChatOllama
25 |         An instance of LangChain's ChatOllama model.
26 | 
27 |     Raises
28 |     ------
29 |     ValueError
30 |         If the specified model is not in the list of supported models.
31 | 
32 |     Notes
33 |     -----
34 |     The model must be installed locally using Ollama before it can be loaded.
35 |     """
36 |     if model_name not in supported_ollama_models:
37 |         raise ValueError(
38 |             f"Unsupported model '{model_name}'. Supported models are: {supported_ollama_models}."
39 |         )
40 | 
41 |     llm = ChatOllama(
42 |         model=model_name,
43 |         temperature=temperature,
44 |     )
45 |     print(f"Successfully loaded model: {model_name}")
46 |     return llm
47 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CCCCC1=C(C=C(S1)C)C.xyz:
--------------------------------------------------------------------------------
 1 | 27
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -4.13360146       0.54251440      -0.49789302
 4 | C       -3.15698444      -0.24747785       0.36445580
 5 | C       -1.72611062       0.18387998       0.07924535
 6 | C       -0.72765225      -0.62188234       0.91222156
 7 | C        0.68596682      -0.24803776       0.60652748
 8 | C        1.55383493      -0.78795473      -0.29971067
 9 | C        2.82736616      -0.09416980      -0.32602527
10 | C        2.89715098       0.97207579       0.52667981
11 | S        1.38723720       1.13178082       1.39260564
12 | C        4.01170170       1.91628639       0.78961249
13 | C        1.20873746      -1.96614516      -1.15407525
14 | H       -5.17232524       0.24667568      -0.30221240
15 | H       -3.93449459       0.37748807      -1.56793217
16 | H       -4.05021381       1.62170966      -0.29940954
17 | H       -3.25597275      -1.32825632       0.16514537
18 | H       -3.37903582      -0.10454052       1.43536299
19 | H       -1.51426405       0.05491856      -0.99704139
20 | H       -1.59392496       1.25998862       0.29041354
21 | H       -0.93922402      -0.45183738       1.97921507
22 | H       -0.88907324      -1.69366220       0.72221507
23 | H        3.64998560      -0.40114698      -0.97907849
24 | H        4.31182508       1.89824821       1.84453749
25 | H        4.87454869       1.64183274       0.17738270
26 | H        3.73033022       2.94945922       0.55186358
27 | H        0.27677065      -1.80151411      -1.71582264
28 | H        2.00320649      -2.18020660      -1.87833920
29 | H        1.05421529      -2.87002640      -0.54643583
30 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-(5-chloropyridin-2-yl)-1H-quinolin-4-one.xyz:
--------------------------------------------------------------------------------
 1 | 27
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -4.45283597      -0.24918104       0.67526354
 4 | C       -4.09197634       0.96804644       0.07001052
 5 | C       -2.75447487       1.23312608      -0.22030760
 6 | C       -1.76212117       0.28348657       0.09149315
 7 | C       -2.12183769      -0.93862154       0.69884765
 8 | C       -3.47064835      -1.18631545       0.98259463
 9 | C       -1.09125009      -1.95103078       1.03470372
10 | O       -1.37591705      -3.03986318       1.57131260
11 | C        0.25720656      -1.56187711       0.68594190
12 | C        0.57409685      -0.35621451       0.09154223
13 | N       -0.42676415       0.52775310      -0.18898551
14 | C        1.96623241       0.03194877      -0.26226534
15 | N        2.91229901      -0.86600499       0.03156717
16 | C        4.18550648      -0.62263723      -0.24067508
17 | C        4.62332840       0.57046685      -0.84450118
18 | C        3.65328814       1.52976976      -1.16352875
19 | C        2.32063787       1.25572104      -0.86969003
20 | Cl       6.27668953       0.84230861      -1.17770588
21 | H       -5.49714619      -0.46604632       0.90649222
22 | H       -4.84958944       1.71422872      -0.17777604
23 | H       -2.47266814       2.18045158      -0.69147453
24 | H       -3.70304593      -2.14277046       1.45174078
25 | H        1.06142067      -2.25361740       0.90282135
26 | H       -0.19975567       1.41852127      -0.62725967
27 | H        4.91601273      -1.39479574       0.02244836
28 | H        3.95490258       2.46692406      -1.63249593
29 | H        1.56840983       2.00622290      -1.12011427
30 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CN1C2=C(C=C(C=C2)F)SC1=NC(=O)C3=C(SC(=C3)Cl)Cl.xyz:
--------------------------------------------------------------------------------
 1 | 28
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -0.88810101      -2.05386180       0.20954546
 4 | N       -1.07406374      -0.62984432      -0.02471721
 5 | C       -2.33268645      -0.04054808      -0.10950492
 6 | C       -2.27396702       1.33460913      -0.37724971
 7 | C       -3.41484963       2.11920131      -0.49523359
 8 | C       -4.64881952       1.48583299      -0.32694931
 9 | C       -4.74533377       0.11205244      -0.05757523
10 | C       -3.57668739      -0.64849400       0.05014505
11 | F       -5.77932437       2.25104731      -0.43115528
12 | S       -0.63002869       1.87897073      -0.52600771
13 | C       -0.03744017       0.22967582      -0.19954787
14 | N        1.19580568      -0.19012975      -0.09472671
15 | C        2.27730851       0.63844275      -0.27479333
16 | O        2.22890617       1.83367752      -0.61013140
17 | C        3.58448388      -0.00514490      -0.04003410
18 | C        3.87953641      -1.29517132       0.37825122
19 | S        5.60087942      -1.54987262       0.49382495
20 | C        5.94195583       0.08164026      -0.01857810
21 | C        4.78738942       0.77961410      -0.25714462
22 | Cl       7.53428618       0.56882171      -0.14762264
23 | Cl       2.89695132      -2.59205533       0.78383952
24 | H        0.18046490      -2.26146426       0.32549742
25 | H       -1.41382169      -2.34912791       1.12397106
26 | H       -1.27785650      -2.62708468      -0.64022427
27 | H       -3.35896528       3.18845300      -0.70863137
28 | H       -5.72215961      -0.36579301       0.06753720
29 | H       -3.67218274      -1.71550850       0.26257354
30 | H        4.73831984       1.82206142      -0.57889930
31 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-[difluoromethyl(propan-2-yloxy)phosphoryl]oxypropane.xyz:
--------------------------------------------------------------------------------
 1 | 28
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -3.11656194      -0.30863386      -0.73427443
 4 | C       -2.33927007       0.92199733      -0.31605861
 5 | C       -3.13388231       1.83995672       0.59553404
 6 | O       -1.15184614       0.55239311       0.48426285
 7 | P       -0.05687130      -0.43414235      -0.09580116
 8 | O       -0.11439903      -0.71279925      -1.55081588
 9 | C       -0.24803525      -1.88569886       1.04258026
10 | F       -1.45812879      -2.51580720       0.86798195
11 | F        0.74502073      -2.81433749       0.82261136
12 | O        1.29441084       0.21536952       0.43596362
13 | C        2.54989418      -0.27482348      -0.16236182
14 | C        2.84614232       0.60142233      -1.35621512
15 | C        3.58587299      -0.17244580       0.93824806
16 | H       -3.41243418      -0.89782445       0.14492961
17 | H       -2.53128723      -0.93714123      -1.41558991
18 | H       -4.02469860       0.03044783      -1.24920774
19 | H       -1.96281840       1.46389551      -1.19590275
20 | H       -4.03412042       2.16831820       0.05683973
21 | H       -2.55098936       2.72288128       0.88864308
22 | H       -3.44126950       1.30777851       1.50847249
23 | H       -0.18429630      -1.54976859       2.08931449
24 | H        2.39772454      -1.31914016      -0.46850949
25 | H        3.78872067       0.26600162      -1.80877356
26 | H        2.03948326       0.52545445      -2.09615326
27 | H        2.95604410       1.65045353      -1.04357890
28 | H        3.68904007       0.86847173       1.27617425
29 | H        4.54904481      -0.51755197       0.53815407
30 | H        3.31951031      -0.79472698       1.80353280
31 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/alcf_loader.py:
--------------------------------------------------------------------------------
 1 | from langchain_openai import ChatOpenAI
 2 | from chemgraph.models.supported_models import supported_alcf_models
 3 | 
 4 | 
 5 | def load_alcf_model(model_name: str, base_url: str, api_key: str = None) -> ChatOpenAI:
 6 |     """
 7 |     Load an models from ALCF inference endpoints (https://github.com/argonne-lcf/inference-endpoints).
 8 | 
 9 |     Parameters
10 |     ----------
11 |     model_name : str
12 |         The name of the model to load. See supported_alcf_models for list of supported models.
13 |     base_url : str
14 |         The base URL of the API endpoint.
15 |     api_key : str, optional
16 |         The OpenAI API key. If not provided, the function will attempt to retrieve it
17 |         from the environment variable `OPENAI_API_KEY`.
18 | 
19 |     Returns
20 |     -------
21 |     ChatOpenAI
22 |         An instance of LangChain's ChatOpenAI model.
23 | 
24 |     Raises
25 |     ------
26 |     ValueError
27 |         If the API key is not provided and cannot be retrieved from the environment.
28 |     """
29 | 
30 |     if api_key is None:
31 |         raise ValueError("API key (access token) is not found")
32 | 
33 |     if model_name not in supported_alcf_models:
34 |         raise ValueError(
35 |             f"Model {model_name} is not supported on ALCF yet. Supported models are: {supported_alcf_models}"
36 |         )
37 |     try:
38 |         llm = ChatOpenAI(
39 |             model=model_name,
40 |             base_url=base_url,
41 |             api_key=api_key,
42 |         )
43 |         print(llm.max_tokens)
44 |         print(f"Successfully loaded model: {model_name} from {base_url}")
45 | 
46 |     except Exception as e:
47 |         print(f"Error with loading {model_name}")
48 |         print(e)
49 | 
50 |     return llm
51 | 


--------------------------------------------------------------------------------
/notebooks/Demo_infrared_spectrum.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "id": "953a0ae8-c496-4286-8619-17844af03c4c",
 7 |    "metadata": {},
 8 |    "outputs": [],
 9 |    "source": [
10 |     "from chemgraph.agent.llm_agent import ChemGraph\n",
11 |     "\n",
12 |     "workflow_type = \"multi_agent\"\n",
13 |     "\n",
14 |     "cg = ChemGraph(\n",
15 |     "    model_name='gpt-4o', \n",
16 |     "    workflow_type = workflow_type, \n",
17 |     "    structured_output=False, \n",
18 |     "    return_option=\"state\",\n",
19 |     "    )\n",
20 |     "cg.visualize()"
21 |    ]
22 |   },
23 |   {
24 |    "cell_type": "code",
25 |    "execution_count": null,
26 |    "id": "5f385ade-f22d-4ecc-840a-5d3dca57b8d5",
27 |    "metadata": {},
28 |    "outputs": [],
29 |    "source": [
30 |     "# Calculate the infrared absorption spectrum of water\n",
31 |     "query = \"Calculate the infrared absorption spectrum of water using ASE and TBLite\"\n",
32 |     "result = cg.run(query, config={\"configurable\": {\"thread_id\": 3}})"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "code",
37 |    "execution_count": null,
38 |    "id": "ac27ce46",
39 |    "metadata": {},
40 |    "outputs": [],
41 |    "source": []
42 |   }
43 |  ],
44 |  "metadata": {
45 |   "kernelspec": {
46 |    "display_name": "chemgraph",
47 |    "language": "python",
48 |    "name": "python3"
49 |   },
50 |   "language_info": {
51 |    "codemirror_mode": {
52 |     "name": "ipython",
53 |     "version": 3
54 |    },
55 |    "file_extension": ".py",
56 |    "mimetype": "text/x-python",
57 |    "name": "python",
58 |    "nbconvert_exporter": "python",
59 |    "pygments_lexer": "ipython3",
60 |    "version": "3.10.18"
61 |   }
62 |  },
63 |  "nbformat": 4,
64 |  "nbformat_minor": 5
65 | }
66 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/12,16-dioxatetracyclo[11.2.1.02,11.03,8]hexadeca-2(11),3,5,7,9-pentaene.xyz:
--------------------------------------------------------------------------------
 1 | 28
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        2.18649854      -0.27145213      -1.26134769
 4 | C        3.11410383      -0.67645617      -0.09224732
 5 | C        2.90503000       0.45667328       0.94572969
 6 | O        2.41835999       1.55374675       0.17431488
 7 | C        1.41764663       0.93187514      -0.66750549
 8 | C        0.30052783       0.44906967       0.23105597
 9 | C        0.63498550       0.07540467       1.52298316
10 | O        1.92965845       0.09884244       1.95372934
11 | C       -0.34594372      -0.37651939       2.43515896
12 | C       -1.64874923      -0.48279829       2.02364362
13 | C       -2.03381836      -0.18344221       0.69126154
14 | C       -3.36817871      -0.33199942       0.24856769
15 | C       -3.71286788      -0.11089598      -1.06790333
16 | C       -2.72581596       0.26913677      -2.00032332
17 | C       -1.41799063       0.43427211      -1.58478402
18 | C       -1.03073762       0.25210615      -0.23245660
19 | H        2.78309032       0.09828369      -2.10565705
20 | H        1.51802244      -1.06163873      -1.63243190
21 | H        2.92497658      -1.66806429       0.33862326
22 | H        4.16775612      -0.64641028      -0.40128872
23 | H        3.81860123       0.73572781       1.50474667
24 | H        1.08318543       1.65724571      -1.41852205
25 | H       -0.01674861      -0.62844262       3.44065269
26 | H       -2.41975619      -0.83049131       2.71562758
27 | H       -4.11505406      -0.64036768       0.98361211
28 | H       -4.74834180      -0.24132105      -1.38764473
29 | H       -2.97448523       0.42973425      -3.05157470
30 | H       -0.64395490       0.70818113      -2.30602023
31 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1CC(C1)(C2=CC=CC3=CC=CC=C32)O.xyz:
--------------------------------------------------------------------------------
 1 | 29
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -3.32437796       0.29799744       1.07308272
 4 | C       -2.85672815       0.69184829      -0.34087939
 5 | C       -1.59468160      -0.17634537      -0.12646757
 6 | C       -1.87957021      -0.13327105       1.39927860
 7 | C       -0.25569700       0.31885007      -0.62376215
 8 | C       -0.16651635       1.34694612      -1.54330498
 9 | C        1.08196512       1.90777099      -1.89044702
10 | C        2.23397873       1.41688652      -1.32446168
11 | C        2.19688356       0.32054255      -0.43151507
12 | C        3.38290655      -0.20112761       0.13618644
13 | C        3.35535183      -1.30654441       0.95905664
14 | C        2.13029875      -1.94860362       1.22965095
15 | C        0.95773796      -1.45443734       0.68773664
16 | C        0.94495485      -0.28920860      -0.11391632
17 | O       -1.89222330      -1.48976961      -0.54129030
18 | H       -4.00275674      -0.56455527       1.04627807
19 | H       -3.73078310       1.10816832       1.68688585
20 | H       -2.60526695       1.75853153      -0.38740162
21 | H       -3.47751169       0.37931063      -1.18525871
22 | H       -1.73969165      -1.08979042       1.91145637
23 | H       -1.31002167       0.67395033       1.88229807
24 | H       -1.07505402       1.74867022      -1.99700570
25 | H        1.11580820       2.73867471      -2.59813466
26 | H        3.20811307       1.84860119      -1.56079367
27 | H        4.32439228       0.29746311      -0.10382124
28 | H        4.28129027      -1.69303237       1.38802840
29 | H        2.09337507      -2.83858214       1.86188190
30 | H        0.01410155      -1.97058138       0.86778136
31 | H       -1.41027742      -1.69836285      -1.36114196
32 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/CN1C2=C(C(=O)NC1=O)N(C(=S)N2)CCOC.xyz:
--------------------------------------------------------------------------------
 1 | 29
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        3.43125121       1.85659763      -0.04076604
 4 | N        2.57854521       0.68275966       0.06556276
 5 | C        1.38078866       0.57964864      -0.55099334
 6 | C        0.56547477      -0.52893030      -0.50170189
 7 | C        0.92264548      -1.68367985       0.25387835
 8 | O        0.28282563      -2.73318935       0.38041523
 9 | N        2.15125973      -1.50240997       0.90469080
10 | C        3.01252388      -0.40595582       0.85231560
11 | O        4.08733659      -0.38207014       1.45119189
12 | N       -0.54206437      -0.24236969      -1.27040320
13 | C       -0.47657724       1.03841099      -1.76023834
14 | S       -1.59210187       1.85674665      -2.63195526
15 | N        0.77372641       1.52016511      -1.32417641
16 | C       -1.71480491      -1.09618954      -1.41574130
17 | C       -2.50271925      -1.11372775      -0.11969700
18 | O       -2.60406830       0.22939551       0.35357713
19 | C       -3.57075603       0.31160638       1.41330681
20 | H        3.00171112       2.69005227       0.52850684
21 | H        3.53477905       2.13214006      -1.09645752
22 | H        4.40363324       1.58976566       0.37386014
23 | H        2.48400974      -2.28020567       1.48261451
24 | H        1.05877174       2.48570627      -1.49696687
25 | H       -1.39519310      -2.10855111      -1.68387579
26 | H       -2.30461643      -0.65847069      -2.22610965
27 | H       -2.00357632      -1.75117368       0.62828846
28 | H       -3.50315815      -1.53246666      -0.31157002
29 | H       -4.58752741       0.07016517       1.05721689
30 | H       -3.56358447       1.34535174       1.78534927
31 | H       -3.30853462      -0.36912151       2.24412058
32 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp11/manual_files/C1=CC=C(C(=C1)C2=NC3=C(C=CC(=C3)F)NC2=O)N.xyz:
--------------------------------------------------------------------------------
 1 | 29
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -4.02938231       1.28992441       0.05757502
 4 | C       -4.59162838       0.01158521       0.16071654
 5 | C       -3.76256907      -1.10292695       0.21176614
 6 | C       -2.35711773      -0.98818438       0.15324218
 7 | C       -1.76452036       0.33542259       0.09858286
 8 | C       -2.64310250       1.43122830       0.02079840
 9 | C       -0.31519789       0.50935286       0.06018861
10 | N        0.45932733      -0.52571509      -0.22370305
11 | C        1.81179981      -0.44622405      -0.33083766
12 | C        2.47397279       0.78395030      -0.09938841
13 | C        3.86203968       0.86678910      -0.21416942
14 | C        4.61698190      -0.25119629      -0.55568550
15 | C        3.95935402      -1.47601846      -0.77887828
16 | C        2.57904411      -1.58443879      -0.66929225
17 | F        4.68119860      -2.58811219      -1.11127876
18 | N        1.67366150       1.84942752       0.24382517
19 | C        0.29350029       1.83069696       0.35360918
20 | O       -0.29869235       2.86258787       0.69657243
21 | N       -1.58672253      -2.09652470       0.20100952
22 | H       -4.65789368       2.18106040       0.00751747
23 | H       -5.67364570      -0.12626890       0.20357945
24 | H       -4.17202297      -2.11231016       0.28961876
25 | H       -2.19974833       2.42444786      -0.04423974
26 | H        4.35325829       1.82400881      -0.03337554
27 | H        5.70288785      -0.18155132      -0.64710621
28 | H        2.06892746      -2.53430042      -0.84003660
29 | H        2.10344700       2.75224137       0.45325047
30 | H       -1.99827828      -3.00301402       0.01897851
31 | H       -0.58887859      -1.93593788      -0.01480556
32 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/3-(4-methylphenyl)-5-pyridin-4-yl-1,2,4-oxadiazole.xyz:
--------------------------------------------------------------------------------
 1 | 29
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        5.67830305       0.39194717      -0.30580249
 4 | C        4.22060466       0.11294013      -0.09922305
 5 | C        3.23599815       0.95391405      -0.63950495
 6 | C        1.87587750       0.67776800      -0.47672575
 7 | C        1.48319743      -0.46145916       0.23614962
 8 | C        2.44723034      -1.31847103       0.78468861
 9 | C        3.80303731      -1.02425094       0.61084484
10 | C        0.06475641      -0.77769424       0.41786970
11 | N       -0.38792218      -1.82130028       1.06731845
12 | O       -1.83822633      -1.65117563       0.94593610
13 | C       -2.06570271      -0.51314260       0.22790574
14 | N       -0.91665307       0.05304610      -0.11612726
15 | C       -3.40385396      -0.03249089      -0.08662581
16 | C       -3.55732927       1.14717769      -0.83490453
17 | C       -4.85536963       1.59496803      -1.12836845
18 | N       -5.96445142       0.95871171      -0.73315721
19 | C       -5.81318078      -0.16158503      -0.02307638
20 | C       -4.56690321      -0.70442933       0.32962179
21 | H        5.86680514       1.46351878      -0.43519839
22 | H        6.27364598       0.02577482       0.53839401
23 | H        6.04526216      -0.11863135      -1.20996784
24 | H        3.54334113       1.84474720      -1.19479344
25 | H        1.09724619       1.32339992      -0.89046868
26 | H        2.08654937      -2.19250234       1.33092356
27 | H        4.56340877      -1.68547447       1.03701236
28 | H       -2.67807499       1.69685754      -1.17708665
29 | H       -4.99687599       2.51184978      -1.71071285
30 | H       -6.73103650      -0.66881279       0.29102242
31 | H       -4.50968353      -1.62520082       0.91209027
32 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/(E)-1-(5-bromo-2-hydroxyphenyl)-3-(4-fluorophenyl)prop-2-en-1-one.xyz:
--------------------------------------------------------------------------------
 1 | 29
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -3.03211903      -1.06098584      -0.29219236
 4 | C       -4.35781944      -0.86956820      -0.67304482
 5 | C       -4.85365791       0.43343553      -0.76682192
 6 | C       -4.02924746       1.53270763      -0.48236520
 7 | C       -2.71153607       1.31306802      -0.11010990
 8 | C       -2.16710428       0.01501800      -0.00416667
 9 | C       -0.76996905      -0.17816120       0.34646871
10 | C       -0.17537867      -1.37845180       0.57538141
11 | C        1.24364050      -1.59135243       0.93805734
12 | O        1.56944410      -2.56303586       1.62818402
13 | C        2.27469214      -0.60235124       0.47720606
14 | C        2.33890694      -0.08821277      -0.82853657
15 | C        3.33856397       0.83714956      -1.16271887
16 | C        4.32105892       1.19348998      -0.24714808
17 | C        4.32113775       0.59578101       1.02252265
18 | C        3.32400653      -0.31825307       1.36401233
19 | Br       5.67503933       1.00577762       2.26347482
20 | O        1.43668825      -0.52586862      -1.76419351
21 | F       -6.15245147       0.64390340      -1.14334310
22 | H       -2.65267619      -2.08218709      -0.22829337
23 | H       -4.99886776      -1.72729476      -0.90100137
24 | H       -4.42486342       2.55033256      -0.56170820
25 | H       -2.07357044       2.17597832       0.10583139
26 | H       -0.17004909       0.73674608       0.40642242
27 | H       -0.76827345      -2.29264833       0.64060306
28 | H        3.34822731       1.26606706      -2.17219390
29 | H        5.11012009       1.90665238      -0.48706158
30 | H        3.35955653      -0.79563132       2.34633573
31 | H        1.67650135      -0.13210462      -2.62156677
32 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/1-benzyl-5-nitroindole.xyz:
--------------------------------------------------------------------------------
 1 | 31
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -3.83550144       1.28694623       1.22766501
 4 | C       -3.69365463      -0.10407484       1.32784616
 5 | C       -2.88720063      -0.79241377       0.41469456
 6 | C       -2.20871392      -0.09183451      -0.58532509
 7 | C       -2.37374428       1.29252252      -0.70524484
 8 | C       -3.17342826       1.98420415       0.20931041
 9 | C       -1.26302740      -0.83047475      -1.50150844
10 | N       -0.04399908      -1.26754106      -0.80759101
11 | C        0.13764305      -2.48108921      -0.18371592
12 | C        1.38042210      -2.49080314       0.43292571
13 | C        2.00418155      -1.22018501       0.18858192
14 | C        1.06851997      -0.47012255      -0.60406251
15 | C        1.32509304       0.83343025      -1.03402470
16 | C        2.53229622       1.42738362      -0.66121878
17 | C        3.46456795       0.69846190       0.12247404
18 | C        3.20785838      -0.62307824       0.54861454
19 | N        4.69541875       1.31731314       0.48259085
20 | O        4.91730373       2.49116363       0.09954673
21 | O        5.52143117       0.67096304       1.17097513
22 | H       -4.46503284       1.82544697       1.93867607
23 | H       -4.20865677      -0.65689427       2.11574637
24 | H       -2.76812853      -1.87781596       0.47816516
25 | H       -1.85862238       1.81610481      -1.51567920
26 | H       -3.27639858       3.06767860       0.12168668
27 | H       -0.95840885      -0.17834252      -2.32467112
28 | H       -1.75125312      -1.73044736      -1.89599723
29 | H       -0.62532814      -3.25114350      -0.23069386
30 | H        1.81772932      -3.29815387       1.00327688
31 | H        0.59746035       1.36745740      -1.64255700
32 | H        2.78001476       2.44230030      -0.95855363
33 | H        3.94115850      -1.15696198       1.14751591
34 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/2-[(3,4-dichlorophenyl)methyl-(2-hydroxyethyl)amino]ethanol.xyz:
--------------------------------------------------------------------------------
 1 | 31
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        1.41113719       0.30441220       1.00289002
 4 | C        2.47059921      -0.27620603       1.70558428
 5 | C        3.58473472      -0.77301007       1.01716525
 6 | C        3.63079667      -0.66764851      -0.40067747
 7 | C        2.56308069      -0.07352057      -1.08179085
 8 | C        1.44483513       0.38733918      -0.38442607
 9 | C        0.26571228       1.00283839      -1.13060152
10 | N       -0.99223124       0.37333679      -0.80620107
11 | C       -2.15085300       1.26796039      -0.75997395
12 | C       -2.15658672       2.02066522       0.56562004
13 | O       -1.82677253       1.20433917       1.68481878
14 | C       -1.22398160      -0.98664877      -1.26884511
15 | C       -1.44614435      -1.94930691      -0.11662729
16 | O       -2.39107692      -1.34489096       0.78707470
17 | Cl       4.96527505      -1.26105103      -1.28358624
18 | Cl       4.86713822      -1.49498966       1.87896628
19 | H        0.54137125       0.69589860       1.53784332
20 | H        2.43173712      -0.35089725       2.78774167
21 | H        2.59627398       0.01048808      -2.16756402
22 | H        0.46023354       0.94327311      -2.21726974
23 | H        0.17436071       2.06569061      -0.85824868
24 | H       -3.05516676       0.65363025      -0.85187841
25 | H       -2.08873661       1.99439674      -1.58638079
26 | H       -3.15630082       2.45080623       0.73737625
27 | H       -1.41713141       2.83256145       0.52818149
28 | H       -2.05943980       0.28013971       1.41307976
29 | H       -2.12490335      -0.99305252      -1.90506145
30 | H       -0.35350126      -1.31206533      -1.85341286
31 | H       -1.86694299      -2.88883540      -0.50074602
32 | H       -0.50013162      -2.13608660       0.41333282
33 | H       -2.59738478      -1.97956651       1.49306566
34 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/7-methoxy-1,2-dimethyl-9H-pyrido[3,4-b]indol-2-ium.xyz:
--------------------------------------------------------------------------------
 1 | 32
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -2.91071297       1.93710299       0.39682845
 4 | C       -2.37610623       0.56400510       0.20288669
 5 | N       -3.23287414      -0.45124443      -0.09367959
 6 | C       -2.81445118      -1.73453628      -0.28198818
 7 | C       -1.47995456      -2.08785943      -0.19082162
 8 | C       -0.55787649      -1.08428071       0.09057440
 9 | C       -1.03098669       0.24163831       0.28348975
10 | N        0.06556436       1.05433635       0.51753142
11 | C        1.22862332       0.30072620       0.48241581
12 | C        0.86899480      -1.04671013       0.21917684
13 | C        1.86620396      -2.02586178       0.12268138
14 | C        3.19080140      -1.65624659       0.28499981
15 | C        3.54558618      -0.29765054       0.54254973
16 | C        2.54473066       0.70412448       0.64841113
17 | O        4.87161812      -0.02938629       0.67115349
18 | C        5.27891528       1.33582958       0.91275840
19 | C       -4.66458255      -0.16266480      -0.23897319
20 | H       -3.70377543       1.97400320       1.15621315
21 | H       -3.31748469       2.36336887      -0.53258825
22 | H       -2.10367014       2.59596153       0.73244698
23 | H       -3.60981494      -2.44459959      -0.50688697
24 | H       -1.15662290      -3.11290842      -0.33853443
25 | H        0.04003619       2.05538232       0.68957074
26 | H        1.57732056      -3.05620092      -0.07698955
27 | H        4.00116176      -2.37860213       0.21873845
28 | H        2.78339960       1.75190441       0.84730761
29 | H        6.36564822       1.27918514       0.98204041
30 | H        4.85548567       1.71643709       1.85313750
31 | H        4.98351023       1.99146315       0.08117460
32 | H       -4.85257231       0.51372598      -1.08263087
33 | H       -5.07166112       0.29533922       0.66986375
34 | H       -5.18445398      -1.10578187      -0.42218245
35 | 


--------------------------------------------------------------------------------
/src/chemgraph/models/calculators/emt_calc.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | 
 3 | 
 4 | class EMTCalc(BaseModel):
 5 |     """Effective Medium Theory (EMT) calculator configuration.
 6 | 
 7 |     This class defines the configuration parameters for the EMT calculator,
 8 |     which is a simple empirical potential for metals. It provides a fast
 9 |     approximation for metallic systems.
10 | 
11 |     Parameters
12 |     ----------
13 |     calculator_type : str, optional
14 |         Calculator type. Currently supports only 'emt', by default 'emt'
15 |     asap_cutoff : bool, optional
16 |         If True, the cutoff mimics how ASAP does it; the global cutoff is
17 |         chosen from the largest atom present in the simulation, by default False
18 | 
19 |     Notes
20 |     -----
21 |     The EMT calculator is a simple empirical potential that works well for
22 |     metallic systems. It is particularly useful for quick calculations and
23 |     as a starting point for more accurate methods.
24 |     """
25 | 
26 |     calculator_type: str = Field(
27 |         default="emt", description="Calculator type. Currently supports only 'emt'."
28 |     )
29 |     asap_cutoff: bool = Field(
30 |         default=False,
31 |         description="If True, the cutoff mimics how ASAP does it; the global cutoff is chosen from the largest atom present in the simulation.",
32 |     )
33 | 
34 |     def get_calculator(self):
35 |         """Get an ASE-compatible EMT calculator instance.
36 | 
37 |         Returns
38 |         -------
39 |         EMT
40 |             An ASE-compatible EMT calculator instance with the specified
41 |             configuration parameters
42 | 
43 |         Raises
44 |         ------
45 |         ValueError
46 |             If an invalid calculator_type is specified
47 |         """
48 |         if self.calculator_type != "emt":
49 |             raise ValueError("Invalid calculator_type. The only valid option is 'emt'.")
50 | 
51 |         from ase.calculators.emt import EMT
52 | 
53 |         return EMT(asap_cutoff=self.asap_cutoff)
54 | 


--------------------------------------------------------------------------------
/config.toml:
--------------------------------------------------------------------------------
 1 | [general]
 2 | model = "gemini-2.5-flash"
 3 | workflow = "single_agent"
 4 | output = "state"
 5 | structured = true
 6 | report = true
 7 | thread = 1
 8 | recursion_limit = 20
 9 | verbose = false
10 | 
11 | [logging]
12 | level = "INFO"
13 | file = "./chemgraph.log"
14 | console = true
15 | format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
16 | 
17 | [features]
18 | enable_experimental = false
19 | enable_cache = true
20 | cache_dir = "./cache"
21 | cache_expiry = 24
22 | 
23 | [security]
24 | validate_keys = true
25 | rate_limit = true
26 | max_requests_per_minute = 60
27 | 
28 | [api.openai]
29 | base_url = "https://api.openai.com/v1"
30 | timeout = 30
31 | 
32 | [api.groq]
33 | base_url = "https://api.groq.com/openai/v1"
34 | timeout = 30
35 | 
36 | [api.anthropic]
37 | base_url = "https://api.anthropic.com"
38 | timeout = 30
39 | 
40 | [api.google]
41 | base_url = "https://generativelanguage.googleapis.com/v1beta"
42 | timeout = 30
43 | 
44 | [api.local]
45 | base_url = "http://localhost:11434"
46 | timeout = 60
47 | 
48 | [chemistry.optimization]
49 | method = "BFGS"
50 | fmax = 0.05
51 | steps = 200
52 | 
53 | [chemistry.frequencies]
54 | displacement = 0.01
55 | nprocs = 1
56 | 
57 | [chemistry.calculators]
58 | default = "mace_mp"
59 | fallback = "emt"
60 | 
61 | [output.files]
62 | directory = "./chemgraph_output"
63 | pattern = "{timestamp}_{query_hash}"
64 | formats = [ "xyz", "json", "html",]
65 | 
66 | [output.visualization]
67 | enable_3d = true
68 | viewer = "py3dmol"
69 | dpi = 300
70 | 
71 | [advanced.agent]
72 | custom_system_prompt = ""
73 | max_memory_tokens = 8000
74 | enable_function_calling = true
75 | 
76 | [advanced.parallel]
77 | enable_parallel = false
78 | num_workers = 2
79 | 
80 | [environments.development]
81 | model = "gpt-4o-mini"
82 | verbose = true
83 | enable_cache = false
84 | 
85 | [environments.production]
86 | model = "gpt-4o"
87 | verbose = false
88 | enable_cache = true
89 | rate_limit = true
90 | 
91 | [environments.testing]
92 | model = "gpt-4o-mini"
93 | verbose = true
94 | enable_cache = false
95 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/generic_tools.py:
--------------------------------------------------------------------------------
 1 | from langchain_core.tools import tool
 2 | import math
 3 | import numexpr
 4 | from langchain_core.tools import Tool
 5 | from langchain_experimental.utilities import PythonREPL
 6 | 
 7 | 
 8 | @tool
 9 | def calculator(expression: str) -> str:
10 |     """Evaluate mathematical expressions safely.
11 | 
12 |     This function provides a safe way to evaluate mathematical expressions
13 |     using numexpr. It supports basic mathematical operations and common
14 |     mathematical functions.
15 | 
16 |     Parameters
17 |     ----------
18 |     expression : str
19 |         Mathematical expression to evaluate (e.g., "2 * pi + 5")
20 | 
21 |     Returns
22 |     -------
23 |     str
24 |         String result or error message
25 | 
26 |     Notes
27 |     -----
28 |     Supported mathematical functions:
29 |     - Basic operations: +, -, *, /, **
30 |     - Trigonometric: sin, cos, tan
31 |     - Other: sqrt, abs
32 |     - Constants: pi, e
33 |     """
34 |     local_dict = {
35 |         "pi": math.pi,
36 |         "e": math.e,
37 |         "sin": math.sin,
38 |         "cos": math.cos,
39 |         "tan": math.tan,
40 |         "sqrt": math.sqrt,
41 |         "abs": abs,
42 |     }
43 | 
44 |     try:
45 |         cleaned_expression = expression.strip()
46 |         if not cleaned_expression:
47 |             return "Error: Empty expression"
48 | 
49 |         result = numexpr.evaluate(
50 |             cleaned_expression,
51 |             global_dict={},
52 |             local_dict=local_dict,
53 |         )
54 | 
55 |         if isinstance(result, (int, float)):
56 |             return f"{float(result):.6f}".rstrip("0").rstrip(".")
57 |         return str(result)
58 | 
59 |     except Exception as e:
60 |         return f"Error evaluating expression: {e!s}"
61 | 
62 | 
63 | python_repl = PythonREPL()
64 | repl_tool = Tool(
65 |     name="python_repl",
66 |     description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
67 |     func=python_repl.run,
68 | )
69 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/[(E)-(2-chloro-1-methylindol-3-yl)methylideneamino] 3-chlorobenzoate.xyz:
--------------------------------------------------------------------------------
 1 | 35
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -4.92314822       2.29012110       0.47664074
 4 | N       -3.78148785       1.39619669       0.35508246
 5 | C       -3.81018803      -0.00707759       0.50345823
 6 | C       -4.86818447      -0.85614251       0.78933000
 7 | C       -4.58653688      -2.23235324       0.88418504
 8 | C       -3.28226974      -2.71901055       0.68860007
 9 | C       -2.22035802      -1.84320691       0.40118555
10 | C       -2.49046401      -0.48417125       0.31563887
11 | C       -1.65330341       0.67681598       0.05560458
12 | C       -2.51032057       1.80240940       0.09494986
13 | Cl      -2.09040570       3.41468278      -0.12879477
14 | C       -0.24251648       0.61591312      -0.17201627
15 | N        0.34070317      -0.53799530      -0.05814501
16 | O        1.65458169      -0.60313536      -0.59387550
17 | C        2.52724579       0.18727521       0.09594694
18 | O        2.20241611       0.98763105       0.95436468
19 | C        3.93365772      -0.03682825      -0.35687284
20 | C        4.93435176       0.77330269       0.19337610
21 | C        6.26079649       0.63642229      -0.22775113
22 | C        6.59637911      -0.32312136      -1.19944871
23 | C        5.59177216      -1.14042519      -1.73160840
24 | C        4.26710430      -0.99887098      -1.31131413
25 | Cl       7.47127680       1.64632284       0.44245605
26 | H       -5.03285358       2.89061433      -0.43356605
27 | H       -4.79251471       2.96639455       1.33017628
28 | H       -5.81034483       1.67533400       0.62684258
29 | H       -5.87019840      -0.45295704       0.93205835
30 | H       -5.39231685      -2.93097219       1.11306279
31 | H       -3.08026500      -3.78756225       0.75942712
32 | H       -1.18990995      -2.16497215       0.23840574
33 | H        0.21901413       1.59099927      -0.39688592
34 | H        4.66786433       1.51082676       0.94333388
35 | H        7.63077232      -0.41457312      -1.51863155
36 | H        5.84065602      -1.89015183      -2.48233852
37 | H        3.48899481      -1.63773498      -1.72581867
38 | 


--------------------------------------------------------------------------------
/src/chemgraph/models/calculators/aimnet2_calc.py:
--------------------------------------------------------------------------------
 1 | """AIMNET2 foundation models parameters for ChemGraph
 2 | """
 3 | 
 4 | import os
 5 | from pathlib import Path
 6 | from typing import Optional, Union
 7 | from pydantic import BaseModel, Field
 8 | import torch
 9 | 
10 | 
11 | class AIMNET2Calc(BaseModel):
12 |     """AIMNET2  calculator configuration.
13 | 
14 |     This class defines the configuration parameters for AIMNET2 machine learning models
15 |     used in molecular simulations. It supports different calculator types including
16 |     aimnet2 or a Path.
17 | 
18 |     Parameters
19 |     ----------
20 |     calculator_type : str, optional
21 |         Type of calculator to use. Options: 'aimnet2' (default)
22 |     model : str or Path, optional
23 |         Name or path to the model file. If None, uses default model for selected calculator type.
24 |     device : str, optional
25 |     """
26 | 
27 |     calculator_type: str = Field(
28 |         default="aimnet2",
29 |         description="Type of calculator. Options: 'aimnet2' (default) ",
30 |     )
31 |     model: Optional[Union[str, Path]] = Field(
32 |         default='aimnet2',
33 |         description="Path to the model. If None, it will use the default model for the selected calculator type. "
34 |         "Options: 'aimnet2' ",
35 |     )
36 | 
37 |     def get_calculator(self):
38 |         """Get the appropriate AIMNET2Calculator instance based on the selected calculator type.
39 | 
40 |         Returns
41 |         -------
42 |         AIMNET2Calc
43 |             An instance of the appropriate AIMNET2 calculator
44 | 
45 |         Raises
46 |         ------
47 |         ValueError
48 |             If an invalid calculator_type is specified
49 |         """
50 |         from aimnet2calc import AIMNet2ASE
51 | 
52 |         # Allow loading slice and AIMNET2 objects for compatibility with older model files
53 | 
54 |         # Force torch to disable weights_only loading (allows full pickle loads) for AIMNET2 models
55 | 
56 |         if self.calculator_type == "aimnet2":
57 |             return AIMNet2ASE(self.model)
58 |         else:
59 |             raise ValueError(
60 |                 "Invalid calculator_type. Choose 'aimnet2' or path."
61 |             )
62 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/N-benzyl-N-methyl-3,5-dinitrobenzamide.xyz:
--------------------------------------------------------------------------------
 1 | 36
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C       -0.38504536      -1.70761568      -1.04393288
 4 | N       -0.63956181      -1.29661576       0.32887918
 5 | C       -2.01823718      -1.50761276       0.80259991
 6 | C       -2.97874845      -0.44462751       0.34333860
 7 | C       -3.52178077      -0.49675512      -0.94586324
 8 | C       -4.32750618       0.54721660      -1.41112186
 9 | C       -4.62659028       1.63036547      -0.57475455
10 | C       -4.12373434       1.65791082       0.73284332
11 | C       -3.31148204       0.61892748       1.18874501
12 | C        0.08300977      -0.32301485       0.98300922
13 | O       -0.38121951       0.29853408       1.93947699
14 | C        1.51433842      -0.04804162       0.54764674
15 | C        1.97238836       1.27672730       0.57720892
16 | C        3.30512829       1.57523303       0.21019531
17 | C        4.20422155       0.55097407      -0.12005434
18 | C        3.75495988      -0.77356394      -0.09158562
19 | C        2.41445848      -1.07919475       0.24515160
20 | N        4.64364141      -1.84812711      -0.44701178
21 | O        5.83033318      -1.57456312      -0.72912140
22 | O        4.19251671      -3.01603722      -0.46146690
23 | N        3.73109682       2.94834844       0.15371959
24 | O        2.90374970       3.84206724       0.43520594
25 | O        4.91168742       3.19392360      -0.17771211
26 | H       -0.67206019      -0.93187132      -1.77529554
27 | H       -0.97636475      -2.61032250      -1.24044692
28 | H        0.67501576      -1.94024275      -1.17814558
29 | H       -2.32494874      -2.48183467       0.40907154
30 | H       -1.97588196      -1.52887105       1.89525074
31 | H       -3.29697117      -1.35656372      -1.58485082
32 | H       -4.71748272       0.50794702      -2.43025201
33 | H       -5.25313876       2.44752807      -0.93722407
34 | H       -4.35395294       2.48879650       1.40190533
35 | H       -2.88887937       0.63719673       2.19456849
36 | H        1.30524707       2.08390128       0.87161404
37 | H        5.23004490       0.78080685      -0.39383736
38 | H        2.10174878      -2.12092913       0.28224654
39 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=42"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "ChemGraph"
 7 | version = "1.0.0"
 8 | description = "A computational chemistry agent for molecular simulation tasks."
 9 | authors = [
10 |     { name = "Thang Pham", email = "tpham@anl.gov" },
11 |     { name = "Murat Keçeli", email = "keceli@anl.gov" },
12 |     { name = "Aditya Tanikanti", email = "atanikanti@anl.gov" }
13 | ]
14 | requires-python = ">=3.10"
15 | dependencies = [
16 |     "ase==3.25.0",
17 |     "rdkit==2025.3.3",
18 |     "langgraph==0.4.7",
19 |     "langchain-openai==0.3.27",
20 |     "langchain-ollama==0.3.4",
21 |     "langchain-anthropic==0.3.17",
22 |     "langchain-google-genai==2.1.7",
23 |     "langchain-groq",
24 |     "langchain-experimental==0.3.4",
25 |     "pydantic==2.11.7",
26 |     "pandas==2.2.3",
27 |     "pubchempy @ git+https://github.com/keceli/PubChemPy.git@main",
28 |     "pyppeteer==2.0.0",
29 |     "numpy==2.2.6",
30 |     "numexpr==2.11.0",
31 |     "tblite==0.4.0",
32 |     "pytest==8.4.1",
33 |     "deepdiff==8.5.0",
34 |     "pymatgen==2025.3.10",
35 |     "mace-torch==0.3.13",
36 |     "streamlit==1.48.1",
37 |     "stmol==0.0.9",
38 |     "ipython-genutils==0.2.0",
39 |     "langsmith==0.3.45",
40 |     "rich==14.1.0",
41 |     "toml==0.10.2"
42 |     ]
43 | 
44 | [project.optional-dependencies]
45 | uma = [
46 |     "fairchem-core==2.3.0",
47 |     "e3nn>=0.5",
48 | ]
49 | ui = [
50 |     "streamlit",
51 |     "stmol",
52 |     "ipython-genutils",
53 | ]
54 | 
55 | [project.urls]
56 | "Homepage" = "https://github.com/argonne-lcf/ChemGraph"
57 | "Repository" = "https://github.com/argonne-lcf/ChemGraph"
58 | 
59 | [project.scripts]
60 | chemgraph = "ui.cli:main"
61 | 
62 | [tool.setuptools.packages.find]
63 | where = ["src/"]
64 | 
65 | [tool.ruff]
66 | line-length = 88  # Match Black's default (adjust as needed)
67 | target-version = "py310"  # Adjust based on your Python version
68 | exclude = ["notebooks/"]  # Add files/folders to ignore
69 | 
70 | [tool.ruff.format]
71 | quote-style = "preserve"  # Keep existing quote style
72 | indent-style = "space"  # Use spaces for indentation
73 | skip-magic-trailing-comma = false  # Ensure Black-style formatting
74 | 
75 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: ChemGraph Docs
 2 | repo_name: ChemGraph
 3 | repo_url: https://github.com/argonne-lcf/ChemGraph/tree/
 4 | 
 5 | theme:
 6 |   name: material
 7 | 
 8 |   # Font settings
 9 |   font:
10 |     text: Merriweather Sans
11 |     code: Roboto Mono
12 | 
13 |   features:
14 |     - content.code.copy
15 |     - content.code.annotate
16 |     - navigation.footer
17 |     - search.suggest
18 |     - header.autohide
19 |     - navigation.tabs
20 | 
21 |   palette:
22 |     # Automatic mode
23 |     - media: "(prefers-color-scheme)"
24 |       toggle:
25 |         icon: material/brightness-auto
26 |         name: Switch to light mode
27 | 
28 |     # Light Mode
29 |     - media: "(prefers-color-scheme: light)"
30 |       scheme: default
31 |       toggle:
32 |         icon: material/brightness-7
33 |         name: Switch to dark mode
34 |       primary: green
35 |       accent: deep purple
36 | 
37 |     # Dark Mode
38 |     - media: "(prefers-color-scheme: dark)"
39 |       scheme: slate
40 |       toggle:
41 |         icon: material/brightness-4
42 |         name: Switch to system preferences
43 |       primary: indigo
44 |       accent: deep orange
45 | 
46 | plugins:
47 |   - mkdocstrings
48 |   - search
49 | 
50 | nav:
51 |   - Overview: index.md
52 |   - Installation: installation.md
53 |   - Example Usage: example_usage.md
54 |   - Streamlit Web Interface: streamlit_web_interface.md
55 |   - Configuration with TOML: configuration_with_toml.md
56 |   - Project Structure: project_structure.md
57 |   - Running Local Models with vLLM: running_local_models.md
58 |   - Docker Support with Docker Compose (Recommended for vLLM): docker_support.md
59 |   - Code Formatting & Linting: code_formatting_and_linting.md
60 |   - Citation: citation.md
61 |   - Acknowledgements: acknowledgements.md
62 |   - License: license.md
63 | 
64 | markdown_extensions:
65 |   - pymdownx.highlight:
66 |       anchor_linenums: true
67 |       line_spans: __span
68 |       pygments_lang_class: true
69 |   - pymdownx.inlinehilite
70 |   - pymdownx.snippets
71 |   - pymdownx.superfences
72 |   - pymdownx.details
73 |   - admonition
74 |   - pymdownx.tabbed:
75 |       alternate_style: true
76 |       combine_header_slug: true
77 |   - def_list
78 |   - pymdownx.tasklist:
79 |       custom_checkbox: true


--------------------------------------------------------------------------------
/scripts/evaluations/pubchempy/get_molecule_from_pubchempy.py:
--------------------------------------------------------------------------------
 1 | import pubchempy as pcp
 2 | import random
 3 | import time
 4 | import json
 5 | from chemgraph.tools.ASE_tools import (
 6 |     smiles_to_atomsdata,
 7 |     molecule_name_to_smiles,
 8 | )
 9 | 
10 | 
11 | def get_random_molecule_names(n=2, cid_range=(0, 10000000), seed=2025, max_natoms=20, min_natoms=6):
12 |     """Get a list of random molecule names and smiles from PubChemPy.
13 | 
14 |     Args:
15 |         n (int): Number of molecules to retrieve.
16 |         cid_range (tuple): Range of PubChem CIDs to sample from.
17 |         seed (int): Random seed for reproducibility.
18 |         natoms (int): Maximum number of atoms per molecule.
19 | 
20 |     Returns:
21 |         list: A list of dictionaries, each containing data for one molecule.
22 |     """
23 |     random.seed(seed)
24 |     output = []
25 |     tried = set()
26 |     count = 0
27 | 
28 |     while len(output) < n:
29 |         cid = random.randint(*cid_range)
30 |         if cid in tried:
31 |             continue
32 |         tried.add(cid)
33 | 
34 |         try:
35 |             compound = pcp.Compound.from_cid(cid)
36 |             name = compound.iupac_name or (compound.synonyms[0] if compound.synonyms else None)
37 |             if not name:
38 |                 continue
39 | 
40 |             smiles = molecule_name_to_smiles.invoke({"name": name})
41 |             atomsdata = smiles_to_atomsdata.invoke({"smiles": smiles})
42 | 
43 |             if len(atomsdata.numbers) < max_natoms and len(atomsdata.numbers) > min_natoms:
44 |                 molecule_info = {
45 |                     "index": count,
46 |                     "name": name,
47 |                     "number_of_atoms": len(atomsdata.numbers),
48 |                     "smiles": smiles,
49 |                 }
50 |                 output.append(molecule_info)
51 |                 count += 1
52 |                 print(count)
53 |             else:
54 |                 print(f"Too many atoms in {name}, skipping...")
55 | 
56 |         except Exception:
57 |             continue
58 | 
59 |         time.sleep(0.5)
60 | 
61 |     return output
62 | 
63 | 
64 | def main():
65 |     output = get_random_molecule_names(n=60, seed=2025)
66 |     with open('pubchempy_molecule_max.json', 'w') as f:
67 |         json.dump(output, f, indent=4)
68 | 
69 | 
70 | if __name__ == "__main__":
71 |     main()
72 | 


--------------------------------------------------------------------------------
/scripts/evaluations/generate_evaluation_data/Exp6/manual_files/N-butyl-N-ethyl-3-methyl-2-nitrobenzamide.xyz:
--------------------------------------------------------------------------------
 1 | 39
 2 | Properties=species:S:1:pos:R:3 pbc="F F F"
 3 | C        1.88425893       2.48905432      -0.02212777
 4 | C        2.96589017       1.41053076       0.03506324
 5 | C        2.48700469       0.05348982       0.54993883
 6 | C        1.54854705      -0.64463812      -0.43533310
 7 | N        0.78825039      -1.78618377       0.15247289
 8 | C        1.27465233      -3.12760790      -0.20554781
 9 | C        0.75172397      -3.67000238      -1.52116312
10 | C       -0.54294724      -1.67134106       0.47425819
11 | O       -1.28211132      -2.64640391       0.55825421
12 | C       -1.00895287      -0.29500852       0.90604797
13 | C       -0.55004962       0.31823617       2.06676577
14 | C       -1.10237929       1.55256594       2.45225566
15 | C       -2.15867487       2.11884007       1.73248130
16 | C       -2.68278431       1.49483809       0.59803207
17 | C       -2.05119977       0.30326191       0.17440264
18 | N       -2.19891182      -0.23465180      -1.14683548
19 | O       -3.21320837      -0.03211060      -1.85397463
20 | O       -1.22080116      -0.90396619      -1.59522700
21 | C       -3.81899635       2.07295879      -0.16276035
22 | H        1.06533207       2.21537677      -0.69793230
23 | H        2.31212694       3.43019119      -0.38415911
24 | H        1.45431719       2.67858679       0.96931110
25 | H        3.77821156       1.74490983       0.69115119
26 | H        3.39930146       1.27413669      -0.96416982
27 | H        3.35255371      -0.60887056       0.70243585
28 | H        1.97777322       0.15790220       1.51815374
29 | H        2.13753410      -1.05910385      -1.27132445
30 | H        0.81292295       0.05188169      -0.86667920
31 | H        2.36542421      -3.03956700      -0.23788165
32 | H        0.98677438      -3.80039558       0.61187786
33 | H        1.07678938      -3.04805817      -2.36173312
34 | H        1.14322257      -4.68064935      -1.66624857
35 | H       -0.34119126      -3.72051455      -1.50590095
36 | H        0.22548238      -0.17407148       2.65767099
37 | H       -0.70632872       2.05637023       3.33588934
38 | H       -2.59843620       3.06700120       2.05293953
39 | H       -3.56075480       3.06446455      -0.55145772
40 | H       -4.69732545       2.18713031       0.48309288
41 | H       -4.05304023       1.40141745      -0.99517404
42 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/cheminformatics_tools.py:
--------------------------------------------------------------------------------
 1 | import pubchempy
 2 | from langchain_core.tools import tool
 3 | from chemgraph.models.atomsdata import AtomsData
 4 | 
 5 | 
 6 | @tool
 7 | def molecule_name_to_smiles(name: str) -> str:
 8 |     """Convert a molecule name to SMILES format.
 9 | 
10 |     Parameters
11 |     ----------
12 |     name : str
13 |         The name of the molecule to convert.
14 | 
15 |     Returns
16 |     -------
17 |     str
18 |         The SMILES string representation of the molecule.
19 | 
20 |     Raises
21 |     ------
22 |     IndexError
23 |         If the molecule name is not found in PubChem.
24 |     """
25 |     return pubchempy.get_compounds(str(name), "name")[0].canonical_smiles
26 | 
27 | 
28 | @tool
29 | def smiles_to_atomsdata(smiles: str, randomSeed: int = 2025) -> AtomsData:
30 |     """Convert a SMILES string to AtomsData format.
31 | 
32 |     Parameters
33 |     ----------
34 |     smiles : str
35 |         SMILES string representation of the molecule.
36 |     randomSeed : int, optional
37 |         Random seed for RDKit 3D structure generation, by default 2025.
38 | 
39 |     Returns
40 |     -------
41 |     AtomsData
42 |         AtomsData object containing the molecular structure.
43 | 
44 |     Raises
45 |     ------
46 |     ValueError
47 |         If the SMILES string is invalid or if 3D structure generation fails.
48 |     """
49 |     from rdkit import Chem
50 |     from rdkit.Chem import AllChem
51 | 
52 |     # Generate the molecule object
53 |     mol = Chem.MolFromSmiles(smiles)
54 |     if mol is None:
55 |         raise ValueError("Invalid SMILES string.")
56 | 
57 |     # Add hydrogens and optimize 3D structure
58 |     mol = Chem.AddHs(mol)
59 |     if AllChem.EmbedMolecule(mol, randomSeed=randomSeed) != 0:
60 |         raise ValueError("Failed to generate 3D coordinates.")
61 |     if AllChem.UFFOptimizeMolecule(mol) != 0:
62 |         raise ValueError("Failed to optimize 3D geometry.")
63 |     # Extract atomic information
64 |     conf = mol.GetConformer()
65 |     numbers = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
66 |     positions = [list(conf.GetAtomPosition(i)) for i in range(mol.GetNumAtoms())]
67 | 
68 |     # Create AtomsData object
69 |     atoms_data = AtomsData(
70 |         numbers=numbers,
71 |         positions=positions,
72 |         cell=[[0, 0, 0], [0, 0, 0], [0, 0, 0]],
73 |         pbc=[False, False, False],  # No periodic boundary conditions
74 |     )
75 |     return atoms_data
76 | 


--------------------------------------------------------------------------------
/Dockerfile.arm:
--------------------------------------------------------------------------------
 1 | # This vLLM Dockerfile builds an image that can build and run vLLM on ARM CPU platforms.
 2 | 
 3 | FROM ubuntu:22.04 AS cpu-test-arm
 4 | 
 5 | ENV CCACHE_DIR=/root/.cache/ccache
 6 | 
 7 | ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
 8 | 
 9 | RUN --mount=type=cache,target=/var/cache/apt \
10 |     apt-get update -y \
11 |     && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
12 |     && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
13 |     && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
14 | 
15 | # tcmalloc provides better memory allocation efficiency, e.g., holding memory in caches to speed up access of commonly-used objects.
16 | RUN --mount=type=cache,target=/root/.cache/pip \
17 |     pip install py-cpuinfo  # Use this to gather CPU info and optimize based on ARM Neoverse cores
18 | 
19 | # Set LD_PRELOAD for tcmalloc on ARM
20 | ENV LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"
21 | 
22 | RUN echo 'ulimit -c 0' >> ~/.bashrc
23 | 
24 | WORKDIR /workspace
25 | 
26 | ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
27 | ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
28 | RUN --mount=type=cache,target=/root/.cache/pip \
29 |     --mount=type=bind,src=requirements/build.txt,target=requirements/build.txt \
30 |     pip install --upgrade pip && \
31 |     pip install -r requirements/build.txt
32 | 
33 | FROM cpu-test-arm AS build
34 | 
35 | WORKDIR /workspace/vllm
36 | 
37 | RUN --mount=type=cache,target=/root/.cache/pip \
38 |     --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
39 |     --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
40 |     pip install -v -r requirements/cpu.txt
41 | 
42 | COPY . .
43 | ARG GIT_REPO_CHECK=0
44 | RUN --mount=type=bind,source=.git,target=.git \
45 |     if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
46 | 
47 | # Disabling AVX512 specific optimizations for ARM
48 | ARG VLLM_CPU_DISABLE_AVX512="true"
49 | ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
50 | ENV MAX_JOBS=1
51 | 
52 | RUN --mount=type=cache,target=/root/.cache/pip \
53 |     --mount=type=cache,target=/root/.cache/ccache \
54 |     --mount=type=bind,source=.git,target=.git \
55 |     VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
56 |     pip install dist/*.whl && \
57 |     rm -rf dist
58 | 
59 | WORKDIR /workspace/
60 | 
61 | RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
62 | 
63 | ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]


--------------------------------------------------------------------------------
/src/chemgraph/models/calculators/mopac_calc.py:
--------------------------------------------------------------------------------
 1 | # Keywords and parameters obtained from QCEngine: https://github.com/MolSSI/QCEngine
 2 | # MOPAC parameters for CompChemAgent
 3 | 
 4 | from pydantic import BaseModel, Field
 5 | 
 6 | 
 7 | class MopacCalc(BaseModel):
 8 |     """MOPAC calculator configuration.
 9 | 
10 |     This class defines the configuration parameters for the MOPAC calculator,
11 |     which is a semi-empirical quantum chemistry program. It provides various
12 |     semi-empirical methods for molecular calculations.
13 | 
14 |     Parameters
15 |     ----------
16 |     calculator_type : str, optional
17 |         Type of calculator. Currently supports only 'mopac', by default 'mopac'
18 |     method : str, optional
19 |         Computational method to be used. Available methods include:
20 |         ['mndo', 'am1', 'pm3', 'rm1', 'mndod', 'pm6', 'pm6-d3', 'pm6-dh+',
21 |         'pm6-dh2', 'pm6-dh2x', 'pm6-d3h4', 'pm6-3dh4x', 'pm7', 'pm7-ts'],
22 |         by default 'am1'
23 |     iter : int, optional
24 |         Maximum number of self-consistent field (SCF) iterations allowed,
25 |         by default 100
26 |     pulay : bool, optional
27 |         Enable Pulay's convergence acceleration for the SCF procedure,
28 |         by default True
29 | 
30 |     Notes
31 |     -----
32 |     MOPAC is a semi-empirical quantum chemistry program that provides
33 |     various methods for molecular calculations. The available methods
34 |     range from basic semi-empirical methods (MNDO, AM1, PM3) to more
35 |     advanced ones with dispersion corrections (PM6-D3, PM6-DH+).
36 |     """
37 | 
38 |     calculator_type: str = Field(
39 |         default="mopac",
40 |         description="Type of calculator. Currently supports only 'mopac'.",
41 |     )
42 |     method: str = Field(
43 |         default="am1",
44 |         description="Computational method to be used. Available methods include ['mndo', 'am1', 'pm3', 'rm1', 'mndod', 'pm6', 'pm6-d3', 'pm6-dh+', 'pm6-dh2', 'pm6-dh2x', 'pm6-d3h4', 'pm6-3dh4x', 'pm7', 'pm7-ts']",
45 |     )
46 |     iter: int = Field(
47 |         default=100,
48 |         description="Maximum number of self-consistent field (SCF) iterations allowed.",
49 |     )
50 |     pulay: bool = Field(
51 |         default=True,
52 |         description="Enable Pulay's convergence acceleration for the SCF procedure.",
53 |     )
54 | 
55 |     def get_calculator(self):
56 |         """Get MOPAC calculator parameters.
57 | 
58 |         Returns
59 |         -------
60 |         dict
61 |             A dictionary containing the MOPAC calculator parameters:
62 |             - method: The computational method to use
63 |             - ITER: Maximum number of SCF iterations
64 |             - PULAY: Whether to use Pulay's convergence acceleration
65 |         """
66 |         return {
67 |             "method": self.method,
68 |             "ITER": self.iter,
69 |             "PULAY": self.pulay,
70 |         }
71 | 


--------------------------------------------------------------------------------
/tests/test_calculators.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | from chemgraph.models.calculators.emt_calc import EMTCalc
 4 | from chemgraph.models.calculators.mace_calc import MaceCalc
 5 | from chemgraph.models.calculators.tblite_calc import TBLiteCalc
 6 | from chemgraph.models.calculators.orca_calc import OrcaCalc
 7 | from ase import Atoms
 8 | 
 9 | 
def test_emt_calculator():
    """EMT gives a float energy and a (2, 3) force array for an H2 molecule."""
    # Build the ASE calculator from the EMTCalc configuration model.
    emt = EMTCalc().get_calculator()

    # Two hydrogen atoms placed 1 unit apart along z.
    h2 = Atoms("H2", positions=[[0, 0, 0], [0, 0, 1]])
    h2.calc = emt

    # Energy must come back as a plain float.
    assert isinstance(h2.get_potential_energy(), float)

    # Forces must be an array with one 3-vector per atom.
    force_array = h2.get_forces()
    assert isinstance(force_array, np.ndarray)
    assert force_array.shape == (2, 3)
27 | 
28 | 
def test_mace_calculator():
    """MACE gives a float energy and a (2, 3) force array for an H2 molecule.

    The test is skipped when the ``mace`` package cannot be imported.
    """
    # Check for the optional dependency inside the test body. Calling
    # pytest.importorskip() in a decorator argument runs at collection time and
    # raises a module-level skip, which would skip every test in this module
    # (including the EMT test) whenever MACE is missing.
    pytest.importorskip("mace")

    # Test MACE calculator initialization
    calc = MaceCalc(model_type="medium")
    ase_calc = calc.get_calculator()

    # Create a simple molecule
    atoms = Atoms("H2", positions=[[0, 0, 0], [0, 0, 1]])
    atoms.calc = ase_calc

    # Test energy calculation
    energy = atoms.get_potential_energy()
    assert isinstance(energy, float)

    # Test forces calculation
    forces = atoms.get_forces()
    assert isinstance(forces, np.ndarray)
    assert forces.shape == (2, 3)
47 | 
48 | 
def test_tblite_calculator():
    """TBLite gives a float energy and a (2, 3) force array for an H2 molecule.

    The test is skipped when the ``tblite`` package cannot be imported.
    """
    # Check for the optional dependency inside the test body. Calling
    # pytest.importorskip() in a decorator argument runs at collection time and
    # raises a module-level skip, which would skip every test in this module
    # whenever TBLite is missing.
    pytest.importorskip("tblite")

    # Test TBLite calculator initialization
    calc = TBLiteCalc()
    ase_calc = calc.get_calculator()

    # Create a simple molecule
    atoms = Atoms("H2", positions=[[0, 0, 0], [0, 0, 1]])
    atoms.calc = ase_calc

    # Test energy calculation
    energy = atoms.get_potential_energy()
    assert isinstance(energy, float)

    # Test forces calculation
    forces = atoms.get_forces()
    assert isinstance(forces, np.ndarray)
    assert forces.shape == (2, 3)
67 | 
68 | 
def test_orca_calculator():
    """The ORCA configuration yields an ASE calculator exposing ``calculate``.

    The test is skipped when ``ase.io.orca`` cannot be imported or when ASE
    reports that the ORCA calculator is not configured (``BadConfiguration``).
    No ORCA calculation is actually run.
    """
    # Check for the dependency inside the test body. Calling
    # pytest.importorskip() in a decorator argument runs at collection time and
    # raises a module-level skip that would affect the whole module.
    pytest.importorskip("ase.io.orca")

    from ase.calculators.calculator import BadConfiguration

    try:
        calc = OrcaCalc()
        ase_calc = calc.get_calculator()
    except BadConfiguration:
        # pytest.skip() raises, so ase_calc is always bound past this point.
        pytest.skip("ORCA calculator not configured in ASE.")

    # Create a simple molecule
    atoms = Atoms("H2", positions=[[0, 0, 0], [0, 0, 1]])
    atoms.calc = ase_calc

    # Test basic calculator properties
    assert hasattr(ase_calc, "calculate")
87 | 


--------------------------------------------------------------------------------
/src/chemgraph/prompt/llama_prompt.py:
--------------------------------------------------------------------------------
# System prompt for the single tool-calling agent. It instructs the model to
# extract inputs from the request, call only tools that are strictly required,
# pass tool arguments as plain dictionaries matching the tool schema, never
# fabricate results, and stop as soon as the task is fulfilled.
# NOTE(review): the module name suggests this variant is tuned for Llama
# models -- confirm with the caller that selects it.
single_agent_prompt = """
You are a computational chemistry expert. Your goal is to solve the user's request using only the **minimum number of necessary tools**, without guessing or overusing functionality.

Responsibilities:

1. Carefully extract all relevant inputs from the user's request, including:
   - Molecule names, SMILES strings, structures
   - Methods, calculator types
   - Simulation conditions (temperature, pressure, etc.)

2. Before calling any tool:
   - Confirm the tool is clearly required to fulfill the user's request.
   - If the user's request can be answered without a tool call, **do not call any tool**.
   - Never call a tool just because data is available — only call it if **it is essential** to progress.

3. When calling a tool:
   - Do not nest tool calls.
   - Use **exact** Python dictionary format, following the tool’s schema strictly.
   - Do not include wrappers like `"type": "object"` or `"value": {...}"`.
   - Example (valid input for ASE run):
     ```python
     {
         "atomsdata": { "numbers": [...], "positions": [...], "cell": [...], "pbc": [...] },
         "driver": "opt",
         "optimizer": "bfgs",
         "calculator": { "calculator_type": "mace_mp" },
         "fmax": 0.01,
         "steps": 1000,
         "temperature": 298.15,
         "pressure": 101325.0
     }
     ```

4. Always use outputs from tool responses. Never fabricate SMILES, molecular structures, or results.

5. Handle tool failures by explaining the issue or retrying only with corrected input. Otherwise, proceed to the next step.

6. When the user's task is fulfilled:
   - **Stop immediately.**
   - Return only the final result.
   - Do not reason further or call more tools unless explicitly instructed.

7. Do not call tools that are irrelevant or unrelated to the specific task described by the user.

Summary: Use only the necessary tools. Stay focused on the user’s exact question. Avoid guessing, avoid unnecessary reasoning, and stop once the task is complete.
"""
47 | 
48 | 
# Prompt for the response-formatting step. It tells the model which output
# type to pick for a result: `str`, `AtomsData`, `VibrationalFrequency`, or
# `ScalarResult`, and explicitly forbids `ScalarResult` for vibrational data.
formatter_prompt = """You are an agent that formats responses based on user intent. You must select the correct output type based on the content of the result:

1. Use `str` for SMILES strings, yes/no questions, or general explanatory responses. If the user asks for a SMILES string, only return the SMILES string instead of text.
2. Use `AtomsData` for molecular structures or atomic geometries (e.g., atomic positions, element lists, or 3D coordinates).
3. Use `VibrationalFrequency` for vibrational frequency data. This includes one or more vibrational modes, typically expressed in units like cm⁻¹. 
   - IMPORTANT: Do NOT use `ScalarResult` for vibrational frequencies. Vibrational data is a list or array of values and requires `VibrationalFrequency`.
4. Use `ScalarResult` (float) only for scalar thermodynamic or energetic quantities such as:
   - Enthalpy
   - Entropy
   - Gibbs free energy
"""
60 | 


--------------------------------------------------------------------------------
/src/chemgraph/models/supported_models.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Lists of supported models for different LLM providers.
  3 | """
  4 | # OpenAI models that are supported
  5 | supported_openai_models = [
  6 |     "gpt-4o-mini",
  7 |     "gpt-4o",
  8 |     "gpt-4.1",
  9 |     "gpt-3.5-turbo-0125",
 10 | ]
 11 | # Ollama models that are supported
 12 | supported_ollama_models = ["llama3.2", "llama3.1"]
 13 | # ALCF models that are supported (these would be models available through ALCF's infrastructure)
 14 | supported_alcf_models = [
 15 |     "AuroraGPT-IT-v4-0125_2",
 16 |     "meta-llama/Meta-Llama-3.1-405B-Instruct",
 17 |     "meta-llama/Llama-3.3-70B-Instruct",
 18 |     "meta-llama/Meta-Llama-3.1-70B-Instruct",
 19 |     "Qwen/Qwen2.5-14B-Instruct",
 20 |     "Qwen/Qwen2.5-7B-Instruct",
 21 |     "Qwen/QwQ-32B-Preview",
 22 |     "Qwen/QwQ-32B",
 23 |     "Qwen/Qwen3-32B",
 24 |     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
 25 |     "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
 26 | ]
 27 | # Anthropic models
 28 | supported_anthropic_models = [
 29 |     "claude-opus-4-20250514",
 30 |     "claude-sonnet-4-20250514",
 31 |     "claude-3-5-haiku-20241022",
 32 |     "claude-3-7-sonnet-20250219",
 33 |     "claude-3-5-sonnet-20241022",
 34 |     "claude-3-5-sonnet-20240620",
 35 |     "claude-3-opus-20240229",
 36 |     "claude-3-sonnet-20240229",
 37 |     "claude-3-haiku-20240307",
 38 | ]
 39 | # Gemini models. gemini-2.0 doesn't work with toolcall in our last test.
 40 | supported_gemini_models = [
 41 |     "gemini-2.5-pro",
 42 |     "gemini-2.5-flash",
 43 | ]
 44 | 
 45 | # GROQ models
 46 | supported_groq_models = [
 47 |     "openai/gpt-oss-120b",
 48 |     "openai/gpt-oss-20b",
 49 |     "qwen/qwen3-32b",
 50 |     "deepseek-r1-distill-llama-70b",
 51 |     "gemma2-9b-it",
 52 |     "groq/compound",
 53 |     "groq/compound-mini",
 54 |     "llama-3.1-8b-instant",
 55 |     "llama-3.3-70b-versatile",
 56 |     "meta-llama/llama-4-maverick-17b-128e-instruct",
 57 |     "meta-llama/llama-4-scout-17b-16e-instruct",
 58 |     "meta-llama/llama-guard-4-12b",
 59 |     "meta-llama/llama-prompt-guard-2-22m",
 60 |     "meta-llama/llama-prompt-guard-2-86m",
 61 |     "moonshotai/kimi-k2-instruct-0905",
 62 |     "whisper-large-v3",
 63 |     "whisper-large-v3-turbo",
 64 | ]
 65 | 
 66 | 
 67 | 
 68 | 
 69 | supported_argo_models = [
 70 |     "argo:gpt-3.5-turbo",
 71 |     "argo:gpt-3.5-turbo-16k",
 72 |     "argo:gpt-4",
 73 |     "argo:gpt-4-32k",
 74 |     "argo:gpt-4-turbo",
 75 |     "argo:gpt-4o",
 76 |     "argo:gpt-4o-latest",
 77 |     "argo:gpt-o1-preview",
 78 |     "argo:o1-preview",
 79 |     "argo:gpt-o1-mini",
 80 |     "argo:o1-mini",
 81 |     "argo:gpt-o3-mini",
 82 |     "argo:o3-mini",
 83 |     "argo:gpt-o1",
 84 |     "argo:o1",
 85 |     "argo:gpt-o3",
 86 |     "argo:o3",
 87 |     "argo:gpt-o4-mini",
 88 |     "argo:o4-mini",
 89 |     "argo:gpt-4.1",
 90 |     "argo:gpt-4.1-mini",
 91 |     "argo:gpt-4.1-nano",
 92 | ]
 93 | 
 94 | all_supported_models = (
 95 |     supported_openai_models
 96 |     + supported_ollama_models
 97 |     + supported_alcf_models
 98 |     + supported_anthropic_models
 99 |     + supported_argo_models
100 |     + supported_gemini_models
101 |     + supported_groq_models
102 | )
103 | 


--------------------------------------------------------------------------------
/src/chemgraph/models/calculators/psi4_calc.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | 
 3 | 
 4 | class Psi4Calc(BaseModel):
 5 |     """PSI4 quantum chemistry calculator configuration.
 6 | 
 7 |     This class defines the configuration parameters for PSI4 quantum chemistry
 8 |     calculations. It supports various quantum chemical methods, basis sets, and
 9 |     SCF convergence parameters.
10 | 
11 |     Parameters
12 |     ----------
13 |     calculator_type : str, optional
14 |         Type of calculator. Only 'psi4' is supported, by default 'psi4'
15 |     method : str, optional
16 |         Computational method to be used. Common methods include:
17 |         ['hf', 'mp2', 'ccsd', 'ccsd(t)', 'df-mp2', 'b3lyp', 'pbe0', 'm06-2x'],
18 |         by default 'b3lyp'
19 |     basis : str, optional
20 |         Basis set to be used. Common basis sets include:
21 |         ['sto-3g', '6-31g', 'cc-pvdz', 'cc-pvtz', 'def2-svp', 'aug-cc-pvdz'],
22 |         by default '6-31g'
23 |     reference : str, optional
24 |         Wavefunction reference type. Options: 'rhf' (default), 'uhf', 'rohf',
25 |         by default 'rhf'
26 |     scf_type : str, optional
27 |         SCF solver type. Options: 'pk' (default), 'df' (Density-Fitted),
28 |         'cd' (Cholesky Decomposition), by default 'pk'
29 |     maxiter : int, optional
30 |         Maximum number of SCF iterations, by default 50
31 |     """
32 | 
33 |     calculator_type: str = Field(
34 |         default="psi4", description="Type of calculator. Only 'psi4' is supported."
35 |     )
36 |     method: str = Field(
37 |         default="b3lyp",
38 |         description=(
39 |             "Computational method to be used. List of common methods: ['hf', 'mp2', 'ccsd', 'ccsd(t)', 'df-mp2', 'b3lyp', 'pbe0', 'm06-2x']"
40 |         ),
41 |     )
42 |     basis: str = Field(
43 |         default="6-31g",
44 |         description=(
45 |             "Basis set to be used. List of common basis set: ['sto-3g', '6-31g', 'cc-pvdz', 'cc-pvtz', 'def2-svp', 'aug-cc-pvdz'] "
46 |         ),
47 |     )
48 |     reference: str = Field(
49 |         default="rhf",
50 |         description="Wavefunction reference type. Options: 'rhf' (default), 'uhf', 'rohf'.",
51 |     )
52 | 
53 |     scf_type: str = Field(
54 |         default="pk",
55 |         description="SCF solver type. Options: 'pk' (default), 'df' (Density-Fitted), 'cd' (Cholesky Decomposition).",
56 |     )
57 | 
58 |     maxiter: int = Field(
59 |         default=50, description="Maximum number of SCF iterations. Default is 50."
60 |     )
61 | 
62 |     def get_calculator(self) -> dict:
63 |         """Get a dictionary of PSI4 calculation parameters.
64 | 
65 |         Constructs and returns a dictionary containing the parameters
66 |         for a PSI4 calculation based on the current settings.
67 | 
68 |         Returns
69 |         -------
70 |         dict
71 |             A dictionary with PSI4 calculation parameters including method,
72 |             basis, reference, SCF type, and maximum iterations
73 |         """
74 |         params = {
75 |             "method": self.method,
76 |             "basis": self.basis,
77 |             "reference": self.reference,
78 |             "scf_type": self.scf_type,
79 |             "maxiter": self.maxiter,
80 |         }
81 |         return params
82 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/anthropic_loader.py:
--------------------------------------------------------------------------------
 1 | """Load Anthropic models using LangChain."""
 2 | 
 3 | import os
 4 | from getpass import getpass
 5 | from langchain_anthropic import ChatAnthropic
 6 | from chemgraph.models.supported_models import supported_anthropic_models
 7 | from chemgraph.utils.logging_config import setup_logger
 8 | 
 9 | logger = setup_logger(__name__)
10 | 
11 | 
def load_anthropic_model(
    model_name: str, temperature: float, api_key: str = None, prompt: str = None
) -> ChatAnthropic:
    """
    Load an Anthropic chat model into LangChain.

    Parameters
    ----------
    model_name : str
        The name of the Anthropic chat model to load. See supported_anthropic_models
        for the list of supported models.
    temperature : float
        Controls the randomness of the generated text. A higher temperature results
        in more random outputs, while a lower temperature results in more deterministic outputs.
    api_key : str, optional
        The Anthropic API key. If not provided, the function reads it from the
        environment variable `ANTHROPIC_API_KEY`; if that is unset or empty, the
        user is prompted for it interactively and the entered key is stored in
        `ANTHROPIC_API_KEY` for the current process.
    prompt : str, optional
        Not used when building the model; it is only forwarded when the load is
        retried after an authentication failure.

    Returns
    -------
    ChatAnthropic
        An instance of LangChain's ChatAnthropic model, created with
        ``max_tokens=6000``.

    Raises
    ------
    ValueError
        If ``model_name`` is not in supported_anthropic_models.
    Exception
        Any non-authentication error raised while constructing the model is
        logged and re-raised unchanged.

    Notes
    -----
    The model name is validated before any API key handling, so an unsupported
    model never triggers an interactive key prompt.
    """

    if model_name not in supported_anthropic_models:
        raise ValueError(
            f"Unsupported model '{model_name}'. Supported models are: {supported_anthropic_models}."
        )

    if api_key is None:
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            logger.info("Anthropic API key not found in environment variables.")
            api_key = getpass("Please enter your Anthropic API key: ")
            os.environ["ANTHROPIC_API_KEY"] = api_key

    try:
        logger.info("Loading Anthropic model: %s", model_name)
        llm = ChatAnthropic(
            model=model_name,
            temperature=temperature,
            api_key=api_key,
            max_tokens=6000,
        )
        # No guarantee that api_key is valid, authentication happens only during invocation
        logger.info("Requested model: %s", model_name)
        logger.info("Anthropic model loaded successfully")
        return llm
    except Exception as e:
        # Can remove this since authentication happens only during invocation
        if "AuthenticationError" in str(e) or "invalid_api_key" in str(e):
            logger.warning("Invalid Anthropic API key.")
            api_key = getpass("Please enter a valid Anthropic API key: ")
            # Store the replacement under the Anthropic variable (not OpenAI's)
            # so later lookups in this process pick up the corrected key.
            os.environ["ANTHROPIC_API_KEY"] = api_key
            # Retry with new API key
            return load_anthropic_model(model_name, temperature, api_key, prompt)
        else:
            logger.error("Error loading Anthropic model: %s", e)
            raise
81 | 


--------------------------------------------------------------------------------
/src/chemgraph/models/calculators/fairchem_calc.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | 
 3 | from typing import Optional, Union, Dict
 4 | from pathlib import Path
 5 | import torch
 6 | import logging
 7 | 
 8 | try:
 9 |     from fairchem.core import FAIRChemCalculator
10 |     from fairchem.core.units.mlip_unit.mlip_unit import MLIPPredictUnit
11 |     from fairchem.core.units.mlip_unit.api.inference import UMATask
12 | 
13 | 
14 | except ImportError:
15 |     logging.warning("fairchem is not installed. .")
16 | 
17 | 
18 | from fairchem.core import pretrained_mlip, FAIRChemCalculator
19 | 
20 | 
class FAIRChemCalc(BaseModel):
    """FAIRChem calculator configuration for ASE integration.

    Parameters
    ----------
    calculator_type : str, optional
        Calculator identifier. Must be 'FAIRChem'. Default is 'FAIRChem'.
    task_name : str, optional
        Task name ('omol', 'omat', 'oc20', 'odac', or 'omc') for the prediction head.
        Must match available tasks in the model. Default is None.
    seed : int, optional
        Seed for model reproducibility. Default is 42.
    spin : int, optional
        Spin multiplicity. Default is 1.
    charge : int, optional
        System charge. Default is 0.
    model_name : str, optional
        Inference model name. Options are 'uma-s-1p1' and 'uma-m-1'.
        Default is 'uma-s-1p1'.
    device : str, optional
        Device to run inference on. Default is 'cuda' if available, otherwise 'cpu'.
        The default is evaluated once, when this module is imported.
    inference_settings : str, optional
        Settings for inference. Can be 'default' or 'turbo'. Default is 'default'.

    """

    calculator_type: str = Field(
        default="FAIRChem", description="Calculator identifier. Must be 'FAIRChem'."
    )
    task_name: Optional[str] = Field(
        default=None,
        description="Prediction task. Options are 'omol', 'omat', 'oc20', 'odac', or 'omc'",
    )
    seed: int = Field(default=42, description="Random seed for inference reproducibility.")
    spin: Optional[int] = Field(default=1, description="Total spin multiplicity of the system.")
    charge: Optional[int] = Field(default=0, description="Total system charge.")
    model_name: str = Field(
        default="uma-s-1p1", description="Model names. Options are 'uma-s-1p1' and 'uma-m-1'"
    )
    device: str = Field(
        default="cuda" if torch.cuda.is_available() else "cpu",
        description="Computation device to use, either 'cpu' or 'cuda'.",
    )
    inference_settings: str = Field(
        default="default", description="Settings for inference. Can be 'default' or 'turbo'"
    )

    def get_calculator(self) -> FAIRChemCalculator:
        """Return a configured FAIRChemCalculator.

        The predict unit is obtained from ``pretrained_mlip.get_predict_unit``
        using this configuration's ``model_name``, ``inference_settings`` and
        ``device``; it is then wrapped in a calculator together with
        ``task_name`` and ``seed``. ``spin`` and ``charge`` are not passed to
        the calculator; see ``get_atoms_properties``.

        Returns
        -------
        FAIRChemCalculator
            ASE-compatible calculator instance.
        """

        predict_unit = pretrained_mlip.get_predict_unit(
            model_name=self.model_name,
            inference_settings=self.inference_settings,
            device=self.device,
        )
        return FAIRChemCalculator(
            predict_unit=predict_unit,
            task_name=self.task_name,
            seed=self.seed,
        )

    def get_atoms_properties(self) -> Dict[str, Optional[int]]:
        """Return the spin and charge settings to inject into atoms.info.

        Returns
        -------
        dict
            Mapping with the keys "spin" and "charge".
        """
        return {
            "spin": self.spin,
            "charge": self.charge,
        }
94 | 


--------------------------------------------------------------------------------
/src/chemgraph/graphs/mock_agent.py:
--------------------------------------------------------------------------------
  1 | from langgraph.graph import StateGraph, START, END
  2 | from langchain_openai import ChatOpenAI
  3 | from langgraph.checkpoint.memory import MemorySaver
  4 | from chemgraph.tools.ase_tools import (
  5 |     run_ase,
  6 |     save_atomsdata_to_file,
  7 |     file_to_atomsdata,
  8 | )
  9 | from chemgraph.tools.cheminformatics_tools import (
 10 |     molecule_name_to_smiles,
 11 |     smiles_to_atomsdata,
 12 | )
 13 | from chemgraph.tools.generic_tools import calculator
 14 | from chemgraph.prompt.single_agent_prompt import (
 15 |     single_agent_prompt,
 16 | )
 17 | from chemgraph.utils.logging_config import setup_logger
 18 | from chemgraph.state.state import State
 19 | 
 20 | logger = setup_logger(__name__)
 21 | 
 22 | 
 23 | def ChemGraphAgent(state: State, llm: ChatOpenAI, system_prompt: str, tools=None):
 24 |     """LLM node that processes messages and decides next actions.
 25 | 
 26 |     Parameters
 27 |     ----------
 28 |     state : State
 29 |         The current state containing messages and remaining steps
 30 |     llm : ChatOpenAI
 31 |         The language model to use for processing
 32 |     system_prompt : str
 33 |         The system prompt to guide the LLM's behavior
 34 |     tools : list, optional
 35 |         List of tools available to the agent, by default None
 36 | 
 37 |     Returns
 38 |     -------
 39 |     dict
 40 |         Updated state containing the LLM's response
 41 |     """
 42 | 
 43 |     # Load default tools if no tool is specified.
 44 |     if tools is None:
 45 |         tools = [
 46 |             file_to_atomsdata,
 47 |             smiles_to_atomsdata,
 48 |             run_ase,
 49 |             molecule_name_to_smiles,
 50 |             save_atomsdata_to_file,
 51 |             calculator,
 52 |         ]
 53 |     messages = [
 54 |         {"role": "system", "content": system_prompt},
 55 |         {"role": "user", "content": f"{state['messages']}"},
 56 |     ]
 57 |     llm_with_tools = llm.bind_tools(tools=tools)
 58 |     return {"messages": [llm_with_tools.invoke(messages)]}
 59 | 
 60 | def construct_mock_agent_graph(
 61 |     llm: ChatOpenAI,
 62 |     system_prompt: str = single_agent_prompt,
 63 |     tools: list = None,
 64 | ):
 65 |     """Construct a geometry optimization graph.
 66 | 
 67 |     Parameters
 68 |     ----------
 69 |     llm : ChatOpenAI
 70 |         The language model to use for the graph
 71 |     system_prompt : str, optional
 72 |         The system prompt to guide the LLM's behavior, by default single_agent_prompt
 73 |     tools: list, optional
 74 |         The list of tools for the main agent, by default None
 75 |     Returns
 76 |     -------
 77 |     StateGraph
 78 |         The constructed single agent graph
 79 |     """
 80 |     logger.info("Constructing mock agent graph")
 81 |     checkpointer = MemorySaver()
 82 |     if tools is None:
 83 |         tools = [
 84 |             file_to_atomsdata,
 85 |             smiles_to_atomsdata,
 86 |             run_ase,
 87 |             molecule_name_to_smiles,
 88 |             save_atomsdata_to_file,
 89 |             calculator,
 90 |         ]
 91 |     graph_builder = StateGraph(State)
 92 | 
 93 |     graph_builder.add_node(
 94 |         "ChemGraphAgent",
 95 |         lambda state: ChemGraphAgent(state, llm, system_prompt=system_prompt, tools=tools),
 96 |     )
 97 |     graph_builder.add_edge(START, "ChemGraphAgent")
 98 |     graph_builder.add_edge("ChemGraphAgent", END)
 99 | 
100 |     graph = graph_builder.compile(checkpointer=checkpointer)
101 |     logger.info("Mock agent graph construction completed")
102 |     return graph
103 | 


--------------------------------------------------------------------------------
/src/chemgraph/models/calculators/nwchem_calc.py:
--------------------------------------------------------------------------------
 1 | # Main keywords and parameters obtained from https://wiki.fysik.dtu.dk/ase/_modules/ase/calculators/nwchem.html
 2 | # Parameters for NWChem calculator in CompChemAgent
 3 | 
 4 | from typing import Optional, Union, Dict
 5 | from pydantic import BaseModel, Field
 6 | from ase.calculators.nwchem import NWChem
 7 | 
 8 | 
 9 | class NWChemCalc(BaseModel):
10 |     """NWChem quantum chemistry calculator configuration.
11 | 
12 |     This class defines the configuration parameters for NWChem quantum chemistry
13 |     calculations. It supports various quantum chemical methods, basis sets, and
14 |     periodic calculations through the NWChem program.
15 | 
16 |     Parameters
17 |     ----------
18 |     calculator_type : str, optional
19 |         Calculator type. Currently supports only 'nwchem', by default 'nwchem'
20 |     theory : str, optional
21 |         NWChem module to be used. Options: 'dft', 'scf', 'mp2', 'ccsd', 'tce',
22 |         'tddft', 'pspw', 'band', 'paw', by default 'dft'
23 |     xc : str, optional
24 |         Exchange-correlation functional (only applicable for DFT calculations),
25 |         by default 'PBE'
26 |     basis : str or dict, optional
27 |         Basis set to use. Can be a string for all elements or a dictionary
28 |         mapping elements to basis sets, by default '6-31G'
29 |     kpts : tuple or dict, optional
30 |         K-point mesh for periodic calculations, by default None
31 |     directory : str, optional
32 |         Working directory for NWChem calculations, by default '.'
33 |     command : str, optional
34 |         Command to execute NWChem (e.g., 'nwchem PREFIX.nwi > PREFIX.nwo'),
35 |         by default None
36 |     """
37 | 
38 |     calculator_type: str = Field(
39 |         default="nwchem",
40 |         description="Calculator type. Currently supports only 'nwchem'.",
41 |     )
42 |     theory: Optional[str] = Field(
43 |         default="dft",
44 |         description="NWChem module to be used. Options: 'dft', 'scf', 'mp2', 'ccsd', 'tce', 'tddft', 'pspw', 'band', 'paw'.",
45 |     )
46 |     xc: Optional[str] = Field(
47 |         default="PBE",
48 |         description="Exchange-correlation functional (only applicable for DFT calculations).",
49 |     )
50 |     basis: Optional[Union[str, Dict[str, str]]] = Field(
51 |         default="6-31G",
52 |         description="Basis set to use. Can be a string for all elements or a dictionary mapping elements to basis sets.",
53 |     )
54 |     kpts: Optional[Union[tuple, Dict[str, Union[int, str]]]] = Field(
55 |         default=None, description="K-point mesh for periodic calculations."
56 |     )
57 |     directory: str = Field(
58 |         default=".", description="Working directory for NWChem calculations."
59 |     )
60 |     command: Optional[str] = Field(
61 |         default=None,
62 |         description="Command to execute NWChem (e.g., 'nwchem PREFIX.nwi > PREFIX.nwo').",
63 |     )
64 | 
65 |     def get_calculator(self):
66 |         """Get an ASE-compatible NWChem calculator instance.
67 | 
68 |         Returns
69 |         -------
70 |         NWChem
71 |             An ASE-compatible NWChem calculator instance
72 | 
73 |         Raises
74 |         ------
75 |         ValueError
76 |             If an invalid calculator_type is specified
77 |         """
78 |         if self.calculator_type != "nwchem":
79 |             raise ValueError(
80 |                 "Invalid calculator_type. The only valid option is 'nwchem'."
81 |             )
82 | 
83 |         return NWChem(
84 |             theory=self.theory,
85 |             xc=self.xc,
86 |             basis=self.basis,
87 |             kpts=self.kpts,
88 |             directory=self.directory,
89 |             command=self.command,
90 |         )
91 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/groq_loader.py:
--------------------------------------------------------------------------------
 1 | """Load GROQ models using LangChain."""
 2 | 
 3 | import os
 4 | from getpass import getpass
 5 | from langchain_groq import ChatGroq
 6 | from chemgraph.models.supported_models import supported_groq_models
 7 | from chemgraph.utils.logging_config import setup_logger
 8 | 
 9 | logger = setup_logger(__name__)  # logger for this module, created via chemgraph's setup_logger
10 | 
11 | 
12 | def load_groq_model(
13 |     model_name: str,
14 |     temperature: float,
15 |     api_key: str = None,
16 |     prompt: str = None,
17 |     base_url: str = None,
18 | ) -> ChatGroq:
19 |     """Load a GROQ chat model into LangChain.
20 |     This function loads a GROQ model and configures it for use with LangChain.
21 |     It handles API key management, including prompting for the key if not provided
22 |     or if the provided key is invalid.
23 |     Parameters
24 |     ----------
25 |     model_name : str
26 |         The name of the GROQ chat model to load. See supported_groq_models for list
27 |         of supported models.
28 |     temperature : float
29 |         Controls the randomness of the generated text. Higher values (e.g., 0.8)
30 |         make the output more random, while lower values (e.g., 0.2) make it more
31 |         deterministic.
32 |     api_key : str, optional
33 |         The GROQ API key. If not provided, the function will attempt to retrieve it
34 |         from the environment variable `GROQ_API_KEY`.
35 |     prompt : str, optional
36 |         Custom prompt to use when requesting the API key from the user.
37 |     base_url : str, optional
38 |         Custom base URL for the GROQ API (currently unused but included for consistency).
39 |     Returns
40 |     -------
41 |     ChatGroq
42 |         An instance of LangChain's ChatGroq model.
43 |     Raises
44 |     ------
45 |     ValueError
46 |         If the model name is not in the list of supported models.
47 |     Exception
48 |         If there is an error loading the model or if the API key is invalid.
49 |     Notes
50 |     -----
51 |     The function will:
52 |     1. Check for the API key in the environment variables
53 |     2. Prompt for the key if not found
54 |     3. Validate the model name against supported models
55 |     4. Attempt to load the model
56 |     5. Handle any authentication errors by prompting for a new key
57 |     """
58 | 
59 |     if api_key is None:
60 |         api_key = os.getenv("GROQ_API_KEY")
61 |         if not api_key:
62 |             logger.info("GROQ API key not found in environment variables.")
63 |             api_key = getpass("Please enter your GROQ API key: ")
64 |             os.environ["GROQ_API_KEY"] = api_key
65 | 
66 |     if model_name not in supported_groq_models:
67 |         raise ValueError(
68 |             f"Unsupported model '{model_name}'. Supported models are: {supported_groq_models}."
69 |         )
70 | 
71 |     try:
72 |         logger.info(f"Loading GROQ model: {model_name}")
73 |         llm = ChatGroq(
74 |             model=model_name,
75 |             temperature=temperature,
76 |             api_key=api_key,
77 |             max_tokens=6000,
78 |         )
79 |         # No guarantee that api_key is valid, authentication happens only during invocation
80 |         logger.info(f"Requested model: {model_name}")
81 |         logger.info("GROQ model loaded successfully")
82 |         return llm
83 |     except Exception as e:
84 |         # Can remove this since authentication happens only during invocation
85 |         if "AuthenticationError" in str(e) or "invalid_api_key" in str(e):
86 |             logger.warning("Invalid GROQ API key.")
87 |             api_key = getpass("Please enter a valid GROQ API key: ")
88 |             os.environ["GROQ_API_KEY"] = api_key
89 |             # Retry with new API key
90 |             return load_groq_model(model_name, temperature, api_key, prompt)
91 |         else:
92 |             logger.error(f"Error loading GROQ model: {str(e)}")
93 |             raise
94 |         
95 | 


--------------------------------------------------------------------------------
/src/chemgraph/tools/gemini_loader.py:
--------------------------------------------------------------------------------
 1 | """Load Gemini models using LangChain."""
 2 | 
 3 | import os
 4 | from getpass import getpass
 5 | from langchain_google_genai import ChatGoogleGenerativeAI
 6 | from chemgraph.models.supported_models import supported_gemini_models
 7 | from chemgraph.utils.logging_config import setup_logger
 8 | 
 9 | logger = setup_logger(__name__)  # logger for this module, created via chemgraph's setup_logger
10 | 
11 | 
12 | def load_gemini_model(
13 |     model_name: str,
14 |     temperature: float,
15 |     api_key: str = None,
16 |     prompt: str = None,
17 |     base_url: str = None,
18 | ) -> ChatGoogleGenerativeAI:
19 |     """Load an Gemini chat model into LangChain.
20 | 
21 |     This function loads an Gemini model and configures it for use with LangChain.
22 |     It handles API key management, including prompting for the key if not provided
23 |     or if the provided key is invalid.
24 | 
25 |     Parameters
26 |     ----------
27 |     model_name : str
28 |         The name of the Gemini chat model to load. See supported_gemini_models for list
29 |         of supported models.
30 |     temperature : float
31 |         Controls the randomness of the generated text. Higher values (e.g., 0.8)
32 |         make the output more random, while lower values (e.g., 0.2) make it more
33 |         deterministic.
34 |     api_key : str, optional
35 |         The Google API key. If not provided, the function will attempt to retrieve it
36 |         from the environment variable `GEMINI_API_KEY`.
37 |     prompt : str, optional
38 |         Custom prompt to use when requesting the API key from the user.
39 | 
40 |     Returns
41 |     -------
42 |     ChatGoogleGenerativeAI
43 |         An instance of LangChain's ChatGoogleGenerativeAI model.
44 | 
45 |     Raises
46 |     ------
47 |     ValueError
48 |         If the model name is not in the list of supported models.
49 |     Exception
50 |         If there is an error loading the model or if the API key is invalid.
51 | 
52 |     Notes
53 |     -----
54 |     The function will:
55 |     1. Check for the API key in the environment variables
56 |     2. Prompt for the key if not found
57 |     3. Validate the model name against supported models
58 |     4. Attempt to load the model
59 |     5. Handle any authentication errors by prompting for a new key
60 |     """
61 | 
62 |     if api_key is None:
63 |         api_key = os.getenv("GEMINI_API_KEY")
64 |         if not api_key:
65 |             logger.info("Google API key not found in environment variables.")
66 |             api_key = getpass("Please enter your Google API key: ")
67 |             os.environ["GEMINI_API_KEY"] = api_key
68 | 
69 |     if model_name not in supported_gemini_models:
70 |         raise ValueError(
71 |             f"Unsupported model '{model_name}'. Supported models are: {supported_gemini_models}."
72 |         )
73 | 
74 |     try:
75 |         logger.info(f"Loading Gemini model: {model_name}")
76 |         llm = ChatGoogleGenerativeAI(
77 |             model=model_name,
78 |             temperature=temperature,
79 |             api_key=api_key,
80 |             max_output_tokens=6000,
81 |         )
82 |         # No guarantee that api_key is valid, authentication happens only during invocation
83 |         logger.info(f"Requested model: {model_name}")
84 |         logger.info("Gemini model loaded successfully")
85 |         return llm
86 |     except Exception as e:
87 |         # Can remove this since authentication happens only during invocation
88 |         if "AuthenticationError" in str(e) or "invalid_api_key" in str(e):
89 |             logger.warning("Invalid Google API key.")
90 |             api_key = getpass("Please enter a valid Google API key: ")
91 |             os.environ["GEMINI_API_KEY"] = api_key
92 |             # Retry with new API key
93 |             return load_gemini_model(model_name, temperature, api_key, prompt)
94 |         else:
95 |             logger.error(f"Error loading Google model: {str(e)}")
96 |             raise
97 | 


--------------------------------------------------------------------------------
/scripts/evaluations/run_llm_workflow/Exp12_from_reaction_to_enthalpy/run_llm_workflow.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from chemgraph.agent.llm_agent import ChemGraph
 3 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
 4 | import argparse
 5 | import datetime
 6 | 
 7 | 
 8 | def get_query(
 9 |     reaction: dict,
10 |     query_name: str = "enthalpy",
11 |     temperature: float = 298.15,
12 |     pressure: float = 101325,
13 |     method: str = "mace_mp",
14 | ) -> str:
15 |     """Get query for LLM.
16 | 
17 |     Returns:
18 |         _type_: _description_
19 |     """
20 |     reactants_str = " + ".join([f"{r['coefficient']} ({r['name']})" for r in reaction["reactants"]])
21 |     products_str = " + ".join([f"{p['coefficient']} ({p['name']})" for p in reaction["products"]])
22 | 
23 |     reaction_equation = f"{reactants_str} -> {products_str}"
24 |     query_dict = {
25 |         "enthalpy": f"Calculate the reaction enthalpy for this reaction: {reaction_equation}",
26 |         "enthalpy_method": f"You are given a chemical reaction: {reaction_equation}. Calculate the enthalpy for this reaction using {method} at {temperature}K.",
27 |         "gibbs_free_energy": f"What is the Gibbs free energy of reaction for {reaction_equation}?",
28 |         "gibbs_free_energy_method": f"What is the Gibbs free energy of reaction for {reaction_equation} using {method}?",
29 |         "gibbs_free_energy_method_temperature": f"What is the Gibbs free energy of reaction for {reaction_equation} using {method} at {temperature}K?",
30 |     }
31 | 
32 |     return query_dict.get(query_name, "Query not found")  # Returns the query or a default message
33 | 
34 | 
35 | def main(n_reactions: int):
36 |     """ """
37 |     # Load SMILES data from the specified JSON file
38 |     combined_data = {}
39 | 
40 |     cca = ChemGraph(
41 |         model_name='gpt-4o-mini',
42 |         workflow_type="single_agent",
43 |         structured_output=True,
44 |         return_option="state",
45 |     )
46 |     with open("reaction_dataset.json", "r") as rf:
47 |         reactions = json.load(rf)
48 | 
49 |     # Iterate through the first n_structures molecules
50 |     for idx, reaction in enumerate(reactions[:n_reactions]):
51 |         print("********************************************")
52 |         print(
53 |             f"REACTION INDEX {reaction['reaction_index']}: REACTION NAME: {reaction['reaction_name']}"
54 |         )
55 |         print("********************************************")
56 | 
57 |         name = reaction["reaction_name"]
58 | 
59 |         query = get_query(
60 |             reaction, query_name="enthalpy_method", method="GFN2-xTB", temperature=400
61 |         )
62 | 
63 |         try:
64 |             state = cca.run(query, config={"configurable": {"thread_id": f"{str(idx)}"}})
65 |         except Exception as e:
66 |             print(e)
67 | 
68 |         llm_workflow = get_workflow_from_state(state)
69 | 
70 |         # Store results in a structured dictionary
71 |         state_data = cca.write_state(config={"configurable": {"thread_id": f"{str(idx)}"}})
72 | 
73 |         combined_data[name] = {"llm_workflow": llm_workflow}
74 |         combined_data[name]["metadata"] = state_data
75 | 
76 |     # Save the results to a JSON file
77 |     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
78 |     filename = f"llm_workflow_{timestamp}.json"
79 | 
80 |     # Save the results to a JSON file
81 |     with open(filename, "w") as f:
82 |         json.dump(combined_data, f, indent=4)
83 | 
84 | 
85 | if __name__ == "__main__":
86 |     # Parse command-line arguments
87 |     parser = argparse.ArgumentParser(description="Calculate properties of a reaction.")
88 |     parser.add_argument(
89 |         "--n_reactions", type=int, default=10, help="Number of molecules to process (default: 10)"
90 |     )
91 |     args = parser.parse_args()
92 | 
93 |     # Call the main function with parsed arguments
94 |     main(args.n_reactions)
95 | 


--------------------------------------------------------------------------------
/scripts/evaluations/run_llm_workflow/Exp14_from_reaction_to_enthalpy_multiagent/run_llm_workflow.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from chemgraph.agent.llm_agent import ChemGraph
 3 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
 4 | import argparse
 5 | import datetime
 6 | 
 7 | 
 8 | def get_query(
 9 |     reaction: dict,
10 |     query_name: str = "enthalpy",
11 |     temperature: float = 298,
12 |     pressure: float = 101325,
13 |     method: str = "mace_mp",
14 | ) -> str:
15 |     """Get query for LLM.
16 | 
17 |     Returns:
18 |         _type_: _description_
19 |     """
20 |     reactants_str = " + ".join([f"{r['coefficient']} ({r['name']})" for r in reaction["reactants"]])
21 |     products_str = " + ".join([f"{p['coefficient']} ({p['name']})" for p in reaction["products"]])
22 | 
23 |     reaction_equation = f"{reactants_str} -> {products_str}"
24 |     query_dict = {
25 |         "enthalpy": f"Calculate the reaction enthalpy for this reaction: {reaction_equation}",
26 |         "enthalpy_method": f"You are given a chemical reaction: {reaction_equation}. Calculate the enthalpy for this reaction using {method} at {temperature}K.",
27 |         "gibbs_free_energy": f"What is the Gibbs free energy of reaction for {reaction_equation}?",
28 |         "gibbs_free_energy_method": f"What is the Gibbs free energy of reaction for {reaction_equation} using {method}?",
29 |         "gibbs_free_energy_method_temperature": f"What is the Gibbs free energy of reaction for {reaction_equation} using {method} at {temperature}K?",
30 |     }
31 | 
32 |     return query_dict.get(query_name, "Query not found")  # Returns the query or a default message
33 | 
34 | 
35 | def main(n_reactions: int):
36 |     """ """
37 |     # Load SMILES data from the specified JSON file
38 |     combined_data = {}
39 | 
40 |     cca = ChemGraph(
41 |         model_name='gpt-4o-mini',
42 |         workflow_type="multi_agent",
43 |         structured_output=True,
44 |         return_option="state",
45 |     )
46 |     with open("reaction_dataset.json", "r") as rf:
47 |         reactions = json.load(rf)
48 | 
49 |     # Iterate through the first n_structures molecules
50 |     for idx, reaction in enumerate(reactions[:n_reactions]):
51 |         print("********************************************")
52 |         print(
53 |             f"REACTION INDEX {reaction['reaction_index']}: REACTION NAME: {reaction['reaction_name']}"
54 |         )
55 |         print("********************************************")
56 | 
57 |         name = reaction["reaction_name"]
58 | 
59 |         query = get_query(
60 |             reaction, query_name="enthalpy_method", method="GFN2-xTB", temperature=400
61 |         )
62 | 
63 |         try:
64 |             state = cca.run(query, config={"configurable": {"thread_id": f"{str(idx)}"}})
65 |         except Exception as e:
66 |             print(e)
67 | 
68 |         llm_workflow = get_workflow_from_state(state)
69 | 
70 |         # Store results in a structured dictionary
71 |         state_data = cca.write_state(config={"configurable": {"thread_id": f"{str(idx)}"}})
72 | 
73 |         combined_data[name] = {"llm_workflow": llm_workflow}
74 |         combined_data[name]["metadata"] = state_data
75 | 
76 |     # Save the results to a JSON file
77 |     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
78 |     filename = f"llm_workflow_{timestamp}.json"
79 | 
80 |     # Save the results to a JSON file
81 |     with open(filename, "w") as f:
82 |         json.dump(combined_data, f, indent=4)
83 | 
84 | 
85 | if __name__ == "__main__":
86 |     # Parse command-line arguments
87 |     parser = argparse.ArgumentParser(description="Calculate properties of a reaction.")
88 |     parser.add_argument(
89 |         "--n_reactions", type=int, default=10, help="Number of molecules to process (default: 10)"
90 |     )
91 |     args = parser.parse_args()
92 | 
93 |     # Call the main function with parsed arguments
94 |     main(args.n_reactions)
95 | 


--------------------------------------------------------------------------------
/scripts/evaluations/run_llm_workflow/Exp13_from_reaction_to_gibbs/run_llm_workflow.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from chemgraph.agent.llm_agent import ChemGraph
 3 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
 4 | import argparse
 5 | import datetime
 6 | 
 7 | 
 8 | def get_query(
 9 |     reaction: dict,
10 |     query_name: str = "enthalpy",
11 |     temperature: float = 298,
12 |     pressure: float = 101325,
13 |     method: str = "mace_mp",
14 | ) -> str:
15 |     """Get query for LLM.
16 | 
17 |     Returns:
18 |         _type_: _description_
19 |     """
20 |     reactants_str = " + ".join([f"{r['coefficient']} ({r['name']})" for r in reaction["reactants"]])
21 |     products_str = " + ".join([f"{p['coefficient']} ({p['name']})" for p in reaction["products"]])
22 | 
23 |     reaction_equation = f"{reactants_str} -> {products_str}"
24 |     query_dict = {
25 |         "enthalpy": f"Calculate the reaction enthalpy for this reaction: {reaction_equation}",
26 |         "enthalpy_method": f"You are given a chemical reaction: {reaction_equation}. Calculate the enthalpy for this reaction using {method}.",
27 |         "gibbs_free_energy": f"What is the Gibbs free energy of reaction for {reaction_equation}?",
28 |         "gibbs_free_energy_method": f"What is the Gibbs free energy of reaction for {reaction_equation} using {method}?",
29 |         "gibbs_free_energy_method_temperature": f"You are given a chemical reaction: {reaction_equation}. Calculate the Gibbs free energy change (ΔG) for this reaction using {method} at {temperature}K.",
30 |     }
31 | 
32 |     return query_dict.get(query_name, "Query not found")  # Returns the query or a default message
33 | 
34 | 
35 | def main(n_reactions: int):
36 |     """ """
37 |     # Load SMILES data from the specified JSON file
38 |     combined_data = {}
39 | 
40 |     cca = ChemGraph(
41 |         model_name='gpt-4o-mini',
42 |         workflow_type="single_agent",
43 |         structured_output=True,
44 |         return_option="state",
45 |     )
46 | 
47 |     with open("reaction_dataset.json", "r") as rf:
48 |         reactions = json.load(rf)
49 |     # Iterate through the first n_structures molecules
50 |     for idx, reaction in enumerate(reactions[:n_reactions]):
51 |         print("********************************************")
52 |         print(
53 |             f"REACTION INDEX {reaction['reaction_index']}: REACTION NAME: {reaction['reaction_name']}"
54 |         )
55 |         print("********************************************")
56 | 
57 |         name = reaction["reaction_name"]
58 | 
59 |         query = get_query(
60 |             reaction,
61 |             query_name="gibbs_free_energy_method_temperature",
62 |             method="mace_mp",
63 |             temperature=500,
64 |         )
65 |         state = cca.run(query, config={"configurable": {"thread_id": f"{str(idx)}"}})
66 | 
67 |         llm_workflow = get_workflow_from_state(state)
68 | 
69 |         # Store results in a structured dictionary
70 |         state_data = cca.write_state(config={"configurable": {"thread_id": f"{str(idx)}"}})
71 | 
72 |         combined_data[name] = {"llm_workflow": llm_workflow}
73 |         combined_data[name]["metadata"] = state_data
74 | 
75 |     # Save the results to a JSON file
76 |     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
77 |     filename = f"llm_workflow_{timestamp}.json"
78 | 
79 |     # Save the results to a JSON file
80 |     with open(filename, "w") as f:
81 |         json.dump(combined_data, f, indent=4)
82 | 
83 | 
84 | if __name__ == "__main__":
85 |     # Parse command-line arguments
86 |     parser = argparse.ArgumentParser(description="Calculate properties of a reaction.")
87 |     parser.add_argument(
88 |         "--n_reactions", type=int, default=10, help="Number of molecules to process (default: 10)"
89 |     )
90 |     args = parser.parse_args()
91 | 
92 |     # Call the main function with parsed arguments
93 |     main(args.n_reactions)
94 | 


--------------------------------------------------------------------------------
/notebooks/cif_files/calf-20_pacmof.cif:
--------------------------------------------------------------------------------
 1 | data_image0
 2 | _cell_length_a       8.9138
 3 | _cell_length_b       9.6935
 4 | _cell_length_c       9.4836
 5 | _cell_angle_alpha    90
 6 | _cell_angle_beta     115.895
 7 | _cell_angle_gamma    90
 8 | 
 9 | _symmetry_space_group_name_H-M    "P 1"
10 | _symmetry_int_tables_number       1
11 | 
12 | loop_
13 |   _symmetry_equiv_pos_as_xyz
14 |   'x, y, z'
15 | 
16 | loop_
17 |   _atom_site_label
18 |   _atom_site_occupancy
19 |   _atom_site_fract_x
20 |   _atom_site_fract_y
21 |   _atom_site_fract_z
22 |   _atom_site_thermal_displace_type
23 |   _atom_site_B_iso_or_equiv
24 |   _atom_site_type_symbol
25 |   _atom_site_charge
26 |   Zn1      1.0000 0.17588  0.05771  0.43679  Biso   1.000  Zn  0.734237
27 |   Zn2      1.0000 0.82412  0.55771  0.06321  Biso   1.000  Zn  0.734237
28 |   Zn3      1.0000 0.82412  0.94229  0.56321  Biso   1.000  Zn  0.734237
29 |   Zn4      1.0000 0.17588  0.44229  0.93679  Biso   1.000  Zn  0.734237
30 |   N1       1.0000 0.03080  0.88920  0.36830  Biso   1.000  N  -0.225444
31 |   N2       1.0000 0.96920  0.38920  0.13170  Biso   1.000  N  -0.225444
32 |   N3       1.0000 0.96920  0.11080  0.63170  Biso   1.000  N  -0.225444
33 |   N4       1.0000 0.03080  0.61080  0.86830  Biso   1.000  N  -0.225444
34 |   N5       1.0000 0.90780  0.85250  0.41000  Biso   1.000  N  -0.218757
35 |   N6       1.0000 0.09220  0.35250  0.09000  Biso   1.000  N  -0.218757
36 |   N7       1.0000 0.09220  0.14750  0.59000  Biso   1.000  N  -0.218757
37 |   N8       1.0000 0.90780  0.64750  0.91000  Biso   1.000  N  -0.218757
38 |   N9       1.0000 0.90080  0.70860  0.22590  Biso   1.000  N  -0.303066
39 |   N10      1.0000 0.09920  0.20860  0.27410  Biso   1.000  N  -0.303066
40 |   N11      1.0000 0.09920  0.29140  0.77410  Biso   1.000  N  -0.303066
41 |   N12      1.0000 0.90080  0.79140  0.72590  Biso   1.000  N  -0.303066
42 |   O1       1.0000 0.40980  0.07610  0.61020  Biso   1.000  O  -0.532130
43 |   O2       1.0000 0.59020  0.57610  0.88980  Biso   1.000  O  -0.532130
44 |   O3       1.0000 0.59020  0.92390  0.38980  Biso   1.000  O  -0.532130
45 |   O4       1.0000 0.40980  0.42390  0.11020  Biso   1.000  O  -0.532130
46 |   O5       1.0000 0.67530  0.03070  0.67320  Biso   1.000  O  -0.530638
47 |   O6       1.0000 0.32470  0.53070  0.82680  Biso   1.000  O  -0.530638
48 |   O7       1.0000 0.32470  0.96930  0.32680  Biso   1.000  O  -0.530638
49 |   O8       1.0000 0.67530  0.46930  0.17320  Biso   1.000  O  -0.530638
50 |   C1       1.0000 0.02150  0.80170  0.25880  Biso   1.000  C  0.170903
51 |   C2       1.0000 0.97850  0.30170  0.24120  Biso   1.000  C  0.170903
52 |   C3       1.0000 0.97850  0.19830  0.74120  Biso   1.000  C  0.170903
53 |   C4       1.0000 0.02150  0.69830  0.75880  Biso   1.000  C  0.170903
54 |   H1       1.0000 0.09320  0.80450  0.20860  Biso   1.000  H  0.108210
55 |   H2       1.0000 0.90680  0.30450  0.29140  Biso   1.000  H  0.108210
56 |   H3       1.0000 0.90680  0.19550  0.79140  Biso   1.000  H  0.108210
57 |   H4       1.0000 0.09320  0.69550  0.70860  Biso   1.000  H  0.108210
58 |   C5       1.0000 0.83450  0.74460  0.32320  Biso   1.000  C  0.172635
59 |   C6       1.0000 0.16550  0.24460  0.17680  Biso   1.000  C  0.172635
60 |   C7       1.0000 0.16550  0.25540  0.67680  Biso   1.000  C  0.172635
61 |   C8       1.0000 0.83450  0.75540  0.82320  Biso   1.000  C  0.172635
62 |   H5       1.0000 0.74410  0.69710  0.32890  Biso   1.000  H  0.108121
63 |   H6       1.0000 0.25590  0.19710  0.17110  Biso   1.000  H  0.108121
64 |   H7       1.0000 0.25590  0.30290  0.67110  Biso   1.000  H  0.108121
65 |   H8       1.0000 0.74410  0.80290  0.82890  Biso   1.000  H  0.108121
66 |   C9       1.0000 0.52480  0.03080  0.58150  Biso   1.000  C  0.515930
67 |   C10      1.0000 0.47520  0.53080  0.91850  Biso   1.000  C  0.515930
68 |   C11      1.0000 0.47520  0.96920  0.41850  Biso   1.000  C  0.515930
69 |   C12      1.0000 0.52480  0.46920  0.08150  Biso   1.000  C  0.515930
70 | 


--------------------------------------------------------------------------------
/scripts/evaluations/run_llm_workflow/Exp15_from_reaction_to_gibbs_multi_agent/run_llm_workflow.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from chemgraph.agent.llm_agent import ChemGraph
 3 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
 4 | import argparse
 5 | import datetime
 6 | 
 7 | 
 8 | def get_query(
 9 |     reaction: dict,
10 |     query_name: str = "enthalpy",
11 |     temperature: float = 298,
12 |     pressure: float = 101325,
13 |     method: str = "mace_mp",
14 | ) -> str:
15 |     """Get query for LLM.
16 | 
17 |     Returns:
18 |         _type_: _description_
19 |     """
20 |     reactants_str = " + ".join([f"{r['coefficient']} ({r['name']})" for r in reaction["reactants"]])
21 |     products_str = " + ".join([f"{p['coefficient']} ({p['name']})" for p in reaction["products"]])
22 | 
23 |     reaction_equation = f"{reactants_str} -> {products_str}"
24 |     query_dict = {
25 |         "enthalpy": f"Calculate the reaction enthalpy for this reaction: {reaction_equation}",
26 |         "enthalpy_method": f"You are given a chemical reaction: {reaction_equation}. Calculate the enthalpy for this reaction using {method} at {temperature}K.",
27 |         "gibbs_free_energy": f"What is the Gibbs free energy of reaction for {reaction_equation}?",
28 |         "gibbs_free_energy_method": f"What is the Gibbs free energy of reaction for {reaction_equation} using {method}?",
29 |         "gibbs_free_energy_method_temperature": f"You are given a chemical reaction: {reaction_equation}. Calculate the Gibbs free energy change for this reaction using {method} at {temperature}K.",
30 |     }
31 | 
32 |     return query_dict.get(query_name, "Query not found")  # Returns the query or a default message
33 | 
34 | 
35 | def main(n_reactions: int):
36 |     """ """
37 |     # Load SMILES data from the specified JSON file
38 |     combined_data = {}
39 | 
40 |     cca = ChemGraph(
41 |         model_name='gpt-4o-mini',
42 |         workflow_type="manager_worker",
43 |         structured_output=True,
44 |         return_option="state",
45 |     )
46 |     with open("reaction_dataset.json", "r") as rf:
47 |         reactions = json.load(rf)
48 | 
49 |     # Iterate through the first n_structures molecules
50 |     for idx, reaction in enumerate(reactions[:n_reactions]):
51 |         print("********************************************")
52 |         print(
53 |             f"REACTION INDEX {reaction['reaction_index']}: REACTION NAME: {reaction['reaction_name']}"
54 |         )
55 |         print("********************************************")
56 | 
57 |         name = reaction["reaction_name"]
58 | 
59 |         query = get_query(
60 |             reaction,
61 |             query_name="gibbs_free_energy_method_temperature",
62 |             method="mace_mp",
63 |             temperature=500,
64 |         )
65 | 
66 |         try:
67 |             state = cca.run(query, config={"configurable": {"thread_id": f"{str(idx)}"}})
68 |         except Exception as e:
69 |             print(e)
70 | 
71 |         llm_workflow = get_workflow_from_state(state)
72 | 
73 |         # Store results in a structured dictionary
74 |         state_data = cca.write_state(config={"configurable": {"thread_id": f"{str(idx)}"}})
75 | 
76 |         combined_data[name] = {"llm_workflow": llm_workflow}
77 |         combined_data[name]["metadata"] = state_data
78 | 
79 |     # Save the results to a JSON file
80 |     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
81 |     filename = f"llm_workflow_{timestamp}.json"
82 | 
83 |     # Save the results to a JSON file
84 |     with open(filename, "w") as f:
85 |         json.dump(combined_data, f, indent=4)
86 | 
87 | 
88 | if __name__ == "__main__":
89 |     # Parse command-line arguments
90 |     parser = argparse.ArgumentParser(description="Calculate properties of a reaction.")
91 |     parser.add_argument(
92 |         "--n_reactions", type=int, default=10, help="Number of molecules to process (default: 10)"
93 |     )
94 |     args = parser.parse_args()
95 | 
96 |     # Call the main function with parsed arguments
97 |     main(args.n_reactions)
98 | 


--------------------------------------------------------------------------------
/src/chemgraph/models/agent_response.py:
--------------------------------------------------------------------------------
  1 | from pydantic import BaseModel, Field
  2 | from typing import Union, Optional
  3 | from chemgraph.models.atomsdata import AtomsData
  4 | 
  5 | 
  6 | class VibrationalFrequency(BaseModel):
  7 |     """
  8 |     Schema for storing vibrational frequency results from a simulation.
  9 | 
 10 |     Attributes
 11 |     ----------
 12 |     frequency_cm1 : list[str]
 13 |         List of vibrational frequencies in inverse centimeters (cm⁻¹).
 14 |         Each entry is a string representation of the frequency value.
 15 |     """
 16 |     # Required field: `...` as the default means a value must be supplied.
 17 |     frequency_cm1: list[str] = Field(
 18 |         ...,
 19 |         description="List of vibrational frequencies in cm-1.",
 20 |     )
 21 | 
 22 | class IRSpectrum(BaseModel):
 23 |     """
 24 |     Schema for storing vibrational frequencies and intensities from a simulation.
 25 | 
 26 |     Attributes
 27 |     ----------
 28 |     frequency_cm1 : list[str]
 29 |         List of vibrational frequencies in inverse centimeters (cm⁻¹).
 30 |         Each entry is a string representation of the frequency value.
 31 |     intensity : list[str]
 32 |         List of vibrational intensities, each given as a string.
 33 |     plot : str, optional
 34 |         Base64-encoded PNG image; defaults to None.
 35 |     """
 36 | 
 37 |     frequency_cm1: list[str] = Field(
 38 |         ...,
 39 |         description="List of vibrational frequencies in cm-1.",
 40 |     )
 41 | 
 42 |     intensity: list[str] = Field(
 43 |         ...,
 44 |         description="List of intensities in D/Å^2 amu^-1.",
 45 |     )
 46 |     plot: Optional[str] = None   # base64 PNG image
 47 | 
 48 | 
 49 | class InfraredSpectrum(BaseModel):  # NOTE(review): not a member of ResponseFormatter.answer's Union in this file - confirm intended
 50 |     """
 51 |     Schema for an infrared spectrum given as frequency and intensity series for plotting.
 52 | 
 53 |     Attributes
 54 |     ----------
 55 |     frequency_spec_cm1 : list[str]
 56 |         Frequency values of the spectrum in inverse centimeters (cm⁻¹).
 57 |         Each entry is a string representation of the frequency value.
 58 |     intensity_spec_D2A2amu1 : list[str]
 59 |         Intensity values of the spectrum in (D/Å)^2 amu⁻¹.
 60 |         Each entry is a string representation of the intensity value.
 61 |     """
 62 |     frequency_spec_cm1: list[str] = Field(
 63 |         ...,
 64 |         description="Range of frequencies for plotting spectrum in cm-1.",
 65 |     )
 66 |     
 67 |     intensity_spec_D2A2amu1: list[str] = Field(
 68 |         ...,
 69 |         description="Values of intensities for plotting spectrum in (D/Å)^2 amu^-1.",
 70 |     )
 71 | 
 72 | class ScalarResult(BaseModel):
 73 |     """
 74 |     Schema for storing a single scalar numerical result from a simulation or calculation.
 75 | 
 76 |     Attributes
 77 |     ----------
 78 |     value : float
 79 |         The numerical value of the scalar result (e.g., 1.23).
 80 |     property : str
 81 |         The name of the physical or chemical property represented (e.g., 'enthalpy', 'Gibbs free energy').
 82 |     unit : str
 83 |         The unit associated with the result (e.g., 'eV', 'kJ/mol').
 84 |     """
 85 | 
 86 |     value: float = Field(..., description="Scalar numerical result like enthalpy")
 87 |     property: str = Field(  # NOTE(review): this field name shadows the built-in `property` inside the class body
 88 |         ...,
 89 |         description="Name of the property, e.g. 'enthalpy', 'Gibbs free energy'",
 90 |     )
 91 |     unit: str = Field(..., description="Unit of the result, e.g. 'eV'")
 92 | 
 93 | 
 94 | class ResponseFormatter(BaseModel):
 95 |     """Defined structured output to the user."""
 96 | 
 97 |     answer: Union[
 98 |         str,
 99 |         ScalarResult,
100 |         VibrationalFrequency,
101 |         IRSpectrum,
102 |         AtomsData,
103 |     ] = Field(
104 |         description=(
105 |             "Structured answer to the user's query. Use:\n"
106 |             "1. `str` for general or explanatory responses or SMILES string.\n"
107 |             "2. `VibrationalFrequency` for vibrational frequencies.\n"
108 |             "3. `ScalarResult` for single numerical properties (e.g. enthalpy).\n"
109 |             "4. `AtomsData` for atomic geometries (XYZ coordinate, etc.) and optimized structures."
110 |             "5. `InfraredSpectrum` for calculating infrared spectra."
111 |         )
112 |     )
113 | 


--------------------------------------------------------------------------------
/src/chemgraph/prompt/single_agent_prompt.py:
--------------------------------------------------------------------------------
 1 | single_agent_prompt = """You are an expert in computational chemistry, using advanced tools to solve complex problems.
 2 | 
 3 | Instructions:
 4 | 1. Extract all relevant inputs from the user's query, such as SMILES strings, molecule names, methods, software, properties, and conditions.
 5 | 2. If a tool is needed, call it using the correct schema.
 6 | 3. Base all responses strictly on actual tool outputs—never fabricate results, coordinates or SMILES string.
 7 | 4. Review previous tool outputs. If they indicate failure, retry the tool with adjusted inputs if possible.
 8 | 5. Use available simulation data directly. If data is missing, clearly state that a tool call is required.
 9 | 6. If no tool call is needed, respond using factual domain knowledge.
10 | """  # end of single_agent_prompt: instructions for the tool-calling single agent
11 | """
12 | formatter_prompt = You are an agent that formats responses based on user intent. You must select the correct output type based on the content of the result:
13 | 
14 | 1. Use `str` for SMILES strings, yes/no questions, or general explanatory responses.
15 | 2. Use `AtomsData` for molecular structures or atomic geometries (e.g., atomic positions, element lists, or 3D coordinates).
16 | 3. Use `VibrationalFrequency` for vibrational frequency data. This includes one or more vibrational modes, typically expressed in units like cm⁻¹. 
17 |    - IMPORTANT: Do NOT use `ScalarResult` for vibrational frequencies. Vibrational data is a list or array of values and requires `VibrationalFrequency`.
18 | 4. Use `IRSpectrum` for vibrational frequency and intensities data and IR spectrum plot.
19 | 5. Use `ScalarResult` (float) only for scalar thermodynamic or energetic quantities such as:
20 |    - Enthalpy
21 |    - Entropy
22 |    - Gibbs free energy
23 | 5. Use `InfraredSpectrum` for infrared (also known as IR) spectrum data. This includes a range of frequencies, typically expressed in units like cm⁻¹, and a range of intensities, typically expressed in units like (D/Å)^2 amu^-1.
24 |    - IMPORTANT: Do NOT use `ScalarResult` for frequencies and intensities. Spectral data is a list or array of values and requires `InfraredSpectrum`.
25 | 
26 | Additional guidance:
27 | - Always read the user’s intent carefully to determine whether the requested quantity is a **list of values** (frequencies) or a **single scalar**.
28 | """  # NOTE: the bare string above is never assigned; it is an unused draft, and formatter_prompt is defined by the assignment below
29 | 
30 | formatter_prompt = """You are an agent responsible for formatting the final output based on both the user’s intent and the actual results from prior agents. Your top priority is to accurately extract and interpret **the correct values from previous agent outputs** — do not fabricate or infer values beyond what has been explicitly provided.
31 | 
32 | Follow these rules for selecting the output type:
33 | 
34 | 1. Use `str` for:
35 |    - SMILES strings
36 |    - Yes/No questions
37 |    - General explanatory or descriptive responses
38 | 
39 | 2. Use `AtomsData` if the result contains:
40 |    - Atomic positions
41 |    - Element numbers or symbols
42 |    - Cell dimensions
43 |    - Any representation of molecular structure or geometry
44 | 
45 | 3. Use `VibrationalFrequency` for vibrational mode outputs:
46 |    - Must contain a list or array of frequencies (typically in cm⁻¹)
47 |    - Do **not** use `ScalarResult` for these — frequencies are not single-valued
48 | 
49 | 4. Use `ScalarResult` only for a single numeric value representing:
50 |    - Enthalpy
51 |    - Entropy
52 |    - Gibbs free energy
53 |    - Any other scalar thermodynamic or energetic quantity
54 | 
55 | Additional instructions:
56 | - Carefully check that the values you format are present in the **actual output of prior tools or agents**.
57 | - Pay close attention to whether the desired result is a **list vs. a scalar**, and choose the correct format accordingly.
58 | """  # NOTE(review): these rules never mention `IRSpectrum`, which ResponseFormatter.answer accepts - confirm whether a rule is missing
59 | 
60 | report_prompt = """You are an agent responsible for generating an html report based on the results of a computational chemistry simulation.
61 | 
62 | Instructions:
63 | - Use generate_html tool to generate the report.
64 | - Make sure the input to the generate_html tool is a valid ASEOutputSchema object.
65 | - Include all the information from the ASEOutputSchema object when invoking the generate_html tool.
66 | """  # NOTE(review): generate_html and ASEOutputSchema are defined outside this file - confirm the names still match
67 | 


--------------------------------------------------------------------------------
/docs/example_usage.md:
--------------------------------------------------------------------------------
 1 | !!! note
 2 |       Before exploring example usage in the `notebooks/` directory, ensure you have specified the necessary API tokens in your environment. 
 3 | 
 4 | === "OpenAI API Key"
 5 |       1. Log in to your OpenAI account at the OpenAI Platform website. If you don't have an account, you'll need to create one first.
 6 | 
 7 |       2. Navigate to the API keys section. You can find this by clicking on your profile icon in the top-right corner and selecting "API keys."
 8 | 
 9 |       3. Click the + Create new secret key button.
10 | 
11 |       4. Give your key a descriptive name (e.g., "ChemGraph").
12 | 
13 |       5. Click Create secret key. A new key will be generated.
14 | 
15 |       6. Copy the key and save it in a secure location. You will not be able to see it again after this step.
16 | 
17 |       7. Set the key in your environment using the command for your platform:
18 |          ```bash
19 |          export OPENAI_API_KEY="your_api_key_here"  # On Unix or macOS
20 |          setx OPENAI_API_KEY "your_api_key_here"  # On Windows
21 |          ```
22 |       8. Restart your terminal or IDE to ensure the environment variable is loaded.
23 | 
24 | === "Anthropic API Key"
25 |       1. Sign up or log in to your Anthropic account at the [Anthropic console](https://console.anthropic.com/).
26 | 
27 |       2. In the left-hand navigation menu, select API Keys.
28 | 
29 |       3. Click on the option to create a new API key.
30 | 
31 |       4. Provide a name for your API key (e.g., "ChemGraph").
32 | 
33 |       5. Click Create Key again.
34 | 
35 |       6. Copy the generated key and store it securely, as you may not be able to view it again.
36 | 
37 |       7. Set the key in your environment using the command for your platform:
38 |          ```bash
39 |          export ANTHROPIC_API_KEY="your_api_key_here"  # On Unix or macOS
40 |          setx ANTHROPIC_API_KEY "your_api_key_here"  # On Windows
41 |          ```
42 |       8. Restart your terminal or IDE to ensure the environment variable is loaded.
43 | 
44 | === "Google AI Studio (Gemini) API Key"
45 |       1. Go to [Google AI Studio](https://aistudio.google.com/) and sign in with your Google account.
46 | 
47 |       2. In the left-hand menu, select Get API key.
48 | 
49 |       3. Click the Create API key in new project button. A new key will be instantly generated.
50 | 
51 |       4. Copy the API key by clicking the copy icon next to it.
52 | 
53 |       5. Set the key as an environment variable:
54 |          ```bash
55 |          export GOOGLE_API_KEY="your_api_key_here"  # On Unix or macOS
56 |          setx GOOGLE_API_KEY "your_api_key_here"  # On Windows
57 |          ```
58 |       6. Restart your terminal or IDE to ensure the environment variable is loaded.
59 | 
60 | ???+ info "**Explore Example Notebooks**"
61 |       Navigate to the `notebooks/` directory to explore various example notebooks demonstrating different capabilities of ChemGraph.
62 | 
63 |       - **[Single-Agent System with MACE](https://github.com/argonne-lcf/ChemGraph/blob/main/notebooks/Demo-multi_agent.ipynb)**: This notebook demonstrates how a single agent can utilize multiple tools with MACE/xTB support.
64 | 
65 |       - **[Single-Agent System with UMA](https://github.com/argonne-lcf/ChemGraph/blob/main/notebooks/Demo_single_agent_UMA.ipynb)**: This notebook demonstrates how a single agent can utilize multiple tools with UMA support.
66 | 
67 |       - **[Multi-Agent System](https://github.com/argonne-lcf/ChemGraph/blob/main/notebooks/Demo-multi_agent.ipynb)**: This notebook demonstrates a multi-agent setup where different agents (Planner, Executor and Aggregator) handle various tasks exemplifying the collaborative potential of ChemGraph.
68 | 
69 |       - **[Single-Agent System with gRASPA](https://github.com/argonne-lcf/ChemGraph/blob/main/notebooks/Demo_graspa_agent.ipynb)**: This notebook provides a sample guide on executing a gRASPA simulation using a single agent. For gRASPA-related installation instructions, visit the [gRASPA GitHub repository](https://github.com/snurr-group/gRASPA). The notebook's functionality has been validated on a single compute node at ALCF Polaris.
70 | 


--------------------------------------------------------------------------------
/src/ui/config.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Configuration management for ChemGraph Streamlit app.
  3 | """
  4 | 
  5 | import toml
  6 | import os
  7 | from typing import Dict, Any
  8 | 
  9 | 
 10 | def load_config(config_path: str = "config.toml") -> Dict[str, Any]:
 11 |     """Load configuration from a TOML file, filling missing sections/keys from the defaults; return the defaults on any error."""
 12 |     try:
 13 |         if os.path.exists(config_path):
 14 |             with open(config_path, "r") as f:
 15 |                 config = toml.load(f)
 16 |                 # Validate configuration structure against the defaults
 17 |                 default_config = get_default_config()
 18 | 
 19 |                 # Ensure all required sections exist; a missing section is taken wholesale from the defaults
 20 |                 for section in ["general", "api", "chemistry", "output"]:
 21 |                     if section not in config:
 22 |                         config[section] = default_config[section]
 23 |                     elif isinstance(config[section], dict) and isinstance(
 24 |                         default_config[section], dict
 25 |                     ):
 26 |                         # Merge missing keys from default (one level only: nested dicts are not merged recursively)
 27 |                         for key, value in default_config[section].items():
 28 |                             if key not in config[section]:
 29 |                                 config[section][key] = value
 30 | 
 31 |                 return config
 32 |         else:
 33 |             # Create default configuration file if it doesn't exist
 34 |             default_config = get_default_config()
 35 |             save_config(default_config, config_path)  # best-effort: the boolean result is ignored
 36 |             return default_config
 37 |     except Exception as e:  # any failure (I/O, TOML parsing, ...) falls back to the defaults
 38 |         print(f"Error loading configuration: {e}")
 39 |         return get_default_config()
 40 | 
 41 | 
 42 | def save_config(config: Dict[str, Any], config_path: str = "config.toml") -> bool:
 43 |     """Save configuration to a TOML file; return True on success, False if writing fails (the error is printed)."""
 44 |     try:
 45 |         with open(config_path, "w") as f:  # "w" truncates/overwrites any existing file at config_path
 46 |             toml.dump(config, f)
 47 |         return True
 48 |     except Exception as e:  # failures are reported via the return value instead of being raised
 49 |         print(f"Error saving configuration: {e}")
 50 |         return False
 51 | 
 52 | 
 53 | def get_default_config() -> Dict[str, Any]:
 54 |     """Return the default configuration as a freshly built nested dict with "general", "api", "chemistry" and "output" sections."""
 55 |     return {
 56 |         "general": {  # flat settings; flatten_config copies these keys without a prefix
 57 |             "model": "gpt-4o-mini",
 58 |             "workflow": "single_agent",
 59 |             "output": "state",
 60 |             "structured": False,
 61 |             "report": True,
 62 |             "thread": 1,
 63 |             "recursion_limit": 20,
 64 |             "verbose": False,
 65 |         },
 66 |         "api": {  # one entry per provider, each with a base URL and a timeout (units not stated here)
 67 |             "openai": {"base_url": "https://api.openai.com/v1", "timeout": 30},
 68 |             "anthropic": {"base_url": "https://api.anthropic.com", "timeout": 30},
 69 |             "google": {
 70 |                 "base_url": "https://generativelanguage.googleapis.com/v1beta",
 71 |                 "timeout": 30,
 72 |             },
 73 |             "local": {"base_url": "http://localhost:11434", "timeout": 60},
 74 |         },
 75 |         "chemistry": {
 76 |             "optimization": {"method": "BFGS", "fmax": 0.05, "steps": 200},
 77 |             "calculators": {"default": "mace_mp", "fallback": "emt"},
 78 |         },
 79 |         "output": {
 80 |             "files": {
 81 |                 "directory": "./chemgraph_output",
 82 |                 "formats": ["xyz", "json", "html"],
 83 |             },
 84 |             "visualization": {"enable_3d": True, "viewer": "py3dmol"},
 85 |         },
 86 |     }
 87 | 
 88 | 
 89 | def flatten_config(config: Dict[str, Any]) -> Dict[str, Any]:
 90 |     """Flatten nested configuration: "general" keys are copied as-is, other sections become "<section>_<key>[_<subkey>]" keys."""
 91 |     flattened = {}
 92 | 
 93 |     # Handle general settings: copied unprefixed into the result
 94 |     if "general" in config:
 95 |         flattened.update(config["general"])
 96 | 
 97 |     # Handle other sections: flatten at most two levels below the section name
 98 |     for section in ["api", "chemistry", "output"]:
 99 |         if section in config:
100 |             for key, value in config[section].items():
101 |                 if isinstance(value, dict):
102 |                     for subkey, subvalue in value.items():
103 |                         flattened[f"{section}_{key}_{subkey}"] = subvalue  # deeper dicts are stored as-is
104 |                 else:
105 |                     flattened[f"{section}_{key}"] = value
106 | 
107 |     return flattened
108 | 


--------------------------------------------------------------------------------
/docs/running_local_models.md:
--------------------------------------------------------------------------------
 1 | !!! note
 2 |     This section describes how to set up and run local language models using the vLLM inference server.
 3 | 
 4 | ### Inference Backend Setup (Remote/Local)
 5 | 
 6 | #### Virtual Python Environment
 7 | All instructions below must be executed within a Python virtual environment. Ensure the virtual environment uses the same Python version as your project (e.g., Python 3.11).
 8 | 
 9 | **Example 1: Using conda**
10 | ```bash
11 | conda create -n vllm-env python=3.11 -y
12 | conda activate vllm-env
13 | ```
14 | 
15 | **Example 2: Using python venv**
16 | ```bash
17 | python3.11 -m venv vllm-env
18 | source vllm-env/bin/activate  # On Windows use `vllm-env\Scripts\activate`
19 | ```
20 | 
21 | #### Install Inference Server (vLLM)
22 | vLLM is recommended for serving many transformer models efficiently.
23 | 
24 | **Basic vLLM installation from source:**
25 | Make sure your virtual environment is activated.
26 | ```bash
27 | # Ensure git is installed
28 | git clone https://github.com/vllm-project/vllm.git
29 | cd vllm
30 | pip install -e .
31 | ```
32 | For specific hardware acceleration (e.g., CUDA, ROCm), refer to the [official vLLM installation documentation](https://docs.vllm.ai/en/latest/getting_started/installation.html).
33 | 
34 | #### Running the vLLM Server (Standalone)
35 | 
36 | A script is provided at `scripts/run_vllm_server.sh` to help start a vLLM server with features like logging, retry attempts, and timeout. This is useful for running vLLM outside of Docker Compose, for example, directly on a machine with GPU access.
37 | 
38 | **Before running the script:**
39 | 1.  Ensure your vLLM Python virtual environment is activated.
40 |     ```bash
41 |     # Example: if you used conda
42 |     # conda activate vllm-env 
43 |     # Example: if you used python venv
44 |     # source path/to/your/vllm-env/bin/activate
45 |     ```
46 | 2.  Make the script executable:
47 |     ```bash
48 |     chmod +x scripts/run_vllm_server.sh
49 |     ```
50 | 
51 | **To run the script:**
52 | 
53 | ```bash
54 | ./scripts/run_vllm_server.sh [MODEL_IDENTIFIER] [PORT] [MAX_MODEL_LENGTH]
55 | ```
56 | 
57 | -   `[MODEL_IDENTIFIER]` (optional): The Hugging Face model identifier. Defaults to `facebook/opt-125m`.
58 | -   `[PORT]` (optional): The port for the vLLM server. Defaults to `8001`.
59 | -   `[MAX_MODEL_LENGTH]` (optional): The maximum model length. Defaults to `4096`.
60 | 
61 | **Example:**
62 | ```bash
63 | ./scripts/run_vllm_server.sh meta-llama/Meta-Llama-3-8B-Instruct 8001 8192
64 | ```
65 | 
66 | ???+ info "**Important Note on Gated Models (e.g., Llama 3):**"
67 |     - Many models, such as those from the Llama family by Meta, are gated and require you to accept their terms of use on Hugging Face and use an access token for download. 
68 | 
69 |     - To use such models with vLLM (either via the script or Docker Compose):
70 |         1. **Hugging Face Account and Token**: Ensure you have a Hugging Face account and have generated an access token with `read` permissions. You can find this in your Hugging Face account settings under "Access Tokens".
71 |         2.  **Accept Model License**: Navigate to the Hugging Face page of the specific model you want to use (e.g., `meta-llama/Meta-Llama-3-8B-Instruct`) and accept its license/terms if prompted.
72 |         3.  **Environment Variables**: Before running the vLLM server (either via the script or `docker-compose up`), you need to set the following environment variables in your terminal session or within your environment configuration (e.g., `.bashrc`, `.zshrc`, or by passing them to Docker Compose if applicable):
73 |             ```bash
74 |             export HF_TOKEN="your_hugging_face_token_here"
75 |             # Optional: Specify a directory for Hugging Face to download models and cache.
76 |             # export HF_HOME="/path/to/your/huggingface_cache_directory"
77 |             ```
78 |             vLLM will use these environment variables to authenticate with Hugging Face and download the model weights.
79 | 
80 |     - The script will:
81 |         - Attempt to start the vLLM OpenAI-compatible API server.
82 |         - Log output to a file in the `logs/` directory (created if it doesn't exist at the project root).
83 |         - The server runs in the background via `nohup`.
84 | 
85 |     - This standalone script is an alternative to running vLLM via Docker Compose and is primarily for users who manage their vLLM instances directly.


--------------------------------------------------------------------------------
/docs/streamlit_web_interface.md:
--------------------------------------------------------------------------------
 1 | !!! note
 2 |       ChemGraph includes a **Streamlit web interface** that provides an intuitive, chat-based UI for interacting with computational chemistry agents. The interface supports 3D molecular visualization, conversation history, and easy access to various ChemGraph workflows.
 3 | 
 4 | ### Features
 5 | 
 6 | - **🧪 Interactive Chat Interface**: Natural language queries for computational chemistry tasks
 7 | - **🧬 3D Molecular Visualization**: Interactive molecular structure display using `stmol` and `py3Dmol`
 8 | - **📊 Report Integration**: Embedded HTML reports from computational calculations
 9 | - **💾 Data Export**: Download molecular structures as XYZ or JSON files
10 | - **🔧 Multiple Workflows**: Support for single-agent, multi-agent, Python REPL, and gRASPA workflows
11 | - **🎨 Modern UI**: Clean, responsive interface with conversation bubbles and molecular properties display
12 | 
13 | ### Installation Requirements
14 | 
15 | The Streamlit UI dependencies are included by default when you install ChemGraph:
16 | 
17 | ```bash
18 | # Install ChemGraph (includes UI dependencies)
19 | pip install -e .
20 | ```
21 | 
22 | **Alternative Installation Options:**
23 | ```bash
24 | # Install only UI dependencies separately (if needed)
25 | pip install -e ".[ui]"
26 | 
27 | # Install with UMA support (separate environment recommended)
28 | pip install -e ".[uma]"
29 | ```
30 | 
31 | ### Running the Streamlit Interface
32 | 
33 | 1. **Set up your API keys** (same as for notebooks):
34 |    ```bash
35 |    export OPENAI_API_KEY="your_openai_api_key_here"
36 |    export ANTHROPIC_API_KEY="your_anthropic_api_key_here"
37 |    ```
38 | 
39 | 2. **Launch the Streamlit app**:
40 |    ```bash
41 |    streamlit run ui/app.py
42 |    ```
43 | 
44 | 3. **Access the interface**: Open your browser to `http://localhost:8501`
45 | 
46 | ### Using the Interface
47 | 
48 | #### Configuration
49 | - **Model Selection**: Choose from GPT-4o, GPT-4o-mini, or Claude models
50 | - **Workflow Type**: Select single-agent, multi-agent, Python REPL, or gRASPA workflows
51 | 
52 | 
53 | #### Interaction
54 | 1. **Initialize Agent**: Click "Initialize Agent" in the sidebar to set up your ChemGraph instance
55 | 2. **Ask Questions**: Use the text area to enter computational chemistry queries
56 | 3. **View Results**: See responses in chat bubbles with automatic structure detection
57 | 4. **3D Visualization**: When molecular structures are detected, they're automatically displayed in 3D
58 | 5. **Download Data**: Export structures and calculation results directly from the interface
59 | 
60 | #### Example Queries
61 | - "What is the SMILES string for caffeine?"
62 | - "Optimize the geometry of water molecule using DFT"
63 | - "Calculate the single point energy of methane and show the structure"
64 | - "Generate the structure of aspirin and calculate its vibrational frequencies"
65 | 
66 | #### Molecular Visualization
67 | The interface automatically detects molecular structure data in agent responses and provides:
68 | - **Interactive 3D Models**: Multiple visualization styles (ball & stick, sphere, stick, wireframe)
69 | - **Structure Information**: Chemical formula, composition, mass, center of mass
70 | - **Export Options**: Download as XYZ files or JSON data
71 | - **Fallback Display**: Table view when 3D visualization is unavailable
72 | 
73 | #### Conversation Management
74 | - **History Display**: All queries and responses are preserved in conversation bubbles
75 | - **Structure Detection**: Molecular structures are automatically extracted and visualized
76 | - **Report Integration**: HTML reports from calculations are embedded directly in the interface
77 | - **Debug Information**: Expandable sections show detailed message processing information
78 | 
79 | ### Troubleshooting
80 | 
81 | **3D Visualization Issues:**
82 | - Ensure `stmol` is installed: `pip install stmol`
83 | - If 3D display fails, the interface falls back to table/text display
84 | - Check browser compatibility for WebGL support
85 | 
86 | **Agent Initialization:**
87 | - Verify API keys are set correctly
88 | - Check that ChemGraph package is installed: `pip install -e .`
89 | - Ensure all dependencies are available in your environment
90 | 
91 | **Performance:**
92 | - For large molecular systems, visualization may take longer to load
93 | - Use the refresh button if the interface becomes unresponsive
94 | - Clear conversation history to improve performance with many queries
95 | 


--------------------------------------------------------------------------------
/.github/workflows/conda-tests.yml:
--------------------------------------------------------------------------------
  1 | name: Conda Tests
  2 | 
  3 | on:  # run on pushes to main and on pull requests targeting main
  4 |   push:
  5 |     branches: [ main ]
  6 |   pull_request:
  7 |     branches: [ main ]
  8 | 
  9 | jobs:
 10 |   test-conda-mace:  # one run per Python version listed in the matrix below
 11 |     runs-on: ubuntu-latest
 12 |     strategy:
 13 |       matrix:
 14 |         python-version: ["3.10", "3.11", "3.12"]
 15 | 
 16 |     steps:
 17 |     - uses: actions/checkout@v4
 18 |     
 19 |     - name: Set up Python ${{ matrix.python-version }}
 20 |       uses: actions/setup-python@v5
 21 |       with:
 22 |         python-version: ${{ matrix.python-version }}
 23 |     
 24 |     - name: Set up Conda  # creates and activates the chemgraph-mace environment with the matrix Python version
 25 |       uses: conda-incubator/setup-miniconda@v3
 26 |       with:
 27 |         miniconda-version: "latest"
 28 |         python-version: ${{ matrix.python-version }}
 29 |         activate-environment: chemgraph-mace
 30 |         auto-activate-base: false
 31 |     
 32 |     - name: Install Conda dependencies for MACE  # NOTE(review): mamba is installed here but no later step in this job invokes it
 33 |       shell: bash -l {0}  # login shell; presumably required so the conda environment is active - confirm against setup-miniconda docs
 34 |       run: |
 35 |         conda install -c conda-forge nwchem -y
 36 |         conda install -c conda-forge mamba -y
 37 |     
 38 |     - name: Install ChemGraph with MACE support  # editable install of the repository plus pytest
 39 |       shell: bash -l {0}
 40 |       run: |
 41 |         pip install -e .
 42 |         pip install pytest
 43 |     
 44 |     - name: Run tests with MACE
 45 |       shell: bash -l {0}
 46 |       run: |
 47 |         python -m pytest tests/ -v
 48 | 
 49 |   # test-conda-uma:
 50 |   #   runs-on: ubuntu-latest
 51 |   #   strategy:
 52 |   #     matrix:
 53 |   #       python-version: ["3.10", "3.11", "3.12"]
 54 | 
 55 |   #   steps:
 56 |   #   - uses: actions/checkout@v4
 57 |     
 58 |   #   - name: Set up Python ${{ matrix.python-version }}
 59 |   #     uses: actions/setup-python@v5
 60 |   #     with:
 61 |   #       python-version: ${{ matrix.python-version }}
 62 |     
 63 |   #   - name: Set up Conda
 64 |   #     uses: conda-incubator/setup-miniconda@v3
 65 |   #     with:
 66 |   #       miniconda-version: "latest"
 67 |   #       python-version: ${{ matrix.python-version }}
 68 |   #       activate-environment: chemgraph-uma
 69 |   #       auto-activate-base: false
 70 |     
 71 |   #   - name: Install Conda dependencies for UMA
 72 |   #     shell: bash -l {0}
 73 |   #     run: |
 74 |   #       conda install -c conda-forge nwchem -y
 75 | 
 76 |   #   - name: Temporarily modify pyproject.toml for UMA e3nn compatibility
 77 |   #     shell: bash -l {0}
 78 |   #     run: |
 79 |   #       echo "Backing up pyproject.toml to pyproject.toml.original..."
 80 |   #       cp pyproject.toml pyproject.toml.original
 81 |   #       echo "Commenting out mace-torch from pyproject.toml..."
 82 |   #       # This sed command finds lines starting with optional whitespace,
 83 |   #       # then "mace-torch>=0.3.13", and prepends a '#' to the matched line.
 84 |   #       sed -i 's/^[[:space:]]*"mace-torch>=0.3.13",/#&/' pyproject.toml
 85 |   #       echo "pyproject.toml after modification:"
 86 |   #       cat pyproject.toml
 87 |     
 88 |   #   - name: Install ChemGraph with UMA support
 89 |   #     shell: bash -l {0}
 90 |   #     env:
 91 |   #       HF_TOKEN: ${{ secrets.HF_TOKEN }}
 92 |   #     run: |
 93 |   #       pip install -e ".[uma]"
 94 |   #       pip install pytest
 95 |   #       pip install huggingface-hub # Ensure huggingface-cli is available
 96 | 
 97 |   #   - name: Authenticate with Hugging Face CLI
 98 |   #     shell: bash -l {0}
 99 |   #     env:
100 |   #       HF_TOKEN: ${{ secrets.HF_TOKEN }}
101 |   #     run: |
102 |   #       echo "Attempting Hugging Face CLI login..."
103 |   #       huggingface-cli login --token $HF_TOKEN
104 |   #       echo "Login attempted. Verifying whoami..."
105 |   #       huggingface-cli whoami
106 |     
107 |   #   - name: Run tests with UMA
108 |   #     shell: bash -l {0}
109 |   #     env:
110 |   #       HF_TOKEN: ${{ secrets.HF_TOKEN }}
111 |   #     run: |
112 |   #       python -m pytest tests/ -v 
113 |     
114 |   #   - name: Restore pyproject.toml
115 |   #     shell: bash -l {0}
116 |   #     if: always() # Ensures this step runs even if previous steps fail
117 |   #     run: |
118 |   #       echo "Restoring pyproject.toml from pyproject.toml.original..."
119 |   #       if [ -f pyproject.toml.original ]; then
120 |   #         mv pyproject.toml.original pyproject.toml
121 |   #         echo "pyproject.toml restored."
122 |   #       else
123 |   #         echo "Backup pyproject.toml.original not found. Cannot restore."
124 |   #       fi 


--------------------------------------------------------------------------------
/scripts/evaluations/mock_llm/mock_eval.py:
--------------------------------------------------------------------------------
  1 | """Module to evaluate LLM performance on tool-calling workflows."""
  2 | 
  3 | import pprint
  4 | import json
  5 | import datetime
  6 | import argparse
  7 | from langchain_core.utils.function_calling import convert_to_openai_function
  8 | from chemgraph.agent.llm_agent import ChemGraph
  9 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
 10 | from chemgraph.utils.tool_call_eval import multi_function_checker_with_order
 11 | from chemgraph.tools.cheminformatics_tools import (
 12 |     molecule_name_to_smiles,
 13 |     smiles_to_atomsdata,
 14 | )
 15 | from chemgraph.tools.ase_tools import (
 16 |     run_ase,
 17 |     file_to_atomsdata,
 18 |     save_atomsdata_to_file,
 19 | )
 20 | 
 21 | 
 22 | def evaluate_model(
 23 |     model_name: str,
 24 |     input_file: str = "ground_truth_sample.json",
 25 | ):
 26 |     """
 27 |     Evaluate the tool-calling behavior of an LLM given a list of queries.
 28 | 
 29 |     Parameters
 30 |     ----------
 31 |     model_name : str
 32 |         Name of the LLM model to use in ChemGraph.
 33 |     input_file : str
 34 |         Path to the ground truth sample JSON file.
 35 |     """
 36 |     with open(input_file, "r", encoding="utf-8") as f:
 37 |         list_of_queries = json.load(f)
 38 | 
 39 |     workflow_type = "mock_agent"
 40 |     cg = ChemGraph(
 41 |         model_name=model_name,
 42 |         workflow_type=workflow_type,
 43 |         structured_output=True,
 44 |         return_option="state",
 45 |     )
 46 | 
 47 |     llm_tool_calls = []
 48 |     for idx, item in enumerate(list_of_queries):
 49 |         query = item["query"]
 50 |         state = cg.run(query, {"configurable": {"thread_id": str(idx)}})
 51 |         llm_tool_call = get_workflow_from_state(state)
 52 |         llm_tool_calls.append(llm_tool_call)
 53 | 
 54 |     # Save tool call results
 55 |     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 56 |     output_file = f"{model_name}_{timestamp}_tool_call.json"
 57 |     with open(output_file, "w", encoding="utf-8") as wf:
 58 |         json.dump(llm_tool_calls, wf, indent=4)
 59 |     print(f"Saved tool calls to {output_file}")
 60 | 
 61 |     # Evaluation
 62 |     toolsets = [
 63 |         molecule_name_to_smiles,
 64 |         run_ase,
 65 |         smiles_to_atomsdata,
 66 |         file_to_atomsdata,
 67 |         save_atomsdata_to_file,
 68 |     ]
 69 | 
 70 |     func_descriptions = [convert_to_openai_function(tool) for tool in toolsets]
 71 |     accurate_tool_call = 0
 72 |     eval_details = {}
 73 |     for idx, toolcall in enumerate(llm_tool_calls):
 74 |         model_outputs = llm_tool_calls[idx].get("tool_calls", {})
 75 |         answers = list_of_queries[idx].get("answer", {}).get("tool_calls", {})
 76 |         eval_result = multi_function_checker_with_order(
 77 |             func_descriptions=func_descriptions,
 78 |             model_outputs=model_outputs,
 79 |             answers=answers,
 80 |         )
 81 |         if eval_result["acc_n_toolcalls"] == eval_result["n_toolcalls"]:
 82 |             accurate_tool_call += 1
 83 |         eval_details[list_of_queries[idx]["query"]] = eval_result
 84 |         print(eval_result)
 85 |     accuracy = accurate_tool_call / len(llm_tool_calls) * 100
 86 | 
 87 |     print(f"Accuracy of {model_name}: {accuracy}% ({accurate_tool_call}/10 accurate tool calls)")
 88 | 
 89 |     output_eval_file = f"{model_name}_{timestamp}_eval.txt"
 90 | 
 91 |     # Cannot do json.dump() due to DeepDiff output not serializable.
 92 |     with open(output_eval_file, "w", encoding="utf-8") as wf:
 93 |         pprint.pprint(eval_details, stream=wf, width=120)
 94 |     print(f"Saved evaluation results to {output_eval_file}")
 95 | 
 96 |     return accuracy
 97 | 
 98 | 
 99 | def main():
100 |     parser = argparse.ArgumentParser(
101 |         description="Evaluate ChemGraph tool-calling performance for different LLMs."
102 |     )
103 |     parser.add_argument(
104 |         "--model_name",
105 |         type=str,
106 |         required=True,
107 |         help="Name of the LLM model to evaluate (e.g., gpt-4o, claude-3.5-haiku)",
108 |     )
109 |     parser.add_argument(
110 |         "--input_file",
111 |         type=str,
112 |         default="ground_truth_sample.json",
113 |         help="Path to input JSON file of queries",
114 |     )
115 | 
116 |     args = parser.parse_args()
117 |     evaluate_model(args.model_name, args.input_file)
118 | 
119 | 
120 | if __name__ == "__main__":
121 |     main()
122 | 


--------------------------------------------------------------------------------
/scripts/evaluations/run_llm_workflow/Exp8_from_smiles_to_opt/run_llm_workflow.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from chemgraph.agent.llm_agent import ChemGraph
  3 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
  4 | import argparse
  5 | import datetime
  6 | 
  7 | 
  8 | def get_query(
  9 |     smiles: str,
 10 |     query_name: str = "smiles_to_coord",  # options: atomsdata, opt, vib
 11 |     method: str = "mace_mp",
 12 | ) -> str:
 13 |     """Get query for a SMILES-related task for CompChemAgent
 14 | 
 15 |     Args:
 16 |         smiles (str): SMILES string.
 17 |         query_name (str, optional): Type of query. Defaults to "atomsdata". Options: "atomsdata", "opt", "vib", "opt_method" and "vib_method".
 18 |         method (str, optional): The method/level of theory for CompChemAgent to run simulation. Defaults to "mace_mp".
 19 | 
 20 |     Returns:
 21 |         str: formatted query.
 22 |     """
 23 |     query_dict = {
 24 |         "smiles_to_coord": f"Provide the XYZ coordinates corresponding to this SMILES string: {smiles}",
 25 |         "smiles_to_opt": f"Perform geometry optimization for this SMILES string {smiles} using NWChem, B3LYP and sto-3g",
 26 |         "smiles_to_vib": f"Run vibrational frequency calculation for this SMILES string {smiles} using {method}",
 27 |         "smiles_to_enthalpy": f"Calculate the enthalpy of this SMILES string {smiles} using {method}",
 28 |         "smiles_to_gibbs": f"Calculate the Gibbs free energy of this SMILES string {smiles} using {method} at T=400K",
 29 |         "smiles_to_opt_file": f"Perform geometry optimization for this SMILES string {smiles} using {method}. Save the optimized coordinate in an XYZ file.",
 30 |     }
 31 | 
 32 |     return query_dict.get(query_name, "Query not found")  # Returns the query or a default message
 33 | 
 34 | 
 35 | def main(fname: str, n_structures: int):
 36 |     """
 37 |     Run an LLM geometry optimization workflow on a subset of molecules
 38 |     from the input SMILES dataset.
 39 | 
 40 |     Args:
 41 |         fname (str): Path to the JSON file containing SMILES data.
 42 |         n_structures (int): Number of molecules to process from the dataset.
 43 |     """
 44 |     # Load SMILES data from the specified JSON file
 45 |     with open(fname, "r") as f:
 46 |         smiles_data = json.load(f)
 47 | 
 48 |     combined_data = {}
 49 | 
 50 |     cca = ChemGraph(
 51 |         model_name='gpt-4o-mini',
 52 |         workflow_type="single_agent",
 53 |         structured_output=True,
 54 |         return_option="state",
 55 |     )
 56 | 
 57 |     # Iterate through the first n_structures molecules
 58 |     for idx, molecule in enumerate(smiles_data[:n_structures]):
 59 |         print("********************************************")
 60 |         print(
 61 |             f"MOLECULE SMILES: {molecule['smiles']} MOLECULE NAME: {molecule['name']}"
 62 |         )
 63 |         print("********************************************")
 64 | 
 65 |         smiles = molecule["smiles"]
 66 | 
 67 |         query = get_query(smiles, query_name="smiles_to_opt")
 68 |         state = cca.run(query, config={"configurable": {"thread_id": f"{str(idx)}"}})
 69 | 
 70 |         llm_workflow = get_workflow_from_state(state)
 71 | 
 72 |         # Store results in a structured dictionary
 73 |         state_data = cca.write_state(config={"configurable": {"thread_id": f"{str(idx)}"}})
 74 | 
 75 |         combined_data[smiles] = {"llm_workflow": llm_workflow}
 76 |         combined_data[smiles]["metadata"] = state_data
 77 | 
 78 |     # Save the results to a JSON file
 79 |     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 80 |     filename = f"llm_workflow_{timestamp}.json"
 81 | 
 82 |     # Save the results to a JSON file
 83 |     with open(filename, "w") as f:
 84 |         json.dump(combined_data, f, indent=4)
 85 | 
 86 | 
 87 | if __name__ == "__main__":
 88 |     # Parse command-line arguments
 89 |     parser = argparse.ArgumentParser(description="Run geometry optimization on SMILES molecules.")
 90 |     parser.add_argument(
 91 |         "--fname",
 92 |         type=str,
 93 |         default="data_from_pubchempy.json",
 94 |         help="Path to the input SMILES JSON file (e.g., smiles_data.json)",
 95 |     )
 96 |     parser.add_argument(
 97 |         "--n_structures", type=int, default=30, help="Number of molecules to process (default: 30)"
 98 |     )
 99 |     args = parser.parse_args()
100 | 
101 |     # Call the main function with parsed arguments
102 |     main(args.fname, args.n_structures)
103 | 


--------------------------------------------------------------------------------
/scripts/evaluations/run_llm_workflow/Exp1_from_name_to_smiles/run_llm_workflow.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from chemgraph.agent.llm_agent import ChemGraph
  3 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
  4 | import argparse
  5 | from datetime import datetime
  6 | 
  7 | 
  8 | def get_query(
  9 |     name: str,
 10 |     query_name: str = "atomsdata",  # options: atomsdata, opt, vib
 11 |     method: str = "mace_mp",
 12 | ) -> str:
 13 |     """Get query for a SMILES-related task for CompChemAgent
 14 | 
 15 |     Args:
 16 |         name (str): molecule name.
 17 |         query_name (str, optional): Type of query. Defaults to "atomsdata". Options: "atomsdata", "opt", "vib", "opt_method" and "vib_method".
 18 |         method (str, optional): The method/level of theory for CompChemAgent to run simulation. Defaults to "mace_mp".
 19 | 
 20 |     Returns:
 21 |         str: formatted query.
 22 |     """
 23 |     query_dict = {
 24 |         "name_to_smiles": f"Provide the SMILES string corresponding to this molecule: {name}",
 25 |         "name_to_coord": f"Provide the XYZ coordinates corresponding to this molecule: {name}",
 26 |         "name_to_opt": f"Perform geometry optimization for a molecule {name} using {method}",
 27 |         "name_to_vib": f"Run vibrational frequency calculation for a molecule {name} using {method}",
 28 |         "name_to_enthalpy": f"Calculate the enthalpy of a molecule {name} using {method}",
 29 |         "name_to_gibbs": f"Calculate the Gibbs free energy of a molecule {name} using {method} potential at a temperature of 400K",
 30 |         "name_to_opt_file": f"Perform geometry optimization for a molecule {name} using {method}. Save the optimized coordinate in an XYZ file.",
 31 |     }
 32 | 
 33 |     return query_dict.get(query_name, "Query not found")  # Returns the query or a default message
 34 | 
 35 | 
 36 | def main(fname: str, n_structures: int):
 37 |     """
 38 |     Run an LLM geometry optimization workflow on a subset of molecules
 39 |     from the input SMILES dataset.
 40 | 
 41 |     Args:
 42 |         fname (str): Path to the JSON file containing SMILES data.
 43 |         n_structures (int): Number of molecules to process from the dataset.
 44 |     """
 45 |     # Load SMILES data from the specified JSON file
 46 |     with open(fname, "r") as f:
 47 |         smiles_data = json.load(f)
 48 | 
 49 |     combined_data = {}
 50 | 
 51 |     cca = ChemGraph(
 52 |         model_name='gpt-4o-mini',
 53 |         workflow_type="single_agent",
 54 |         structured_output=True,
 55 |         return_option="state",
 56 |     )
 57 | 
 58 |     # Iterate through the first n_structures molecules
 59 |     for idx, molecule in enumerate(smiles_data[:n_structures]):
 60 |         print("********************************************")
 61 |         print(
 62 |             f"MOLECULE SMILES: {molecule['smiles']} MOLECULE NAME: {molecule['name']}"
 63 |         )
 64 |         print("********************************************")
 65 | 
 66 |         name = molecule["name"]
 67 | 
 68 |         query = get_query(name, query_name="name_to_smiles", method="mace_mp")
 69 |         state = cca.run(query, config={"configurable": {"thread_id": f"{str(idx)}"}})
 70 | 
 71 |         llm_workflow = get_workflow_from_state(state)
 72 | 
 73 |         # Store results in a structured dictionary
 74 |         state_data = cca.write_state(config={"configurable": {"thread_id": f"{str(idx)}"}})
 75 | 
 76 |         combined_data[name] = {"llm_workflow": llm_workflow}
 77 |         combined_data[name]["metadata"] = state_data
 78 | 
 79 |     timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 80 |     filename = f"llm_workflow_{timestamp}.json"
 81 | 
 82 |     # Save the results to a JSON file
 83 |     with open(filename, "w") as f:
 84 |         json.dump(combined_data, f, indent=4)
 85 | 
 86 | 
 87 | if __name__ == "__main__":
 88 |     # Parse command-line arguments
 89 |     parser = argparse.ArgumentParser(description="Convert a molecule name to atomic coordinates.")
 90 |     parser.add_argument(
 91 |         "--fname",
 92 |         type=str,
 93 |         default="data_from_pubchempy.json",
 94 |         help="Path to the input SMILES JSON file (e.g., smiles_data.json)",
 95 |     )
 96 |     parser.add_argument(
 97 |         "--n_structures", type=int, default=30, help="Number of molecules to process (default: 30)"
 98 |     )
 99 |     args = parser.parse_args()
100 | 
101 |     # Call the main function with parsed arguments
102 |     main(args.fname, args.n_structures)
103 | 


--------------------------------------------------------------------------------
/scripts/evaluations/run_llm_workflow/Exp10_from_smiles_to_gibbs/run_llm_workflow.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from chemgraph.agent.llm_agent import ChemGraph
  3 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
  4 | import argparse
  5 | import datetime
  6 | 
  7 | 
  8 | def get_query(
  9 |     smiles: str,
 10 |     query_name: str = "smiles_to_coord",  # options: atomsdata, opt, vib
 11 |     method: str = "mace_mp",
 12 | ) -> str:
 13 |     """Get query for a SMILES-related task for CompChemAgent
 14 | 
 15 |     Args:
 16 |         smiles (str): SMILES string.
 17 |         query_name (str, optional): Type of query. Defaults to "atomsdata". Options: "atomsdata", "opt", "vib", "opt_method" and "vib_method".
 18 |         method (str, optional): The method/level of theory for CompChemAgent to run simulation. Defaults to "mace_mp".
 19 | 
 20 |     Returns:
 21 |         str: formatted query.
 22 |     """
 23 |     query_dict = {
 24 |         "smiles_to_coord": f"Provide the XYZ coordinates corresponding to this SMILES string: {smiles}",
 25 |         "smiles_to_opt": f"Perform geometry optimization for this SMILES string {smiles} using {method}",
 26 |         "smiles_to_vib": f"Run vibrational frequency calculation for this SMILES string {smiles} using {method}",
 27 |         "smiles_to_enthalpy": f"Calculate the enthalpy of this SMILES string {smiles} using {method}",
 28 |         "smiles_to_gibbs": f"Calculate the Gibbs free energy of this SMILES string {smiles} using {method} at T=800K",
 29 |         "smiles_to_opt_file": f"Perform geometry optimization for this SMILES string {smiles} using {method}. Save the optimized coordinate in an XYZ file.",
 30 |     }
 31 | 
 32 |     return query_dict.get(query_name, "Query not found")  # Returns the query or a default message
 33 | 
 34 | 
 35 | def main(fname: str, n_structures: int):
 36 |     """
 37 |     Run an LLM geometry optimization workflow on a subset of molecules
 38 |     from the input SMILES dataset.
 39 | 
 40 |     Args:
 41 |         fname (str): Path to the JSON file containing SMILES data.
 42 |         n_structures (int): Number of molecules to process from the dataset.
 43 |     """
 44 |     # Load SMILES data from the specified JSON file
 45 |     with open(fname, "r") as f:
 46 |         smiles_data = json.load(f)
 47 | 
 48 |     combined_data = {}
 49 | 
 50 |     cca = ChemGraph(
 51 |         model_name='gpt-4o-mini',
 52 |         workflow_type="single_agent",
 53 |         structured_output=True,
 54 |         return_option="state",
 55 |     )
 56 | 
 57 |     # Iterate through the first n_structures molecules
 58 |     for idx, molecule in enumerate(smiles_data[:n_structures]):
 59 |         print("********************************************")
 60 |         print(
 61 |             f"MOLECULE SMILES: {molecule['smiles']} MOLECULE NAME: {molecule['name']}"
 62 |         )
 63 |         print("********************************************")
 64 | 
 65 |         smiles = molecule["smiles"]
 66 | 
 67 |         query = get_query(smiles, query_name="smiles_to_gibbs", method="mace_mp")
 68 |         state = cca.run(query, config={"configurable": {"thread_id": f"{str(idx)}"}})
 69 | 
 70 |         llm_workflow = get_workflow_from_state(state)
 71 | 
 72 |         # Store results in a structured dictionary
 73 |         state_data = cca.write_state(config={"configurable": {"thread_id": f"{str(idx)}"}})
 74 | 
 75 |         combined_data[smiles] = {"llm_workflow": llm_workflow}
 76 |         combined_data[smiles]["metadata"] = state_data
 77 | 
 78 |     # Save the results to a JSON file
 79 |     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 80 |     filename = f"llm_workflow_{timestamp}.json"
 81 | 
 82 |     # Save the results to a JSON file
 83 |     with open(filename, "w") as f:
 84 |         json.dump(combined_data, f, indent=4)
 85 | 
 86 | 
 87 | if __name__ == "__main__":
 88 |     # Parse command-line arguments
 89 |     parser = argparse.ArgumentParser(description="Run vibrational frequency on SMILES molecules.")
 90 |     parser.add_argument(
 91 |         "--fname",
 92 |         type=str,
 93 |         default="data_from_pubchempy.json",
 94 |         help="Path to the input SMILES JSON file (e.g., smiles_data.json)",
 95 |     )
 96 |     parser.add_argument(
 97 |         "--n_structures", type=int, default=15, help="Number of molecules to process (default: 15)"
 98 |     )
 99 |     args = parser.parse_args()
100 | 
101 |     # Call the main function with parsed arguments
102 |     main(args.fname, args.n_structures)
103 | 


--------------------------------------------------------------------------------
/scripts/evaluations/run_llm_workflow/Exp7_from_smiles_to_coords/run_llm_workflow.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from chemgraph.agent.llm_agent import ChemGraph
  3 | from chemgraph.utils.get_workflow_from_llm import get_workflow_from_state
  4 | import argparse
  5 | import datetime
  6 | 
  7 | 
  8 | def get_query(
  9 |     smiles: str,
 10 |     query_name: str = "smiles_to_coord",  # options: atomsdata, opt, vib
 11 |     method: str = "mace_mp",
 12 | ) -> str:
 13 |     """Get query for a SMILES-related task for CompChemAgent
 14 | 
 15 |     Args:
 16 |         smiles (str): SMILES string.
 17 |         query_name (str, optional): Type of query. Defaults to "atomsdata". Options: "atomsdata", "opt", "vib", "opt_method" and "vib_method".
 18 |         method (str, optional): The method/level of theory for CompChemAgent to run simulation. Defaults to "mace_mp".
 19 | 
 20 |     Returns:
 21 |         str: formatted query.
 22 |     """
 23 |     query_dict = {
 24 |         "smiles_to_coord": f"Provide the XYZ coordinates corresponding to this SMILES string: {smiles}",
 25 |         "smiles_to_opt": f"Perform geometry optimization for this SMILES string {smiles} using {method}",
 26 |         "smiles_to_vib": f"Run vibrational frequency calculation for this SMILES string {smiles} using {method}",
 27 |         "smiles_to_enthalpy": f"Calculate the enthalpy of this SMILES string {smiles} using {method}",
 28 |         "smiles_to_gibbs": f"Calculate the Gibbs free energy of this SMILES string {smiles} using {method} at T=400K",
 29 |         "smiles_to_opt_file": f"Perform geometry optimization for this SMILES string {smiles} using {method}. Save the optimized coordinate in an XYZ file.",
 30 |     }
 31 | 
 32 |     return query_dict.get(query_name, "Query not found")  # Returns the query or a default message
 33 | 
 34 | 
 35 | def main(fname: str, n_structures: int):
 36 |     """
 37 |     Run an LLM geometry optimization workflow on a subset of molecules
 38 |     from the input SMILES dataset.
 39 | 
 40 |     Args:
 41 |         fname (str): Path to the JSON file containing SMILES data.
 42 |         n_structures (int): Number of molecules to process from the dataset.
 43 |     """
 44 |     # Load SMILES data from the specified JSON file
 45 |     with open(fname, "r") as f:
 46 |         smiles_data = json.load(f)
 47 | 
 48 |     combined_data = {}
 49 | 
 50 |     cca = ChemGraph(
 51 |         model_name='gpt-4o-mini',
 52 |         workflow_type="single_agent",
 53 |         structured_output=True,
 54 |         return_option="state",
 55 |     )
 56 | 
 57 |     # Iterate through the first n_structures molecules
 58 |     for idx, molecule in enumerate(smiles_data[:n_structures]):
 59 |         print("********************************************")
 60 |         print(
 61 |             f"MOLECULE SMILES: {molecule['smiles']} MOLECULE NAME: {molecule['name']}"
 62 |         )
 63 |         print("********************************************")
 64 | 
 65 |         smiles = molecule["smiles"]
 66 | 
 67 |         query = get_query(smiles, query_name="smiles_to_coord", method="mace_mp")
 68 |         state = cca.run(query, config={"configurable": {"thread_id": f"{str(idx)}"}})
 69 | 
 70 |         llm_workflow = get_workflow_from_state(state)
 71 | 
 72 |         # Store results in a structured dictionary
 73 |         state_data = cca.write_state(config={"configurable": {"thread_id": f"{str(idx)}"}})
 74 | 
 75 |         combined_data[smiles] = {"llm_workflow": llm_workflow}
 76 |         combined_data[smiles]["metadata"] = state_data
 77 | 
 78 |     # Save the results to a JSON file
 79 |     timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 80 |     filename = f"llm_workflow_{timestamp}.json"
 81 | 
 82 |     # Save the results to a JSON file
 83 |     with open(filename, "w") as f:
 84 |         json.dump(combined_data, f, indent=4)
 85 | 
 86 | 
 87 | if __name__ == "__main__":
 88 |     # Parse command-line arguments
 89 |     parser = argparse.ArgumentParser(description="Run geometry optimization on SMILES molecules.")
 90 |     parser.add_argument(
 91 |         "--fname",
 92 |         type=str,
 93 |         default="data_from_pubchempy.json",
 94 |         help="Path to the input SMILES JSON file (e.g., smiles_data.json)",
 95 |     )
 96 |     parser.add_argument(
 97 |         "--n_structures", type=int, default=30, help="Number of molecules to process (default: 30)"
 98 |     )
 99 |     args = parser.parse_args()
100 | 
101 |     # Call the main function with parsed arguments
102 |     main(args.fname, args.n_structures)
103 | 


--------------------------------------------------------------------------------
/src/chemgraph/utils/get_workflow_from_llm.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from langchain.schema.messages import AIMessage
  3 | import logging
  4 | from chemgraph.utils.logging_config import setup_logger
  5 | 
  6 | logger = setup_logger(__name__)
  7 | 
  8 | 
  9 | def get_workflow_from_log(file_path: str) -> dict:
 10 |     """Convert a run_logs file to a workflow dictionary for evaluations.
 11 | 
 12 |     This function reads a JSON log file containing tool calls and their results,
 13 |     and converts it into a standardized workflow dictionary format.
 14 | 
 15 |     Parameters
 16 |     ----------
 17 |     file_path : str
 18 |         Path to the run logs file in JSON format
 19 | 
 20 |     Returns
 21 |     -------
 22 |     dict
 23 |         A dictionary containing:
 24 |         - tool_calls: List of tool call arguments
 25 |         - result: The final result or answer from the workflow
 26 | 
 27 |     Notes
 28 |     -----
 29 |     The function expects the log file to contain:
 30 |     - A 'state' list with tool calls and their arguments
 31 |     - A final message with either a JSON 'answer' field or direct content
 32 |     """
 33 |     with open(file_path, "r") as f:
 34 |         data = json.load(f)
 35 |     # Extract tool names and arguments
 36 |     workflow_dict = {"tool_calls": []}
 37 |     for state in data.get("state", []):
 38 |         tool_calls = state.get("tool_calls", [])
 39 |         for call in tool_calls:
 40 |             name = call.get("name")
 41 |             args = call.get("args")
 42 |             dat = {}
 43 |             dat[name] = args
 44 |             workflow_dict["tool_calls"].append(args)
 45 |     last_message = data.get("state", [])[-1]
 46 |     try:
 47 |         if "answer" in last_message["content"]:
 48 |             result_data = json.loads(last_message["content"])
 49 |             workflow_dict["result"] = result_data.get("answer")
 50 |     except Exception as e:
 51 |         result_data = last_message["content"]
 52 |         workflow_dict["result"] = result_data
 53 |         logging.debug(f"Exception thrown while parsing result: {e}")
 54 | 
 55 |     return workflow_dict
 56 | 
 57 | 
 58 | def get_workflow_from_state(state) -> dict:
 59 |     """Convert a state object to a workflow dictionary.
 60 | 
 61 |     This function processes a state object containing AIMessages with tool calls
 62 |     and converts it into a standardized workflow dictionary format.
 63 | 
 64 |     Parameters
 65 |     ----------
 66 |     state : list
 67 |         List of messages, including AIMessages containing tool calls
 68 | 
 69 |     Returns
 70 |     -------
 71 |     dict
 72 |         A dictionary containing:
 73 |         - tool_calls: List of dictionaries mapping tool names to their arguments
 74 |         - result: The final result or answer from the workflow
 75 | 
 76 |     Notes
 77 |     -----
 78 |     The function processes:
 79 |     - AIMessages containing tool calls
 80 |     - The final message's content, which may be:
 81 |       - A JSON string with an 'answer' field
 82 |       - A JSON string with direct content
 83 |       - A plain string
 84 |       - Any other content type
 85 |     """
 86 |     workflow_dict = {"tool_calls": []}
 87 | 
 88 |     def recurse(obj):
 89 |         if isinstance(obj, dict):
 90 |             # Extract tool_calls if it's an AI message
 91 |             if obj.get("type") == "ai":
 92 |                 tool_calls = obj.get("tool_calls", [])
 93 |                 for call in tool_calls:
 94 |                     name = call.get("name")
 95 |                     args = call.get("args", {})
 96 |                     workflow_dict["tool_calls"].append({name: args})
 97 |             # Recurse into all values
 98 |             for v in obj.values():
 99 |                 recurse(v)
100 |         elif isinstance(obj, list):
101 |             for item in obj:
102 |                 recurse(item)
103 | 
104 |     recurse(state)
105 | 
106 |     last_message = state["messages"][-1]
107 | 
108 |     content = last_message.get("content", {})
109 | 
110 |     if isinstance(content, str):
111 |         try:
112 |             content = json.loads(content)
113 |         except json.JSONDecodeError:
114 |             pass  # keep content as-is if it's not valid JSON
115 | 
116 |     # Extract result (just the value of the "answer" key if it exists)
117 |     if isinstance(content, dict) and "answer" in content:
118 |         workflow_dict["result"] = content["answer"]
119 |     else:
120 |         workflow_dict["result"] = content
121 | 
122 |     return workflow_dict
123 | 


--------------------------------------------------------------------------------