├── prompt
    ├── __init__.py
    ├── profile_extraction_prompt.md
    ├── question_gate_prompt.md
    ├── template_loader.py
    ├── query_enrichment_prompt.md
    ├── query_maker_prompt.md
    └── document_suitability_prompt.md
├── infra
    ├── __init__.py
    ├── monitoring
    │   ├── __init__.py
    │   ├── check_server.py
    │   └── README.md
    └── observability
    │   ├── README.md
    │   └── token_usage.py
├── engine
    ├── __init__.py
    └── query_executor.py
├── docker
    ├── pgvector
    │   └── init
    │   │   ├── 001_create_database.sql
    │   │   └── 002_create_user_and_grant.sql
    ├── postgres
    │   └── init
    │   │   ├── 001_create_database.sql
    │   │   └── 002_create_user_and_grant.sql
    ├── Dockerfile.dockerignore
    ├── docker-compose-postgres.yml
    ├── docker-compose-pgvector.yml
    ├── Dockerfile
    └── docker-compose.yml
├── .pre-commit-config.yaml
├── pgvector.sh
├── interface
    ├── app_pages
    │   ├── settings_sections
    │   │   ├── __init__.py
    │   │   └── README.md
    │   ├── sidebar_components
    │   │   ├── __init__.py
    │   │   ├── db_selector.py
    │   │   ├── chatbot_session_controller.py
    │   │   ├── llm_selector.py
    │   │   ├── embedding_selector.py
    │   │   └── data_source_selector.py
    │   ├── home.py
    │   ├── settings.py
    │   ├── chatbot.py
    │   └── lang2sql.py
    ├── pages_config.py
    ├── streamlit_app.py
    └── core
    │   ├── session_utils.py
    │   ├── lang2sql_runner.py
    │   ├── config
    │       ├── paths.py
    │       ├── models.py
    │       ├── __init__.py
    │       ├── registry_db.py
    │       ├── registry_llm.py
    │       └── registry_data_sources.py
    │   └── dialects.py
├── utils
    ├── llm
    │   ├── vectordb
    │   │   ├── __init__.py
    │   │   ├── faiss_db.py
    │   │   ├── factory.py
    │   │   └── pgvector_db.py
    │   ├── tools
    │   │   └── __init__.py
    │   ├── output_schema
    │   │   ├── question_suitability.py
    │   │   ├── document_suitability.py
    │   │   └── README.md
    │   ├── graph_utils
    │   │   ├── profile_utils.py
    │   │   ├── __init__.py
    │   │   ├── basic_graph.py
    │   │   ├── enriched_graph.py
    │   │   └── README.md
    │   ├── core
    │   │   └── __init__.py
    │   ├── llm_response_parser.py
    │   ├── retrieval.py
    │   └── chains.py
    ├── databases
    │   ├── __init__.py
    │   ├── logger.py
    │   ├── config.py
    │   ├── connector
    │   │   ├── base_connector.py
    │   │   ├── duckdb_connector.py
    │   │   ├── clickhouse_connector.py
    │   │   ├── sqlite_connector.py
    │   │   ├── mysql_connector.py
    │   │   ├── postgres_connector.py
    │   │   ├── oracle_connector.py
    │   │   ├── mariadb_connector.py
    │   │   ├── databricks_connector.py
    │   │   ├── snowflake_connector.py
    │   │   └── trino_connector.py
    │   └── factory.py
    ├── data
    │   ├── datahub_services
    │   │   ├── __init__.py
    │   │   ├── base_client.py
    │   │   └── query_service.py
    │   └── datahub_source.py
    └── visualization
    │   └── README.md
├── .gitignore
├── version.py
├── cli
    ├── utils
    │   ├── logger.py
    │   ├── env_loader.py
    │   └── README.md
    ├── core
    │   ├── environment.py
    │   ├── streamlit_runner.py
    │   └── README.md
    ├── commands
    │   ├── run_streamlit.py
    │   └── quary.py
    └── __init__.py
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── documentation.md
    │   ├── feature_request.md
    │   ├── performance.md
    │   └── bug_report.md
    ├── workflows
    │   ├── pre-commit.yml
    │   ├── pypi-release.yml
    │   └── pr-notification.yml
    └── PULL_REQUEST_TEMPLATE.md
├── docs
    ├── branch_guidelines.md
    └── pull_request_guidelines.md
├── dev
    ├── create_faiss.py
    └── create_pgvector.py
├── pyproject.toml
├── test
    └── test_llm_utils
    │   └── test_llm_response_parser.py
└── .env.example


/prompt/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/infra/__init__.py:
--------------------------------------------------------------------------------
1 | """인프라 계층 패키지 (DB, 모니터링 등)"""
2 | 


--------------------------------------------------------------------------------
/infra/monitoring/__init__.py:
--------------------------------------------------------------------------------
1 | """모니터링/헬스체크 패키지"""
2 | 


--------------------------------------------------------------------------------
/engine/__init__.py:
--------------------------------------------------------------------------------
1 | """Lang2SQL Data Processing 진입점 패키지"""
2 | 


--------------------------------------------------------------------------------
/docker/pgvector/init/001_create_database.sql:
--------------------------------------------------------------------------------
-- Create the two databases used by this setup: `lang2sql` and `test`.
CREATE DATABASE lang2sql;
CREATE DATABASE test;
3 | 


--------------------------------------------------------------------------------
/docker/postgres/init/001_create_database.sql:
--------------------------------------------------------------------------------
-- Create the two databases used by this setup: `lang2sql` and `test`.
CREATE DATABASE lang2sql;
CREATE DATABASE test;
3 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/psf/black
3 |     rev: 25.1.0
4 |     hooks:
5 |       - id: black
6 | 


--------------------------------------------------------------------------------
/pgvector.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Start a detached container named "pgvector" from the pgvector/pgvector:pg17
# image, publishing container port 5432 on host port 5431.
# The password is a hard-coded development default.
set -euo pipefail

docker run -d \
  --name pgvector \
  -e POSTGRES_PASSWORD=postgres \
  -p 5431:5432 \
  pgvector/pgvector:pg17


--------------------------------------------------------------------------------
/docker/Dockerfile.dockerignore:
--------------------------------------------------------------------------------
 1 | .git
 2 | __pycache__/
 3 | *.pyc
 4 | *.pyo
 5 | *.pyd
 6 | *.db
 7 | *.log
 8 | venv/
 9 | .env
10 | docker/
11 | 


--------------------------------------------------------------------------------
/interface/app_pages/settings_sections/__init__.py:
--------------------------------------------------------------------------------
# Package grouping the settings page sections.
# __all__ lists the section submodule names; none of them is imported here,
# so a star-import of this package is what pulls them in.

__all__ = [
    "data_source_section",
    "llm_section",
    "db_section",
]
8 | 


--------------------------------------------------------------------------------
/utils/llm/vectordb/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | VectorDB 모듈 - FAISS와 pgvector를 지원하는 벡터 데이터베이스 추상화
3 | """
4 | 
5 | from utils.llm.vectordb.factory import get_vector_db
6 | 
7 | __all__ = ["get_vector_db"]
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | .env
 3 | __pycache__/
 4 | build/
 5 | lang2sql.egg-info/
 6 | dist/
 7 | .pypirc
 8 | .venv/
 9 | test_lhm/
10 | .cursorignore
11 | .vscode
12 | table_info_db
13 | ko_reranker_local
14 | *.csv
15 | 


--------------------------------------------------------------------------------
/docker/pgvector/init/002_create_user_and_grant.sql:
--------------------------------------------------------------------------------
-- Role for the `lang2sql` database (development credentials).
CREATE USER lang2sql WITH PASSWORD 'lang2sqlpassword';
GRANT ALL PRIVILEGES ON DATABASE lang2sql TO lang2sql;
-- Since PostgreSQL 15, database-level privileges no longer allow creating
-- objects in schema `public`; that schema belongs to the database owner,
-- so make the role the owner to let it create tables there.
ALTER DATABASE lang2sql OWNER TO lang2sql;

-- Role for the `test` database (development credentials).
CREATE USER test WITH PASSWORD 'testpassword';
GRANT ALL PRIVILEGES ON DATABASE test TO test;
ALTER DATABASE test OWNER TO test;
6 | 


--------------------------------------------------------------------------------
/docker/postgres/init/002_create_user_and_grant.sql:
--------------------------------------------------------------------------------
-- Role for the `lang2sql` database (development credentials).
CREATE USER lang2sql WITH PASSWORD 'lang2sqlpassword';
GRANT ALL PRIVILEGES ON DATABASE lang2sql TO lang2sql;
-- Since PostgreSQL 15, database-level privileges no longer allow creating
-- objects in schema `public`; that schema belongs to the database owner,
-- so make the role the owner to let it create tables there.
ALTER DATABASE lang2sql OWNER TO lang2sql;

-- Role for the `test` database (development credentials).
CREATE USER test WITH PASSWORD 'testpassword';
GRANT ALL PRIVILEGES ON DATABASE test TO test;
ALTER DATABASE test OWNER TO test;
6 | 


--------------------------------------------------------------------------------
/utils/databases/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 데이터베이스 유틸리티 패키지 초기화 모듈.
 3 | 
 4 | 이 모듈은 주요 구성 요소인 DatabaseFactory와 DBConfig를 외부로 노출하여
 5 | 데이터베이스 관련 기능을 손쉽게 사용할 수 있도록 합니다.
 6 | """
 7 | 
 8 | from utils.databases.config import DBConfig
 9 | from utils.databases.factory import DatabaseFactory
10 | 
11 | __all__ = [
12 |     "DatabaseFactory",
13 |     "DBConfig",
14 | ]
15 | 


--------------------------------------------------------------------------------
/utils/databases/logger.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 로깅 설정 모듈.
 3 | 
 4 | 이 모듈은 애플리케이션 전역에서 사용할 기본 로깅 설정을 정의하고,
 5 | 표준 로거 인스턴스(logger)를 제공합니다.
 6 | """
 7 | 
 8 | import logging
 9 | 
# Runs at import time: configures the root logger with INFO level and a
# "timestamp [LEVEL] message" line format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-level logger, named after this module, for other modules to import.
logger = logging.getLogger(__name__)
16 | 


--------------------------------------------------------------------------------
/version.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Lang2SQL 패키지의 버전 정보를 정의하는 모듈입니다.
 3 | 
 4 | 이 모듈은 패키지의 버전을 추적하고 관리하는 데 사용됩니다.
 5 | 
 6 | 패키지의 버전은 다음과 같은 형식을 따라야 합니다:
 7 | 
 8 | MAJOR.MINOR.PATCH
 9 | 
10 | 여기서:
11 | - MAJOR는 큰 변경이 있을 때 증가합니다.
12 | - MINOR는 새로운 기능이 추가되거나 중요한 변경이 있을 때 증가합니다.
13 | - PATCH는 버그 수정이 있을 때 증가합니다.
14 | 
15 | 예를 들어, 버전 0.1.0에서 0.1.1로 업그레이드하면:
16 | - MAJOR는 변경되지 않습니다.
17 | - MINOR는 변경되지 않습니다.
18 | - PATCH는 1로 증가합니다.
19 | """
20 | 
21 | __version__ = "0.2.2"
22 | 


--------------------------------------------------------------------------------
/utils/llm/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | from utils.llm.tools.datahub import (
 2 |     get_info_from_db,
 3 |     get_metadata_from_db,
 4 |     set_gms_server,
 5 | )
 6 | 
 7 | from utils.llm.tools.chatbot_tool import (
 8 |     search_database_tables,
 9 |     get_glossary_terms,
10 |     get_query_examples,
11 | )
12 | 
13 | __all__ = [
14 |     "set_gms_server",
15 |     "get_info_from_db",
16 |     "get_metadata_from_db",
17 |     "search_database_tables",
18 |     "get_glossary_terms",
19 |     "get_query_examples",
20 | ]
21 | 


--------------------------------------------------------------------------------
/cli/utils/logger.py:
--------------------------------------------------------------------------------
 1 | """CLI 전용 로깅 유틸리티 모듈."""
 2 | 
 3 | import logging
 4 | 
 5 | 
 6 | def configure_logging(level: int = logging.INFO) -> logging.Logger:
 7 |     """로깅을 설정하고 기본 로거를 반환합니다.
 8 | 
 9 |     Args:
10 |         level (int, optional): 로깅 레벨. 기본값은 logging.INFO.
11 | 
12 |     Returns:
13 |         logging.Logger: 설정된 로거 인스턴스.
14 |     """
15 |     logging.basicConfig(
16 |         level=level,
17 |         format="%(asctime)s [%(levelname)s] %(message)s",
18 |         datefmt="%Y-%m-%d %H:%M:%S",
19 |     )
20 |     return logging.getLogger("cli")
21 | 


--------------------------------------------------------------------------------
/prompt/profile_extraction_prompt.md:
--------------------------------------------------------------------------------
 1 | # Role
 2 | 
 3 | You are an assistant that analyzes a user question and extracts the following profiles as JSON:
 4 | - is_timeseries (boolean)
 5 | - is_aggregation (boolean)
 6 | - has_filter (boolean)
 7 | - is_grouped (boolean)
 8 | - has_ranking (boolean)
 9 | - has_temporal_comparison (boolean)
10 | - intent_type (one of: trend, lookup, comparison, distribution)
11 | 
12 | # Input
13 | 
14 | Question:
15 | {question}
16 | 
17 | # Output Format
18 | 
19 | The output must be valid JSON matching the QuestionProfile schema.
20 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: 📚 Documentation Issue
 3 | about: 문서의 오타, 빠진 내용, 개선 요청 등
 4 | title: "[문서] 간단한 제목"
 5 | labels: documentation
 6 | ---
 7 | 
 8 | ### **문서 부분 (Which part of docs)**
 9 | - README, CLI 사용법, 튜토리얼, API 문서 등
10 | - 해당 문서의 경로 또는 파일
11 | 
12 | ### **문제 설명 (What is wrong or missing)**
13 | - 오타, 잘못된 설명, 링크 오류, 빠진 예제 등
14 | 
15 | ### **개선 제안 (Suggested Fix)**
16 | - 어떤 내용을 추가/수정하면 좋을지
17 | - 예시 문구, 스크린샷 등이 있으면 좋음
18 | 
19 | ### **추가 정보 (Additional Context)**
20 | - 문서를 읽는 사용자 입장에서 헷갈렸던 점
21 | - 다른 프로젝트 문서와 비교했을 때 좋은 포맷, 구조 등이 있다면
22 | 
23 | 


--------------------------------------------------------------------------------
/prompt/question_gate_prompt.md:
--------------------------------------------------------------------------------
 1 | 당신은 데이터 분석 도우미입니다. 아래 사용자 질문이 SQL로 답변 가능한지 판별하고, 구조화된 결과를 반환하세요.
 2 | 
 3 | 요건:
 4 | - reason: 한 줄 설명(어떤 보완이 필요한지 요약)
 5 | - missing_entities: 기간, 대상 엔터티, 측정값 등 누락된 핵심 요소 리스트(없으면 빈 리스트)
 6 | - requires_data_science: 통계/ML 분석이 필요한지 여부(Boolean)
 7 | 
 8 | 언어/출력 형식:
 9 | - 모든 텍스트 값은 한국어로 작성하세요. (reason은 한국어 문장, missing_entities 항목은 한국어 명사구)
10 | - Boolean 값은 JSON의 true/false로 표기하세요.
11 | 
12 | 주의:
13 | - 데이터 분석 맥락에서 SQL 집계/필터/조인으로 해결 가능한지 판단합니다.
14 | - 정책/운영/가이드/설치/권한/오류 해결 등은 SQL 부적합으로 간주합니다.
15 | 
16 | 입력: {question}
17 | 
18 | 출력은 반드시 지정된 스키마의 JSON으로만 반환하세요.
19 | 
20 | 


--------------------------------------------------------------------------------
/interface/app_pages/sidebar_components/__init__.py:
--------------------------------------------------------------------------------
 1 | from .data_source_selector import render_sidebar_data_source_selector
 2 | from .llm_selector import render_sidebar_llm_selector
 3 | from .embedding_selector import render_sidebar_embedding_selector
 4 | from .db_selector import render_sidebar_db_selector
 5 | from .chatbot_session_controller import render_sidebar_chatbot_session_controller
 6 | 
 7 | __all__ = [
 8 |     "render_sidebar_data_source_selector",
 9 |     "render_sidebar_llm_selector",
10 |     "render_sidebar_embedding_selector",
11 |     "render_sidebar_db_selector",
12 |     "render_sidebar_chatbot_session_controller",
13 | ]
14 | 


--------------------------------------------------------------------------------
/interface/app_pages/home.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 홈 페이지 모듈.
 3 | 
 4 | Lang2SQL 데이터 분석 도구의 소개와 사용 방법을 안내합니다.
 5 | """
 6 | 
 7 | import streamlit as st
 8 | 
# Page heading.
st.title("🏠 홈")

# Welcome text followed by the numbered usage steps, rendered as Markdown.
# Some lines end with two spaces (Markdown hard line breaks); keep them.
st.markdown(
    """
    ### Lang2SQL 데이터 분석 도구에 오신 것을 환영합니다 🎉  

    이 도구는 자연어로 작성한 질문을 SQL 쿼리로 변환하고,  
    데이터베이스를 조회하여 결과를 **표와 차트**로 시각화합니다.  

    ---
    #### 사용 방법
    1. 왼쪽 메뉴에서 원하는 기능 페이지를 선택하세요.
    2. **🔍 Lang2SQL**: 자연어 → SQL 변환 및 결과 분석
    3. **📊 그래프 빌더**: LangGraph 실행 순서를 프리셋/커스텀으로 구성하고 세션에 적용
    4. **⚙️ 설정**: 데이터 소스, LLM, DB 연결 등 환경 설정
    """
)

# Closing hint shown in an info box.
st.info("왼쪽 메뉴에서 기능 페이지를 선택해 시작하세요 🚀")
28 | 


--------------------------------------------------------------------------------
/utils/llm/output_schema/question_suitability.py:
--------------------------------------------------------------------------------
 1 | """
 2 | QuestionSuitability 출력 모델.
 3 | 
 4 | LLM 구조화 출력으로부터 SQL 적합성 판단 결과를 표현하는 Pydantic 모델입니다.
 5 | """
 6 | 
 7 | from pydantic import BaseModel, Field
 8 | 
 9 | 
# Pydantic model holding the SQL-suitability verdict for a user question.
# NOTE: Pydantic exposes the class docstring and the Field descriptions in the
# generated JSON schema, so those strings are left exactly as written.
class QuestionSuitability(BaseModel):
    """
    SQL 생성 적합성 결과 모델.

    LLM 구조화 출력으로 직렬화 가능한 필드를 정의합니다.
    """

    # Required: short explanation / summary of what needs to be supplemented.
    reason: str = Field(description="보완/설명 사유 요약")
    # Key entities, periods, etc. missing from the question; defaults to [].
    missing_entities: list[str] = Field(
        default_factory=list, description="질문에서 누락된 핵심 엔터티/기간 등"
    )
    # Whether ML/statistical analysis beyond SQL is needed; defaults to False.
    requires_data_science: bool = Field(
        default=False, description="SQL을 넘어 ML/통계 분석이 필요한지 여부"
    )
24 | 


--------------------------------------------------------------------------------
/interface/pages_config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Streamlit 애플리케이션 페이지 설정 모듈.
 3 | 
 4 | 각 페이지의 경로와 제목을 정의하여 내비게이션에 사용합니다.
 5 | 
 6 | Attributes:
 7 |     PAGES (list): Streamlit Page 객체 리스트.
 8 |         - 홈 페이지
 9 |         - Lang2SQL 페이지
10 |         - 그래프 빌더 페이지
11 |         - ChatBot 페이지
12 |         - 설정 페이지
13 | """
14 | 
15 | import streamlit as st
16 | 
# Navigation entries in display order; each st.Page gets a script path and the
# title to show for it.
# NOTE(review): confirm every referenced script exists, in particular
# app_pages/graph_builder.py.
PAGES = [
    st.Page("app_pages/home.py", title="🏠 홈"),
    st.Page("app_pages/lang2sql.py", title="🔍 Lang2SQL"),
    st.Page("app_pages/graph_builder.py", title="📊 그래프 빌더"),
    st.Page("app_pages/chatbot.py", title="🤖 ChatBot"),
    st.Page("app_pages/settings.py", title="⚙️ 설정"),
]
24 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: ✨ Feature Request
 3 | about: 새로운 기능 제안
 4 | title: "[기능] 간단한 제목"
 5 | labels: enhancement
 6 | ---
 7 | 
 8 | ### **기능 설명 (Description)**
 9 | - 어떤 기능을 원하시는지 상세히 설명해주세요.
10 | 
11 | ### **왜 필요한가요? (Motivation / Use Case)**
12 | - 이 기능이 현재 상황에서 어떻게 도움이 될지
13 | - 사용 시 예상 시나리오
14 | 
15 | ### **제안하는 해결 방법 (Proposed Implementation)**
16 | - 가능하다면 구체적인 아이디어, 알고리즘, 인터페이스, UI 흐름 등
17 | - 예: 자연어 → SQL 변환 시 특정 조건 지원, 새로운 벡터DB 유형 추가, CLI 출력 형식, etc.
18 | 
19 | ### **대안 (Alternatives)**
20 | - 다른 접근 방식이나 기존 부분 활용 가능성
21 | 
22 | ### **추가 고려사항 (Additional Context)**
23 | - 문서, 테스트, 성능, 호환성 등에 대한 우려사항
24 | - 사용자 영향, 마이그레이션 이슈 등이 있다면
25 | 


--------------------------------------------------------------------------------
/utils/llm/graph_utils/profile_utils.py:
--------------------------------------------------------------------------------
 1 | def profile_to_text(profile_obj) -> str:
 2 |     mapping = {
 3 |         "is_timeseries": "• 시계열 분석 필요",
 4 |         "is_aggregation": "• 집계 함수 필요",
 5 |         "has_filter": "• WHERE 조건 필요",
 6 |         "is_grouped": "• GROUP BY 필요",
 7 |         "has_ranking": "• 정렬/순위 필요",
 8 |         "has_temporal_comparison": "• 기간 비교 필요",
 9 |     }
10 |     bullets = [
11 |         text for field, text in mapping.items() if getattr(profile_obj, field, False)
12 |     ]
13 |     intent = getattr(profile_obj, "intent_type", None)
14 |     if intent:
15 |         bullets.append(f"• 의도 유형 → {intent}")
16 | 
17 |     return "\n".join(bullets)
18 | 


--------------------------------------------------------------------------------
/docker/docker-compose-postgres.yml:
--------------------------------------------------------------------------------
 1 | # docker compose -f docker-compose-postgres.yml up
 2 | # docker compose -f docker-compose-postgres.yml down
 3 | 
 4 | services:
 5 |   postgres:
 6 |     image: postgres:15
 7 |     hostname: postgres
 8 |     container_name: postgres
 9 |     restart: always
10 |     ports:
11 |       - "5432:5432"
12 |     environment:
13 |       POSTGRES_USER: postgres
14 |       POSTGRES_PASSWORD: postgres
15 |       POSTGRES_DB: postgres
16 |       TZ: Asia/Seoul
17 |       LANG: en_US.utf8
18 |     volumes:
19 |       - postgres_data:/var/lib/postgresql/data
20 |       - ./postgres/init:/docker-entrypoint-initdb.d
21 | 
22 | volumes:
23 |   postgres_data:
24 | 


--------------------------------------------------------------------------------
/docker/docker-compose-pgvector.yml:
--------------------------------------------------------------------------------
 1 | # docker compose -f docker-compose-pgvector.yml up
 2 | # docker compose -f docker-compose-pgvector.yml down
 3 | 
 4 | services:
 5 |   pgvector:
 6 |     image: pgvector/pgvector:pg17
 7 |     hostname: pgvector
 8 |     container_name: pgvector
 9 |     restart: always
10 |     ports:
11 |       - "5432:5432"
12 |     environment:
13 |       POSTGRES_USER: pgvector
14 |       POSTGRES_PASSWORD: pgvector
15 |       POSTGRES_DB: pgvector
16 |       TZ: Asia/Seoul
17 |       LANG: en_US.utf8
18 |     volumes:
19 |       - pgvector_data:/var/lib/postgresql/data
20 |       - ./pgvector/init:/docker-entrypoint-initdb.d
21 | 
22 | volumes:
23 |   pgvector_data:
24 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | name: Pre-commit Black Check
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 |     branches:
 9 |       - master
10 | 
11 | jobs:
12 |   pre-commit:
13 |     name: Run Pre-commit Hooks
14 |     runs-on: ubuntu-latest
15 | 
16 |     steps:
17 |       - name: Checkout repository
18 |         uses: actions/checkout@v4
19 | 
20 |       - name: Set up Python
21 |         uses: actions/setup-python@v4
22 |         with:
23 |           python-version: "3.x"
24 | 
25 |       - name: Install pre-commit
26 |         run: pip install pre-commit
27 | 
28 |       - name: Run pre-commit
29 |         run: pre-commit run --all-files --verbose
30 | 


--------------------------------------------------------------------------------
/utils/data/datahub_services/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | DataHub 유틸리티 패키지
 3 | 
 4 | DataHub와의 상호작용을 위한 모듈들을 제공합니다.
 5 | 
 6 | 주요 구성요소:
 7 | - DataHubBaseClient: 기본 연결 및 통신
 8 | - MetadataService: 메타데이터, 리니지, URN 관련 기능
 9 | - QueryService: 쿼리 관련 기능
10 | - GlossaryService: 용어집 관련 기능
11 | """
12 | 
13 | from utils.data.datahub_services.base_client import DataHubBaseClient
14 | from utils.data.datahub_services.glossary_service import GlossaryService
15 | from utils.data.datahub_services.metadata_service import MetadataService
16 | from utils.data.datahub_services.query_service import QueryService
17 | 
18 | __all__ = [
19 |     "DataHubBaseClient",
20 |     "MetadataService",
21 |     "QueryService",
22 |     "GlossaryService",
23 | ]
24 | 


--------------------------------------------------------------------------------
/cli/core/environment.py:
--------------------------------------------------------------------------------
 1 | """환경 변수 초기화 모듈 (VectorDB 설정은 UI에서 관리)."""
 2 | 
 3 | from typing import Optional
 4 | 
 5 | from cli.utils.env_loader import load_env, set_prompt_dir
 6 | 
 7 | 
def initialize_environment(
    *,
    env_file_path: Optional[str],
    prompt_dir_path: Optional[str],
) -> None:
    """Initialize environment variables; VectorDB settings are managed in the UI.

    Delegates to ``load_env`` and then to ``set_prompt_dir``, in that order.

    Args:
        env_file_path (Optional[str]): Path of the .env file to load, passed to
            ``load_env``. Presumably ``None`` selects a default location;
            confirm in ``cli.utils.env_loader``.
        prompt_dir_path (Optional[str]): Prompt template directory, passed to
            ``set_prompt_dir``. Presumably ``None`` leaves it unset; confirm
            in ``cli.utils.env_loader``.

    Raises:
        Exception: Anything raised by ``load_env`` or ``set_prompt_dir``
            propagates unchanged; nothing is caught here.
    """
    load_env(env_file_path=env_file_path)
    set_prompt_dir(prompt_dir_path=prompt_dir_path)
24 | 


--------------------------------------------------------------------------------
/prompt/template_loader.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 이 모듈은 프롬프트 템플릿을 로드하는 기능을 제공합니다.
 3 | - 프롬프트 템플릿은 마크다운 파일로 관리되고 있으며, 환경변수에서 템플릿 디렉토리를 가져오거나, 없으면 현재 파일 위치 기준으로 설정합니다.
 4 | """
 5 | 
 6 | import os
 7 | 
 8 | 
 9 | def get_prompt_template(prompt_name: str) -> str:
10 |     # 환경변수에서 템플릿 디렉토리를 가져오거나, 없으면 현재 파일 위치 기준으로 설정
11 |     templates_dir = os.environ.get("PROMPT_TEMPLATES_DIR", os.path.dirname(__file__))
12 | 
13 |     try:
14 |         template_path = os.path.join(templates_dir, f"{prompt_name}.md")
15 |         with open(template_path, "r", encoding="utf-8") as f:
16 |             template = f.read()
17 |     except FileNotFoundError:
18 |         raise FileNotFoundError(f"경고: '{prompt_name}.md' 파일을 찾을 수 없습니다.")
19 | 
20 |     return template
21 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | # 1. Base image
 2 | FROM python:3.12-slim-bullseye
 3 | 
 4 | # 2. 시스템 라이브러리 설치
 5 | RUN apt-get update && apt-get install -y \
 6 |     build-essential \
 7 |     curl \
 8 |     git \
 9 |     libpq-dev \
10 |     && rm -rf /var/lib/apt/lists/*
11 | 
12 | # 3. uv 설치
13 | RUN pip install --no-cache-dir uv
14 | 
15 | # 4. 작업 디렉토리 설정
16 | WORKDIR /app
17 | 
18 | # 5. 소스 코드 복사 및 의존성 설치
19 | COPY pyproject.toml ./
20 | COPY . .
21 | RUN uv pip install --system --upgrade pip setuptools wheel \
22 |     && uv pip install --system .
23 | 
24 | # 6. 환경 변수 설정
25 | ENV PYTHONPATH=/app
26 | ENV PYTHONUNBUFFERED=1
27 | 
28 | # 7. 포트 설정
29 | ENV STREAMLIT_SERVER_PORT=8501
30 | 
31 | # 8. 실행 명령
32 | CMD ["lang2sql", "run-streamlit"]
33 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | ## #️⃣ Issue Number
 2 | <!-- ex) #이슈번호, #이슈번호 -->
 3 | - TBD
 4 | 
 5 | ## 📝 요약(Summary)
 6 | <!-- 해당 PR에 대해서 간단히 설명해주세요(3줄 이내). (Why? How?) -->
 7 | <!-- (세부적인 내용은 Issue에 작성되었을 것이라고 가정합니다) -->
 8 | - TBD
 9 | 
10 | ## 💬  To Reviewers (선택)
11 | <!-- 리뷰어가 중점적으로 봐줬으면 좋겠는 부분이 있으면 작성해주세요. --> 
12 | <!-- ex) 특정 코드의 분기를 처리함에 있어서 적절한 방법일 지 확인 부탁드립니다. -->
13 | - TBD
14 | 
15 | ## PR Checklist
16 | <!-- [ ] 변경 사항에 대한 테스트 (버그 수정 or 기능에 대한 테스트) -->
17 | - TBD
18 | 
19 | 
20 | ## reference) How to Code Review
21 | - 따봉(👍): 리뷰어가 리뷰이의 코드에서 칭찬의 의견을 남기고 싶을 때 사용합니다.
22 | - 느낌표(❗): 리뷰어가 리뷰이에게 필수적으로 코드 수정을 요청할 때 사용합니다.
23 | - 물음표 (❓): 리뷰어가 리뷰이에게 의견을 물어보고 싶을 때 사용합니다.
24 | - 알약 (💊): 리뷰어가 리뷰이의 코드에서 개선된 방법을 제안하지만 그것의 반영이 필수까지는 아닐 때 사용합니다.


--------------------------------------------------------------------------------
/prompt/query_enrichment_prompt.md:
--------------------------------------------------------------------------------
 1 | # Role
 2 | 
 3 | You are a smart assistant that takes a user question and enriches it using:
 4 | 1. Question profiles: {profiles}
 5 | 2. Table metadata (names, columns, descriptions): 
 6 |    {related_tables}
 7 | 
 8 | # Tasks
 9 | 
10 | - Correct any wrong terms by matching them to actual column names.
11 | - If the question is time-series or aggregation, add explicit hints (e.g., "over the last 30 days").
12 | - If needed, map natural language terms to actual column values (e.g., ‘미국’ → ‘USA’ for country_code).
13 | - Output the enriched question only.
14 | 
15 | # Input
16 | 
17 | Refined question:
18 | {refined_question}
19 | 
20 | # Notes
21 | 
22 | Use the refined version for enrichment, but keep the original intent in mind.
23 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/performance.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: ⚡ Performance / Benchmark
 3 | about: 성능 문제 또는 벤치마크 관련 이슈
 4 | title: "[성능] 간단한 제목"
 5 | labels: performance
 6 | ---
 7 | 
 8 | ### **문제 요약 (Summary)**
 9 | - 성능이 기대 이하인 지점 (쿼리 처리 시간, 메모리 사용량, 벡터 DB 응답 속도 등)
10 | 
11 | ### **재현 조건 (Reproduction)**
12 | - 데이터 크기 (예: 몇 개의 테이블, 칼럼 수, row 수 등)
13 | - 입력 내용 (자연어, 모델 설정, 벡터 DB 종류 등)
14 | - 실행 환경 (하드웨어, 네트워크, 버전 등)
15 | 
16 | ### **측정값 (Metrics)**
17 | - 실제 측정된 성능 (시간, 메모리, CPU 등)
18 | - 기대 성능 또는 비교 대상
19 | 
20 | ### **가능한 원인 (Possible Causes)**
21 | - 어디서 병목이 생겼다고 생각하는지 (예: 벡터 검색, SQL 생성, 데이터 불러오기 등)
22 | 
23 | ### **제안된 해결 방안 (Suggested Fix)**
24 | - 예: 인덱스 추가, 쿼리 최적화, 병렬 처리, 캐시 활용 등
25 | 
26 | ### **추가 영향 및 고려사항 (Additional Context)**
27 | - 성능 개선이 다른 기능이나 안정성에 미칠 영향
28 | - 벡터 DB 특성, 데이터 스키마 복잡성, 사용자 환경 등
29 | 
30 | 


--------------------------------------------------------------------------------
/interface/app_pages/settings.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Settings 페이지 – 섹션 기반 UI
 3 | """
 4 | 
 5 | import streamlit as st
 6 | 
 7 | from interface.core.config import load_config
 8 | from interface.app_pages.settings_sections.data_source_section import (
 9 |     render_data_source_section,
10 | )
11 | from interface.app_pages.settings_sections.llm_section import render_llm_section
12 | from interface.app_pages.settings_sections.db_section import render_db_section
13 | 
14 | 
# Page heading.
st.title("⚙️ 설정")

# Configuration object handed to the data-source and LLM sections below.
config = load_config()

# One tab per settings section, in display order.
tabs = st.tabs(["데이터 소스", "LLM", "DB"])

with tabs[0]:
    render_data_source_section(config)

with tabs[1]:
    render_llm_section(config)

with tabs[2]:
    # NOTE(review): unlike the other two sections, this one is called without
    # `config`; confirm that is intended.
    render_db_section()

st.divider()
# Footer caption shown to the user below the tabs.
st.caption("민감 정보는 로그에 기록되지 않으며, 이 설정은 현재 세션에 우선 반영됩니다.")
32 | 


--------------------------------------------------------------------------------
/cli/commands/run_streamlit.py:
--------------------------------------------------------------------------------
 1 | """Streamlit 실행 CLI 명령어 모듈."""
 2 | 
 3 | import click
 4 | 
 5 | from cli.core.streamlit_runner import run_streamlit_command
 6 | from cli.utils.logger import configure_logging
 7 | 
# Configured at import time; returns the logger used by this command.
logger = configure_logging()


# click command "run-streamlit" with a single -p/--port integer option
# (default 8501). It logs the chosen port and delegates to
# run_streamlit_command.
# NOTE: click uses the function docstring and the option `help` string as
# --help output, so both are left exactly as written.
@click.command(name="run-streamlit")
@click.option(
    "-p",
    "--port",
    type=int,
    default=8501,
    help=(
        "Streamlit 애플리케이션이 바인딩될 포트 번호를 지정합니다. "
        "기본 포트는 8501이며, 필요 시 다른 포트를 설정할 수 있습니다."
    ),
)
def run_streamlit_cli_command(port: int) -> None:
    """CLI 명령어로 Streamlit 애플리케이션을 실행합니다.

    Args:
        port (int): Streamlit 서버가 바인딩될 포트 번호. 기본값은 8501.
    """
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logger.info("Executing 'run-streamlit' command on port %d...", port)
    run_streamlit_command(port)
30 | 


--------------------------------------------------------------------------------
/.github/workflows/pypi-release.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Python Package to PyPI (uv)
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "v*"  # v로 시작하는 태그 (예: v1.0.0)
 7 | 
 8 | jobs:
 9 |   deploy:
10 |     name: Build and Publish
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - name: Checkout repository
15 |         uses: actions/checkout@v4
16 | 
17 |       - name: Set up Python
18 |         uses: actions/setup-python@v5
19 |         with:
20 |           python-version: "3.x"
21 | 
22 |       - name: Install uv
23 |         uses: astral-sh/setup-uv@v3
24 | 
25 |       - name: Build sdist/wheel with uv
26 |         run: |
27 |           uv build
28 | 
29 |       - name: Publish to PyPI with uv
30 |         env:
31 |           UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
32 |         run: |
33 |           uv publish --token $UV_PUBLISH_TOKEN
34 | 
35 | 


--------------------------------------------------------------------------------
/interface/streamlit_app.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Streamlit 애플리케이션 메인 실행 모듈.
 3 | 
 4 | Lang2SQL 데이터 분석 도구의 내비게이션을 초기화하고 실행합니다.
 5 | """
 6 | 
 7 | import streamlit as st
 8 | 
 9 | from interface.pages_config import PAGES
10 | 
11 | 
def configure_app() -> None:
    """Apply the app-wide Streamlit page configuration.

    Sets the page title, page icon, layout and initial sidebar state through
    ``st.set_page_config``.

    Returns:
        None
    """
    page_options = {
        "page_title": "Lang2SQL 데이터 분석 도구",
        "page_icon": "🔎",
        "layout": "wide",
        "initial_sidebar_state": "expanded",
    }
    st.set_page_config(**page_options)
26 | 
27 | 
def main() -> None:
    """Application entry point.

    Applies the global page configuration, then builds the navigation from
    ``PAGES`` and runs the page it returns.

    Returns:
        None
    """
    configure_app()
    st.navigation(PAGES).run()
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     main()
43 | 


--------------------------------------------------------------------------------
/docker/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   streamlit:
 3 |     hostname: streamlit
 4 |     container_name: streamlit
 5 |     build:
 6 |       context: ..
 7 |       dockerfile: docker/Dockerfile
 8 |     ports:
 9 |       - "8501:8501"
10 |     volumes:
11 |       - ../:/app
12 |     env_file:
13 |       - ../.env
14 |     environment:
15 |       - STREAMLIT_SERVER_PORT=8501
16 |       - DATABASE_URL=postgresql://pgvector:pgvector@localhost:5432/streamlit
17 |     depends_on:
18 |       - pgvector
19 | 
20 |   pgvector:
21 |     image: pgvector/pgvector:pg17
22 |     hostname: pgvector
23 |     container_name: pgvector
24 |     environment:
25 |       POSTGRES_USER: pgvector
26 |       POSTGRES_PASSWORD: pgvector
27 |       POSTGRES_DB: streamlit
28 |     ports:
29 |       - "5432:5432"
30 |     volumes:
31 |       - pgdata:/var/lib/postgresql/data
32 | 
33 | volumes:
34 |   pgdata:
35 | 


--------------------------------------------------------------------------------
/utils/databases/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 데이터베이스 설정 정보를 정의하는 모듈.
 3 | 
 4 | 이 모듈은 데이터베이스 연결에 필요한 기본 설정값과
 5 | 추가 옵션(extra)을 포함한 타입 힌트를 제공합니다.
 6 | """
 7 | 
 8 | from typing import Dict, Optional, TypedDict
 9 | 
10 | 
class DBConfig(TypedDict):
    """
    Typed dictionary describing database connection settings.

    Declares the configuration fields shared by the database connectors.
    Every field except ``host`` is annotated ``Optional`` and may hold ``None``.

    Attributes:
        host (str): Database host name or IP address.
        port (Optional[int]): Database port number.
        user (Optional[str]): Login user name.
        password (Optional[str]): Login password.
        database (Optional[str]): Name of the target database.
        extra (Optional[Dict[str, str]]): Additional driver-specific settings.
    """

    host: str
    port: Optional[int]
    user: Optional[str]
    password: Optional[str]
    database: Optional[str]
    extra: Optional[Dict[str, str]]
33 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: 🐞 Bug Report
 3 | about: Lang2SQL 사용 중 버그가 있을 때 알려주세요
 4 | title: "[버그] [모듈명 또는 기능] 간략한 설명"
 5 | labels: bug
 6 | ---
 7 | 
 8 | ### **버그 요약 (Summary)**
 9 | - 무엇이 잘못되었는지 간단히 요약해주세요.
10 | 
11 | ### **재현 단계 (Steps to Reproduce)**
12 | 1. 어떤 환경에서 (예: 운영체제, Python 버전, Lang2SQL 버전 등)
13 | 2. 구체적으로 어떤 입력을 했는지 (자연어 쿼리, 설정, 벡터 DB 종류, DataHub 유무 등)
14 | 3. 기대 결과는 무엇이었는지
15 | 4. 실제 결과는 무엇인지 (에러 메시지, 로그, SQL 결과, 속도 문제 등)
16 | 
17 | ### **환경 (Environment)**
18 | - Lang2SQL 버전:
19 | - Python 버전:
20 | - OS (예: Ubuntu 22.04, macOS 13):
21 | - VectorDB 종류 및 설정 (FAISS / pgvector 등):
22 | - DataHub 설정 유무:
23 | - 기타 관련된 설정 (예: `.env`, 구성 파일 등)
24 | 
25 | ### **스크린샷 / 로그 (Screenshots & Logs)**
26 | 필요하다면 에러 로그, 콘솔 출력, 스택 트레이스, 화면 캡처 등을 첨부해주세요.
27 | 
28 | ### **추가 정보 (Additional Context)**
29 | - 이전에 유사한 오류가 있었는지?
30 | - 회피 방법(임시 해결책)이 있는 경우
31 | - 이 버그가 프로젝트 전체에 어떤 영향을 미치는지
32 | 


--------------------------------------------------------------------------------
/utils/llm/core/__init__.py:
--------------------------------------------------------------------------------
 1 | from utils.llm.core.factory import (
 2 |     get_embeddings,
 3 |     get_embeddings_azure,
 4 |     get_embeddings_bedrock,
 5 |     get_embeddings_gemini,
 6 |     get_embeddings_huggingface,
 7 |     get_embeddings_ollama,
 8 |     get_embeddings_openai,
 9 |     get_llm,
10 |     get_llm_azure,
11 |     get_llm_bedrock,
12 |     get_llm_gemini,
13 |     get_llm_huggingface,
14 |     get_llm_ollama,
15 |     get_llm_openai,
16 | )
17 | 
18 | __all__ = [
19 |     "get_llm",
20 |     "get_llm_openai",
21 |     "get_llm_azure",
22 |     "get_llm_bedrock",
23 |     "get_llm_gemini",
24 |     "get_llm_ollama",
25 |     "get_llm_huggingface",
26 |     "get_embeddings",
27 |     "get_embeddings_openai",
28 |     "get_embeddings_azure",
29 |     "get_embeddings_bedrock",
30 |     "get_embeddings_gemini",
31 |     "get_embeddings_ollama",
32 |     "get_embeddings_huggingface",
33 | ]
34 | 


--------------------------------------------------------------------------------
/utils/llm/graph_utils/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 그래프 관련 유틸리티 모듈입니다.
 3 | 
 4 | 이 패키지는 Lang2SQL의 워크플로우 그래프 구성과 관련된 모듈들을 포함합니다.
 5 | """
 6 | 
 7 | from utils.llm.graph_utils.base import (
 8 |     CONTEXT_ENRICHMENT,
 9 |     GET_TABLE_INFO,
10 |     PROFILE_EXTRACTION,
11 |     QUERY_MAKER,
12 |     QueryMakerState,
13 |     context_enrichment_node,
14 |     get_table_info_node,
15 |     profile_extraction_node,
16 |     query_maker_node,
17 | )
18 | 
19 | from .basic_graph import builder as basic_builder
20 | from .enriched_graph import builder as enriched_builder
21 | 
22 | __all__ = [
23 |     # 상태 및 노드 식별자
24 |     "QueryMakerState",
25 |     "GET_TABLE_INFO",
26 |     "QUERY_MAKER",
27 |     "PROFILE_EXTRACTION",
28 |     "CONTEXT_ENRICHMENT",
29 |     # 노드 함수들
30 |     "get_table_info_node",
31 |     "query_maker_node",
32 |     "profile_extraction_node",
33 |     "context_enrichment_node",
34 |     # 그래프 빌더들
35 |     "basic_builder",
36 |     "enriched_builder",
37 | ]
38 | 


--------------------------------------------------------------------------------
/utils/llm/vectordb/faiss_db.py:
--------------------------------------------------------------------------------
 1 | """
 2 | FAISS VectorDB 구현
 3 | """
 4 | 
 5 | import os
 6 | from typing import Optional
 7 | 
 8 | from langchain_community.vectorstores import FAISS
 9 | 
10 | from utils.llm.core import get_embeddings
11 | from utils.llm.tools import get_info_from_db
12 | 
13 | 
def get_faiss_vector_db(vectordb_path: Optional[str] = None):
    """Load the FAISS vector store from disk, rebuilding it when loading fails.

    Args:
        vectordb_path: Directory holding the persisted index. When ``None``,
            ``<current working directory>/dev/table_info_db`` is used.

    Returns:
        The FAISS vector store, either loaded from ``vectordb_path`` or newly
        built from ``get_info_from_db()`` and saved to that path.
    """
    embeddings = get_embeddings()

    # Fall back to the default location.
    if vectordb_path is None:
        vectordb_path = os.path.join(os.getcwd(), "dev/table_info_db")

    try:
        # NOTE(security): allow_dangerous_deserialization makes load_local
        # unpickle data from disk; only use index directories you trust.
        db = FAISS.load_local(
            vectordb_path,
            embeddings,
            allow_dangerous_deserialization=True,
        )
    except Exception:
        # Previously a bare ``except:``, which also intercepted
        # KeyboardInterrupt/SystemExit and turned Ctrl-C into a rebuild.
        # Any ordinary load failure still triggers the rebuild.
        documents = get_info_from_db()
        db = FAISS.from_documents(documents, embeddings)
        db.save_local(vectordb_path)
        print(f"VectorDB를 새로 생성했습니다: {vectordb_path}")
    return db
34 | 


--------------------------------------------------------------------------------
/cli/core/streamlit_runner.py:
--------------------------------------------------------------------------------
 1 | """Streamlit 실행 유틸리티 모듈."""
 2 | 
 3 | import subprocess
 4 | 
 5 | from cli.utils.logger import configure_logging
 6 | 
 7 | logger = configure_logging()
 8 | 
 9 | 
def run_streamlit_command(port: int) -> None:
    """Run the Streamlit application bound to the given port.

    Args:
        port (int): Port number passed to ``--server.port``.

    Raises:
        subprocess.CalledProcessError: Logged and re-raised when the
            ``streamlit`` process exits with a non-zero status.
    """
    logger.info("Starting Streamlit application on port %d...", port)

    command = [
        "streamlit",
        "run",
        "interface/streamlit_app.py",
        "--server.address=0.0.0.0",
        "--server.port",
        str(port),
    ]

    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as exc:
        logger.error("Failed to start Streamlit application: %s", exc)
        raise
    else:
        # Reached only after the subprocess has returned with status 0.
        logger.info("Streamlit application started successfully.")
37 | 


--------------------------------------------------------------------------------
/utils/llm/output_schema/document_suitability.py:
--------------------------------------------------------------------------------
 1 | """
 2 | DocumentSuitability 출력 모델.
 3 | 
 4 | LLM 구조화 출력으로부터 테이블별 적합성 평가 결과를 표현하는 Pydantic 모델입니다.
 5 | 최상위는 평가 객체 목록을 `results` 키로 감싸는 래퍼 모델(DocumentSuitabilityList)입니다.
 6 | """
 7 | 
 8 | from typing import List
 9 | 
10 | from pydantic import BaseModel, Field
11 | 
12 | 
class DocumentSuitability(BaseModel):
    """
    Suitability evaluation result for a single table.
    """

    # The ``description`` strings below are part of the model schema at
    # runtime, so they are intentionally left unchanged.
    table_name: str = Field(description="테이블명")
    score: float = Field(description="0.0~1.0 사이의 적합도 점수")
    reason: str = Field(description="한국어 한두 문장 근거")
    matched_columns: List[str] = Field(
        default_factory=list, description="질문과 직접 연관된 컬럼명 목록"
    )
    missing_entities: List[str] = Field(
        default_factory=list, description="부족한 엔티티/지표/기간 등"
    )
27 | 
28 | 
class DocumentSuitabilityList(BaseModel):
    """
    Wrapper holding a list of document suitability results.

    Uses an explicit top-level key (`results`) for compatibility with
    OpenAI Structured Outputs.
    """

    results: List[DocumentSuitability] = Field(description="평가 결과 목록")
37 | 


--------------------------------------------------------------------------------
/interface/core/session_utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Streamlit 세션 상태에서 그래프 빌더를 초기화하는 모듈.
 3 | 
 4 | 이 모듈은 Lang2SQL 애플리케이션의 그래프 실행 파이프라인을 준비하기 위해
 5 | 기본 또는 확장(enriched) 그래프 빌더를 선택적으로 로드하고,
 6 | 세션 상태에 초기화된 그래프 객체를 저장합니다.
 7 | 
 8 | Functions:
 9 |     init_graph(use_enriched: bool) -> str:
10 |         그래프 빌더를 초기화하고 세션 상태를 갱신합니다.
11 | """
12 | 
13 | import streamlit as st
14 | 
15 | 
def init_graph(use_enriched: bool) -> str:
    """Compile the selected graph builder and store it in the session state.

    Args:
        use_enriched (bool): Whether to use the enriched graph builder
            instead of the basic one.

    Returns:
        str: The graph flavour that was initialized, "확장된" for the
        enriched graph or "기본" for the basic graph.
    """
    import importlib

    builder_module = (
        "utils.llm.graph_utils.enriched_graph"
        if use_enriched
        else "utils.llm.graph_utils.basic_graph"
    )

    builder = importlib.import_module(builder_module).builder

    # Compile exactly once. The earlier setdefault("graph", builder.compile())
    # call built a graph that the next line overwrote unconditionally, so the
    # compile work was done twice for no effect.
    st.session_state["graph"] = builder.compile()
    st.session_state["use_enriched"] = use_enriched

    return "확장된" if use_enriched else "기본"
39 | 


--------------------------------------------------------------------------------
/prompt/query_maker_prompt.md:
--------------------------------------------------------------------------------
 1 | # Role
 2 | 
 3 | 당신은 데이터 분석 전문가(데이터 분석가 페르소나)입니다.
 4 | 사용자의 질문을 기반으로, 주어진 테이블과 컬럼 정보를 활용하여 적절한 SQL 쿼리를 생성하세요.
 5 | 
 6 | # 주의사항
 7 | - 사용자의 질문이 다소 모호하더라도, 주어진 데이터를 참고하여 합리적인 가정을 통해 SQL 쿼리를 완성하세요.
 8 | - 불필요한 재질문 없이, 가능한 가장 명확한 분석 쿼리를 만들어 주세요.
 9 | - 반드시 입력된 다이얼렉트 변수들을 준수하여 문법을 선택하세요.
10 | - 최종 출력 형식은 반드시 아래와 같아야 합니다.
11 | 
12 | # Output Example
13 | 최종 형태 예시:
14 | <SQL>
15 | ```sql
16 |     SELECT COUNT(DISTINCT user_id)
17 |     FROM stg_users
18 | ```
19 | 
20 | <해석>
21 | ```plaintext (max_length_per_line=100)
22 |     이 쿼리는 stg_users 테이블에서 고유한 사용자의 수를 계산합니다.
23 |     사용자는 유니크한 user_id를 가지고 있으며
24 |     중복을 제거하기 위해 COUNT(DISTINCT user_id)를 사용했습니다.
25 | ```
26 | 
27 | # Input
28 | 
29 | - 사용자 질문:
30 | {user_input}
31 | 
32 | - DB 환경:
33 | {user_database_env}
34 | 
35 | - 관련 테이블 및 컬럼 정보:
36 | {searched_tables}
37 | 
38 | - 다이얼렉트 정보:
39 |   - dialect_name: {dialect_name}
40 |   - supports_ilike: {supports_ilike}
41 |   - dialect_hints: {dialect_hints}
42 | 
43 | # Notes
44 | 
45 | - 위 입력을 바탕으로 최적의 SQL을 생성하세요.
46 | - {dialect_hints}를 참고하여 엔진에 맞는 함수/연산자를 우선 사용하세요.
47 | - 출력은 위 '최종 형태 예시'와 동일한 구조로만 작성하세요.


--------------------------------------------------------------------------------
/docs/branch_guidelines.md:
--------------------------------------------------------------------------------
 1 | # Branch Guidelines
 2 | 
 3 | ## 개요
 4 | 
 5 | 이 문서는 이 프로젝트에서의 브랜치 전략과 작업 방식에 대해 설명합니다.  
 6 | 새로운 기여자가 쉽게 이해하고 따를 수 있도록, 실제 작업 예시와 함께 브랜치 사용 규칙을 문서화합니다.
 7 | 
 8 | ## 브랜치 네이밍 규칙
 9 | 
10 | - 브랜치 이름 형식: `feature/이슈번호-작업내용`
11 |   - 예시: `feature/32-branch-guidelines`
12 | 
13 | ## 작업 절차 예시
14 | 
15 | 1. **문제점 발견**
16 |    - 작업이 필요한 버그나 개선 사항 등을 발견합니다.
17 | 
18 | 2. **이슈 생성**
19 |    - GitHub 또는 이슈 트래커에 이슈를 등록합니다.
20 |    - 이슈 템플릿:
21 | 
22 |      ```
23 |      - Why❓ : 이 작업이 필요한 이유
24 |      - How❓ : 해결 방법 또는 접근 방식
25 |      - What❓ : 작업해야 할 구체적인 내용
26 |      ```
27 | 
28 | 3. **master 브랜치 최신화**
29 |    - 로컬 환경에서 `master` 브랜치를 최신 상태로 유지합니다.
30 | 
31 | 4. **작업 브랜치 생성**
32 |    - 위의 네이밍 규칙에 따라 새로운 브랜치를 생성합니다.
33 |    - 예시: `feature/32-branch-guidelines`
34 | 
35 | 5. **작업 수행**
36 |    - 필요한 기능 구현 또는 코드 수정을 진행합니다.
37 | 
38 | 6. **Pull Request(PR) 생성**
39 |    - 작업이 완료되면 `master` 브랜치 대상으로 PR을 생성합니다.
40 |    - PR 제목과 설명에는 이슈 번호를 명시합니다.
41 | 
42 | 7. **리뷰 요청**
43 |    - 최소 2명의 팀원에게 코드 리뷰를 요청합니다.
44 | 
45 | 8. **병합(Merge)**
46 |    - 리뷰 승인을 받은 후 `master` 브랜치에 병합합니다.
47 | 
48 | ## 참고 사항
49 | 
50 | - 가능하면 커밋 메시지도 의미 있게 작성해주세요.
51 | - 필요 시 `hotfix/`, `bugfix/`, `refactor/` 등 브랜치 접두어를 추가로 사용할 수 있습니다.
52 | - PR에는 작업 목적, 변경 내용, 테스트 방법 등을 명확히 기술해주세요.
53 | 


--------------------------------------------------------------------------------
/interface/core/lang2sql_runner.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Lang2SQL 실행 모듈.
 3 | 
 4 | 이 모듈은 자연어로 입력된 질문을 SQL 쿼리로 변환하고,
 5 | 지정된 데이터베이스 환경에서 실행하는 함수(`run_lang2sql`)를 제공합니다.
 6 | 내부적으로 `engine.query_executor.execute_query`를 호출하여
 7 | Lang2SQL 전체 파이프라인을 간단히 실행할 수 있도록 합니다.
 8 | """
 9 | 
10 | from engine.query_executor import execute_query as execute_query_common
11 | 
12 | 
def run_lang2sql(
    query,
    database_env,
    retriever_name,
    top_n,
    device,
    use_enriched,
):
    """
    Run the Lang2SQL pipeline for one natural-language question.

    Thin wrapper that forwards every argument to
    `engine.query_executor.execute_query`.

    Args:
        query (str): Natural-language question entered by the user.
        database_env (str): Name of the database environment to use.
        retriever_name (str): Name of the retriever type.
        top_n (int): Number of table-info entries to retrieve.
        device (str): Device the model runs on ("cpu" or "cuda").
        use_enriched (bool): Forwarded as ``use_enriched_graph``.

    Returns:
        dict: Whatever `execute_query` returns, passed through unchanged
        (described upstream as the Lang2SQL result dictionary).
    """

    pipeline_kwargs = {
        "query": query,
        "database_env": database_env,
        "retriever_name": retriever_name,
        "top_n": top_n,
        "device": device,
        "use_enriched_graph": use_enriched,
    }
    return execute_query_common(**pipeline_kwargs)
46 | 


--------------------------------------------------------------------------------
/utils/llm/vectordb/factory.py:
--------------------------------------------------------------------------------
 1 | """
 2 | VectorDB 팩토리 모듈 - 환경 변수에 따라 적절한 VectorDB 인스턴스를 생성
 3 | """
 4 | 
 5 | import os
 6 | from typing import Optional
 7 | 
 8 | from utils.llm.vectordb.faiss_db import get_faiss_vector_db
 9 | from utils.llm.vectordb.pgvector_db import get_pgvector_db
10 | 
11 | 
def get_vector_db(
    vectordb_type: Optional[str] = None, vectordb_location: Optional[str] = None
):
    """
    Return the vector store instance matching the requested type and location.

    Args:
        vectordb_type: Vector store type ("faiss" or "pgvector"), matched
            case-insensitively and ignoring surrounding whitespace. When
            ``None``, the ``VECTORDB_TYPE`` environment variable is used
            (default "faiss").
        vectordb_location: Vector store location (FAISS: directory path,
            pgvector: connection string). When ``None``, the
            ``VECTORDB_LOCATION`` environment variable is used.

    Returns:
        The result of ``get_faiss_vector_db`` or ``get_pgvector_db``.

    Raises:
        ValueError: If the type is neither "faiss" nor "pgvector".
    """
    if vectordb_type is None:
        vectordb_type = os.getenv("VECTORDB_TYPE", "faiss")
    # Normalize explicit arguments the same way as the environment value;
    # previously only the environment value was lower-cased, so passing
    # "FAISS" directly was rejected.
    vectordb_type = vectordb_type.strip().lower()

    if vectordb_location is None:
        vectordb_location = os.getenv("VECTORDB_LOCATION")

    if vectordb_type == "faiss":
        return get_faiss_vector_db(vectordb_location)
    if vectordb_type == "pgvector":
        return get_pgvector_db(vectordb_location)
    raise ValueError(
        f"지원하지 않는 VectorDB 타입: {vectordb_type}. 'faiss' 또는 'pgvector'를 사용하세요."
    )
39 | 


--------------------------------------------------------------------------------
/utils/databases/connector/base_connector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 데이터베이스 커넥터의 기본 인터페이스 정의 모듈.
 3 | 
 4 | 이 모듈은 모든 DB 커넥터 클래스가 상속해야 하는
 5 | 공통 추상 클래스(BaseConnector)를 제공합니다.
 6 | """
 7 | 
 8 | from abc import ABC, abstractmethod
 9 | 
10 | import pandas as pd
11 | 
12 | 
class BaseConnector(ABC):
    """
    Abstract base class for database connectors.

    Concrete connectors (Postgres, MySQL, ...) inherit from this class and
    must implement `connect`, `run_sql` and `close`.

    Attributes:
        connection (Any): DB connection object, ``None`` by default.
            Initialized and managed by the concrete class.
    """

    connection = None

    @abstractmethod
    def connect(self):
        """
        Open the database connection.

        Each DB-specific connector provides its own implementation.
        """

    @abstractmethod
    def run_sql(self, sql: str) -> pd.DataFrame:
        """
        Execute a SQL query and return its result.

        Args:
            sql (str): SQL query string to execute.

        Returns:
            pd.DataFrame: Data frame containing the query result.
        """

    @abstractmethod
    def close(self) -> None:
        """
        Close the database connection.

        Implementations should release all resources (cursors, connections).

        Raises:
            RuntimeError: If an error occurs while closing the connection.
        """
59 | 


--------------------------------------------------------------------------------
/utils/llm/graph_utils/basic_graph.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 기본 워크플로우를 위한 StateGraph 구성입니다.
 3 | QUESTION_GATE -> GET_TABLE_INFO -> EVALUATE_DOCUMENT_SUITABILITY -> QUERY_MAKER 순서로 실행됩니다.
 4 | """
 5 | 
 6 | from langgraph.graph import END, StateGraph
 7 | 
 8 | from utils.llm.graph_utils.base import (
 9 |     EVALUATE_DOCUMENT_SUITABILITY,
10 |     GET_TABLE_INFO,
11 |     QUERY_MAKER,
12 |     QUESTION_GATE,
13 |     QueryMakerState,
14 |     document_suitability_node,
15 |     get_table_info_node,
16 |     query_maker_node,
17 |     question_gate_node,
18 | )
19 | 
# Create and configure the StateGraph; QUESTION_GATE is the entry point.
builder = StateGraph(QueryMakerState)
builder.set_entry_point(QUESTION_GATE)

# Register the nodes.
builder.add_node(QUESTION_GATE, question_gate_node)
builder.add_node(GET_TABLE_INFO, get_table_info_node)
builder.add_node(EVALUATE_DOCUMENT_SUITABILITY, document_suitability_node)
builder.add_node(QUERY_MAKER, query_maker_node)


def _route_after_gate(state: QueryMakerState):
    """Pick the node that follows QUESTION_GATE.

    Always returns GET_TABLE_INFO; ``state`` is not inspected.
    NOTE(review): because of that, the END entry in the conditional-edge
    mapping below is never selected. Confirm whether the gate result is
    meant to be able to stop the run.
    """
    return GET_TABLE_INFO


builder.add_conditional_edges(
    QUESTION_GATE,
    _route_after_gate,
    {
        GET_TABLE_INFO: GET_TABLE_INFO,
        END: END,
    },
)

# Fixed edges: table lookup -> suitability evaluation -> query generation.
builder.add_edge(GET_TABLE_INFO, EVALUATE_DOCUMENT_SUITABILITY)
builder.add_edge(EVALUATE_DOCUMENT_SUITABILITY, QUERY_MAKER)

# Finish after the QUERY_MAKER node.
builder.add_edge(QUERY_MAKER, END)
50 | 


--------------------------------------------------------------------------------
/interface/core/config/paths.py:
--------------------------------------------------------------------------------
 1 | """레지스트리 파일 경로 계산 및 상위 디렉토리 생성 유틸리티를 제공합니다."""
 2 | 
 3 | import os
 4 | from pathlib import Path
 5 | 
 6 | 
 7 | def get_registry_file_path() -> Path:
 8 |     # Allow override via env var, else default to ./config/data_sources.json
 9 |     override = os.getenv("LANG2SQL_REGISTRY_PATH")
10 |     if override:
11 |         return Path(override).expanduser().resolve()
12 |     return Path(os.getcwd()) / "config" / "data_sources.json"
13 | 
14 | 
15 | def get_db_registry_file_path() -> Path:
16 |     # Allow override via env var, else default to ./config/db_connections.json
17 |     override = os.getenv("LANG2SQL_DB_REGISTRY_PATH")
18 |     if override:
19 |         return Path(override).expanduser().resolve()
20 |     return Path(os.getcwd()) / "config" / "db_connections.json"
21 | 
22 | 
23 | def get_llm_registry_file_path() -> Path:
24 |     override = os.getenv("LANG2SQL_LLM_REGISTRY_PATH")
25 |     if override:
26 |         return Path(override).expanduser().resolve()
27 |     return Path(os.getcwd()) / "config" / "llm_profiles.json"
28 | 
29 | 
30 | def get_embedding_registry_file_path() -> Path:
31 |     override = os.getenv("LANG2SQL_EMBEDDING_REGISTRY_PATH")
32 |     if override:
33 |         return Path(override).expanduser().resolve()
34 |     return Path(os.getcwd()) / "config" / "embedding_profiles.json"
35 | 
36 | 
def ensure_parent_dir(path: Path) -> None:
    """Create the parent directory of ``path``, including missing ancestors.

    Does nothing when the directory already exists; ``path`` itself is not
    created.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
39 | 


--------------------------------------------------------------------------------
/prompt/document_suitability_prompt.md:
--------------------------------------------------------------------------------
 1 | ## 문서 적합성 평가 프롬프트 (Table Search 재랭킹)
 2 | 
 3 | 당신은 데이터 카탈로그 평가자입니다. 주어진 사용자 질문과 검색 결과(테이블 → 칼럼 설명 맵)를 바탕으로, 각 테이블이 질문에 얼마나 적합한지 0~1 사이의 실수 점수로 평가하세요.
 4 | 
 5 | ### 입력
 6 | - **question**: {question}
 7 | - **tables**: {tables}
 8 | 
 9 | ### 과업
10 | 1. **핵심 신호 추출**: 질문에서 엔터티/지표/시간/필터/그룹화 단서를 추출합니다.
11 | 2. **테이블별 점수화**: 각 테이블의 칼럼·설명과의 연관성으로 적합도를 점수화합니다(0~1, 소수 셋째 자리 반올림).
12 | 3. **근거와 보완점 제시**: 매칭된 칼럼과 부족한 요소(엔터티/지표/기간 등)를 한국어로 설명합니다.
13 | 4. **정렬**: 결과를 점수 내림차순으로 정렬해 반환합니다.
14 | 
15 | ### 평가 규칙(가이드)
16 | - **0.90~1.00**: 필요한 엔터티, 기간/시간 컬럼, 핵심 지표/측정 칼럼이 모두 존재. 직접 조회/집계만으로 답 가능.
17 | - **0.60~0.89**: 주요 신호 매칭, 일부 보완(기간/그룹 키/보조 칼럼) 필요. 조인 없이 근사 가능.
18 | - **0.30~0.59**: 일부만 매칭. 외부 컨텍스트나 조인 없이는 부정확/제한적.
19 | - **0.00~0.29**: 연관성 낮음. 스키마/도메인 불일치 또는 정책/운영성 테이블.
20 | 
21 | ### 주의
22 | - 칼럼 이름/설명에 실제로 존재하지 않는 항목을 매칭하지 마세요(환각 금지).
23 | - 시간 요구(특정 날짜/기간)가 있으면 timestamp/date/created_at 등 시간 계열 키를 중시하세요.
24 | - 엔티티 키(예: id, user_id, product_id)의 존재 여부를 가산점으로 반영하세요.
25 | - 키 이름은 정확히 입력 맵의 키만 사용하세요(자유 추측 금지).
26 | 
27 | ### 언어/출력 형식
28 | - 모든 텍스트 값은 한국어로 작성하세요.
29 | - 결과는 반드시 아래 JSON 스키마로만 반환하세요(추가/누락 키 금지).
30 | 
31 | ### 출력(JSON 스키마)
32 | {{
33 |   "results": [
34 |     {{
35 |       "table_name": string,
36 |       "score": number,  // 0.0~1.0, 소수 셋째 자리 반올림
37 |       "reason": string, // 한국어 한두 문장 근거
38 |       "matched_columns": string[],
39 |       "missing_entities": string[]
40 |     }}
41 |   ]
42 | }}
43 | 
44 | ### 검증 규칙
45 | - score는 [0, 1] 범위로 클램핑하고 소수 셋째 자리까지 반올림하세요.
46 | - matched_columns는 해당 테이블 객체의 실제 키만 포함하세요(단, table_description 제외).
47 | - reason 및 missing_entities는 한국어로 작성하세요.


--------------------------------------------------------------------------------
/dev/create_faiss.py:
--------------------------------------------------------------------------------
 1 | """
 2 | dev/create_faiss.py
 3 | 
 4 | CSV 파일에서 테이블과 컬럼 정보를 불러와 OpenAI 임베딩으로 벡터화한 뒤,
 5 | FAISS 인덱스를 생성하고 로컬 디렉토리에 저장한다.
 6 | 
 7 | 환경 변수:
 8 |     OPEN_AI_KEY: OpenAI API 키
 9 |     OPEN_AI_EMBEDDING_MODEL: 사용할 임베딩 모델 이름
10 | 
11 | 출력:
12 |     지정된 OUTPUT_DIR 경로에 FAISS 인덱스 저장
13 | """
14 | 
15 | import csv
16 | import os
17 | from collections import defaultdict
18 | 
19 | from dotenv import load_dotenv
20 | from langchain_community.vectorstores import FAISS
21 | from langchain_openai import OpenAIEmbeddings
22 | 
# Imported once here instead of on every iteration of the document loop.
from langchain.schema import Document

load_dotenv()
# Path of the CSV catalog.
CSV_PATH = "./dev/table_catalog.csv"
# Keep this identical to VECTORDB_LOCATION in .env.
OUTPUT_DIR = "./dev/table_info_db"

# table name -> {"desc": table description, "columns": [(name, description), ...]}
tables = defaultdict(lambda: {"desc": "", "columns": []})
with open(CSV_PATH, newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        t = row["table_name"].strip()
        # Each row carries the table description; the last row of a table wins.
        tables[t]["desc"] = row["table_description"].strip()
        col = row["column_name"].strip()
        col_desc = row["column_description"].strip()
        tables[t]["columns"].append((col, col_desc))


def _render_table_page(table_name, info):
    """Build the text that is embedded for one table."""
    cols = "\n".join(f"{c}: {d}" for c, d in info["columns"])
    return f"{table_name}: {info['desc']}\nColumns:\n {cols}"


# One document per table.
docs = [
    Document(page_content=_render_table_page(t, info)) for t, info in tables.items()
]

emb = OpenAIEmbeddings(
    model=os.getenv("OPEN_AI_EMBEDDING_MODEL"), openai_api_key=os.getenv("OPEN_AI_KEY")
)
db = FAISS.from_documents(docs, emb)
os.makedirs(OUTPUT_DIR, exist_ok=True)
db.save_local(OUTPUT_DIR)
print(f"FAISS index saved to: {OUTPUT_DIR}")
54 | 


--------------------------------------------------------------------------------
/utils/llm/llm_response_parser.py:
--------------------------------------------------------------------------------
 1 | """
 2 | LLM 응답 텍스트에서 특정 마크업 태그(`<SQL>`, `<해석>`)에 포함된 콘텐츠 블록을 추출하는 유틸리티 모듈입니다.
 3 | 
 4 | 이 모듈은 OpenAI, LangChain 등에서 생성된 LLM 응답 문자열에서 Markdown 코드 블록을 파싱하여,
 5 | SQL 쿼리 및 자연어 해석 설명을 분리하여 사용할 수 있도록 정적 메서드 형태의 API를 제공합니다.
 6 | 
 7 | 지원되는 태그:
 8 |     - <SQL>: SQL 코드 블록 (```sql ... ```)
 9 |     - <해석>: 자연어 해석 블록 (```plaintext ... ```)
10 | """
11 | 
12 | import re
13 | 
14 | 
class LLMResponseParser:
    """
    Utility class that extracts the blocks following the `<SQL>` and `<해석>`
    tags from an LLM response string.

    Features:
        - Extract the SQL query from the ```sql ... ``` block after <SQL>.
        - Extract the explanation from the ```plaintext ... ``` block after <해석>.

    Anything on the opening fence line after the language tag is ignored, so a
    fence such as ```plaintext (max_length_per_line=100) (the form shown in
    the query-maker prompt) is parsed as well. Previously such a fence did not
    match and the interpretation came back empty.
    """

    # ``[^\n]*`` skips the remainder of the opening fence line.
    _SQL_BLOCK = re.compile(r"<SQL>\s*```sql[^\n]*\n(.*?)```", re.DOTALL)
    _INTERPRETATION_BLOCK = re.compile(
        r"<해석>\s*```plaintext[^\n]*\n(.*?)```", re.DOTALL
    )

    @staticmethod
    def extract_sql(text: str) -> str:
        """
        Extract only the SQL code block that follows the <SQL> tag.

        Args:
            text (str): Full LLM response string.

        Returns:
            str: SQL query string (stripped text inside ```sql ... ```).

        Raises:
            ValueError: If no <SQL> tag with a SQL code block is found.
        """
        match = LLMResponseParser._SQL_BLOCK.search(text)
        if match:
            return match.group(1).strip()
        raise ValueError("SQL 블록을 추출할 수 없습니다.")

    @staticmethod
    def extract_interpretation(text: str) -> str:
        """
        Extract only the explanation text that follows the <해석> tag.

        Args:
            text (str): Full LLM response string.

        Returns:
            str: Stripped explanation text, or an empty string when the block
            is absent.
        """
        match = LLMResponseParser._INTERPRETATION_BLOCK.search(text)
        if match:
            return match.group(1).strip()
        return ""
58 | 


--------------------------------------------------------------------------------
/docs/pull_request_guidelines.md:
--------------------------------------------------------------------------------
 1 | # Pull Request & Code Review Guidelines
 2 | 
 3 | 이 문서는 이 프로젝트의 Pull Request(PR) 작성 및 코드 리뷰 시 지켜야 할 규칙과 포맷을 정의합니다.  
 4 | 기여자와 리뷰어 모두가 **효율적이고 일관된 협업**을 할 수 있도록 도움을 줍니다.
 5 | 
 6 | ## ✅ PR 작성 포맷
 7 | 
 8 | PR을 생성할 때는 아래 형식을 따라 주세요:
 9 | 
10 | ```md
11 | ## #️⃣ Issue Number
12 | <!-- ex) #이슈번호, #이슈번호 -->
13 | - 예시: #32
14 | 
15 | ## 📝 요약(Summary)
16 | <!-- 해당 PR에 대해서 간단히 설명해주세요(3줄 이내). (Why? How?) -->
17 | <!-- (세부적인 내용은 Issue에 작성되었을 것이라고 가정합니다) -->
18 | - 예시: 브랜치 전략 가이드라인 문서 추가  
19 | - 프로젝트 내 컨트리뷰션 흐름 정리를 위한 작업  
20 | - 신규 기여자 onboarding 시 참고 문서로 활용 예정
21 | 
22 | ## 💬  To Reviewers (선택)
23 | <!-- 리뷰어가 중점적으로 봐줬으면 좋겠는 부분이 있으면 작성해주세요. -->
24 | - 예시: 문서 구조나 용어 선택이 자연스러운지 확인 부탁드립니다.
25 | 
26 | ## PR Checklist
27 | <!-- [x] 항목 완료 여부 체크해주세요 -->
28 | - [x] 변경 사항에 대한 테스트 또는 검증 완료
29 | - [x] 로컬에서 정상 동작 확인
30 | - [ ] 관련 문서 업데이트 완료
31 | ```
32 | 
33 | ## 🔍 Code Review Emoji Rules
34 | 
35 | 리뷰어는 아래의 **이모지 규칙**에 따라 피드백을 남겨주세요.  
36 | 서로의 의도를 명확히 전달하는 데 도움이 됩니다.
37 | 
38 | | 이모지 | 의미 | 사용 예시 |
39 | |--------|------|-----------|
40 | | 👍     | **칭찬** (Good Job) | 구조가 깔끔하고 읽기 쉬운 코드입니다! |
41 | | ❗     | **필수 수정** (Required) | 이 부분은 예외 처리가 필요해 보여요. 꼭 수정해주세요. |
42 | | ❓     | **질문** (Clarify) | 이 로직이 이런 방식으로 구성된 이유가 있을까요? |
43 | | 💊     | **제안** (Optional Suggestion) | 이 부분은 `map` 대신 `flatMap`도 고려해볼 수 있을 것 같아요. |
44 | 
45 | ## 🔁 리뷰 프로세스
46 | 
47 | 1. PR 생성자는 **2명 이상의 리뷰어**를 지정합니다.
48 | 2. 리뷰어는 위의 이모지 규칙에 따라 피드백을 남깁니다.
49 | 3. 모든 ❗ 항목이 해소되고 리뷰어 전원이 👍 또는 승인(approve)을 남기면 병합 가능합니다.
50 | 4. 병합 전에는 가급적 `master` 브랜치를 기준으로 최신 상태로 유지해주세요.
51 | 
52 | ## 📌 기타 권장 사항
53 | 
54 | - 커밋 메시지는 명확하고 목적 중심적으로 작성해주세요.
55 |   - 예: `docs: add branch guidelines documentation`
56 | - 하나의 PR에는 가능한 한 **의미 있는 단위의 작업**만 포함해주세요.
57 | - 기능 단위 커밋 → PR 단위 리뷰 → 병합이라는 흐름을 지켜주세요.
58 | 


--------------------------------------------------------------------------------
/utils/llm/graph_utils/enriched_graph.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 기본 워크플로우에 '프로파일 추출(PROFILE_EXTRACTION)'과 '컨텍스트 보강(CONTEXT_ENRICHMENT)'를
 3 | 추가한 확장된 그래프입니다.
 4 | """
 5 | 
 6 | from langgraph.graph import END, StateGraph
 7 | 
 8 | from utils.llm.graph_utils.base import (
 9 |     CONTEXT_ENRICHMENT,
10 |     EVALUATE_DOCUMENT_SUITABILITY,
11 |     GET_TABLE_INFO,
12 |     PROFILE_EXTRACTION,
13 |     QUERY_MAKER,
14 |     QUESTION_GATE,
15 |     QueryMakerState,
16 |     context_enrichment_node,
17 |     document_suitability_node,
18 |     get_table_info_node,
19 |     profile_extraction_node,
20 |     query_maker_node,
21 |     question_gate_node,
22 | )
23 | 
# Create and configure the StateGraph; QUESTION_GATE is the entry point.
builder = StateGraph(QueryMakerState)
builder.set_entry_point(QUESTION_GATE)

# Register the nodes.
builder.add_node(QUESTION_GATE, question_gate_node)
builder.add_node(GET_TABLE_INFO, get_table_info_node)
builder.add_node(EVALUATE_DOCUMENT_SUITABILITY, document_suitability_node)
builder.add_node(PROFILE_EXTRACTION, profile_extraction_node)
builder.add_node(CONTEXT_ENRICHMENT, context_enrichment_node)
builder.add_node(QUERY_MAKER, query_maker_node)


def _route_after_gate(state: QueryMakerState):
    """Pick the node that follows QUESTION_GATE.

    Always returns GET_TABLE_INFO; ``state`` is not inspected.
    NOTE(review): because of that, the END entry in the conditional-edge
    mapping below is never selected. Confirm whether the gate result is
    meant to be able to stop the run.
    """
    return GET_TABLE_INFO


builder.add_conditional_edges(
    QUESTION_GATE,
    _route_after_gate,
    {
        GET_TABLE_INFO: GET_TABLE_INFO,
        END: END,
    },
)

# Fixed edges: table lookup -> suitability evaluation -> profile extraction
# -> context enrichment -> query generation.
builder.add_edge(GET_TABLE_INFO, EVALUATE_DOCUMENT_SUITABILITY)
builder.add_edge(EVALUATE_DOCUMENT_SUITABILITY, PROFILE_EXTRACTION)
builder.add_edge(PROFILE_EXTRACTION, CONTEXT_ENRICHMENT)
builder.add_edge(CONTEXT_ENRICHMENT, QUERY_MAKER)

# Finish after the QUERY_MAKER node.
builder.add_edge(QUERY_MAKER, END)
58 | 


--------------------------------------------------------------------------------
/interface/app_pages/sidebar_components/db_selector.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import streamlit as st
 3 | 
 4 | from interface.core.config import get_db_connections_registry, update_db_settings
 5 | 
 6 | 
 7 | def render_sidebar_db_selector() -> None:
 8 |     st.sidebar.markdown("### DB 연결")
 9 | 
10 |     registry = get_db_connections_registry()
11 |     names = [c.name for c in registry.connections]
12 |     if not names:
13 |         st.sidebar.warning("등록된 DB 프로파일이 없습니다. 설정 > DB에서 추가하세요.")
14 |         return
15 | 
16 |     # 기본 선택: 세션 또는 ENV의 DB_TYPE과 일치하는 첫 프로파일
17 |     current_type = (
18 |         st.session_state.get("DB_TYPE") or os.getenv("DB_TYPE") or ""
19 |     ).lower()
20 |     default_index = 0
21 |     if current_type:
22 |         for idx, c in enumerate(registry.connections):
23 |             if c.type == current_type:
24 |                 default_index = idx
25 |                 break
26 | 
27 |     sel_name = st.sidebar.selectbox(
28 |         "프로파일", options=names, index=default_index, key="sidebar_db_profile"
29 |     )
30 |     selected = next((c for c in registry.connections if c.name == sel_name), None)
31 |     if selected is None:
32 |         st.sidebar.error("선택한 프로파일을 찾을 수 없습니다.")
33 |         return
34 | 
35 |     if st.sidebar.button("적용", key="sidebar_apply_db"):
36 |         try:
37 |             values = {
38 |                 "host": selected.host,
39 |                 "port": selected.port,
40 |                 "user": selected.user,
41 |                 "password": selected.password,
42 |                 "database": selected.database,
43 |                 "extra": selected.extra,
44 |             }
45 |             update_db_settings(db_type=selected.type, values=values, secrets={})
46 |             st.sidebar.success(f"DB 적용됨: {selected.name}")
47 |         except Exception as e:
48 |             st.sidebar.error(f"적용 실패: {e}")
49 | 


--------------------------------------------------------------------------------
/.github/workflows/pr-notification.yml:
--------------------------------------------------------------------------------
# Sends a Telegram message when a pull request is opened, and again when a
# pull request is closed as merged.
name: Notify Telegram on PR Events

on:
  pull_request:
    types: [opened, closed]

jobs:
  notify:
    runs-on: ubuntu-latest
    steps:
      # Announce a newly opened PR (title, author, link).
      - name: Send Telegram Message on PR Open
        if: github.event_name == 'pull_request' && github.event.action == 'opened'
        env:
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_URL: ${{ github.event.pull_request.html_url }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
        # `%0A` in MESSAGE is a URL-encoded newline.
        # NOTE(review): `-d` posts the text without URL-encoding it, so a PR
        # title containing `&` or `%` may be cut off or garbled; consider
        # `--data-urlencode` (with real newlines in MESSAGE) - confirm.
        run: |
          MESSAGE="📢 새로운 PR 등록!%0A%0A제목: $PR_TITLE%0A작성자: $PR_AUTHOR%0A링크: $PR_URL"
          curl -s -X POST "https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage" \
          -d chat_id="$TELEGRAM_CHAT_ID" \
          -d text="$MESSAGE"

      # Despite the step name, the condition below fires when the PR is closed
      # AND merged, not when it is approved.
      - name: Send Telegram Message on PR Approved
        if: github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged
        env:
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
          PR_TITLE: ${{ github.event.pull_request.title }}
          PR_URL: ${{ github.event.pull_request.html_url }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
          # NOTE(review): this workflow only triggers on `pull_request`, whose
          # payload does not appear to carry a `review` object, so REVIEWER
          # presumably expands to an empty string - confirm.
          REVIEWER: ${{ github.event.review.user.login }}
        # Same `-d` / URL-encoding caveat as the step above applies here.
        run: |
          MESSAGE="✅ PR이 병합되었습니다! %0A%0A제목: $PR_TITLE%0A작성자: $PR_AUTHOR%0A리뷰어: $REVIEWER%0A링크: $PR_URL"
          curl -s -X POST "https://api.telegram.org/bot$TELEGRAM_BOT_TOKEN/sendMessage" \
          -d chat_id="$TELEGRAM_CHAT_ID" \
          -d text="$MESSAGE"
39 | 


--------------------------------------------------------------------------------
/dev/create_pgvector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | dev/create_pgvector.py
 3 | 
 4 | CSV 파일에서 테이블과 컬럼 정보를 불러와 OpenAI 임베딩으로 벡터화한 뒤,
 5 | pgvector에 적재한다.
 6 | 
 7 | 환경 변수:
 8 |     OPEN_AI_KEY: OpenAI API 키
 9 |     OPEN_AI_EMBEDDING_MODEL: 사용할 임베딩 모델 이름
10 |     VECTORDB_LOCATION: pgvector 연결 문자열
11 |     PGVECTOR_COLLECTION: pgvector 컬렉션 이름
12 | """
13 | 
14 | import csv
15 | import os
16 | from collections import defaultdict
17 | 
18 | from dotenv import load_dotenv
19 | from langchain.schema import Document
20 | from langchain_openai import OpenAIEmbeddings
21 | from langchain_postgres.vectorstores import PGVector
22 | 
load_dotenv()

# Path of the CSV catalog that lists tables and their columns.
CSV_PATH = "./dev/table_catalog.csv"
# Keep this in sync with VECTORDB_LOCATION in .env.
CONN = (
    os.getenv("VECTORDB_LOCATION")
    or "postgresql://pgvector:pgvector@localhost:5432/postgres"
)
COLLECTION = os.getenv("PGVECTOR_COLLECTION", "table_info_db")

# Group the CSV rows by table: one description plus a list of
# (column name, column description) pairs per table.
tables = defaultdict(lambda: {"desc": "", "columns": []})
with open(CSV_PATH, newline="", encoding="utf-8") as csv_file:
    for record in csv.DictReader(csv_file):
        entry = tables[record["table_name"].strip()]
        entry["desc"] = record["table_description"].strip()
        entry["columns"].append(
            (record["column_name"].strip(), record["column_description"])
        )

# One document per table: a header line followed by the column listing.
docs = []
for table_name, table_info in tables.items():
    column_lines = "\n".join(
        f"{column}: {description}" for column, description in table_info["columns"]
    )
    page_text = f"{table_name}: {table_info['desc']}\nColumns:\n {column_lines}"
    docs.append(Document(page_content=page_text))

emb = OpenAIEmbeddings(
    model=os.getenv("OPEN_AI_EMBEDDING_MODEL"),
    openai_api_key=os.getenv("OPEN_AI_KEY"),
)
# Embed the documents and store them in the configured pgvector collection.
PGVector.from_documents(
    documents=docs,
    embedding=emb,
    connection=CONN,
    collection_name=COLLECTION,
)
print(f"pgvector collection populated: {COLLECTION}")
55 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["hatchling>=1.25.0"]
 3 | build-backend = "hatchling.build"
 4 | 
 5 | [project]
 6 | name = "lang2sql"
 7 | dynamic = ["version"]
 8 | description = "Lang2SQL - Query Generator for Data Warehouse"
 9 | readme = "README.md"
10 | requires-python = ">=3.9"
11 | authors = [
12 |   { name = "ehddnr301", email = "dy95032@gmail.com" },
13 | ]
14 | dependencies = [
15 |   "langgraph==0.2.62",
16 |   "datahub==0.999.1",
17 |   "langchain==0.3.14",
18 |   "langchain-community==0.3.14",
19 |   "openai==1.59.8",
20 |   "langchain-openai==0.3.0",
21 |   "streamlit==1.41.1",
22 |   "python-dotenv==1.0.1",
23 |   "faiss-cpu==1.10.0",
24 |   "transformers==4.51.2",
25 |   "langchain-aws>=0.2.21,<0.3.0",
26 |   "langchain-google-genai>=2.1.3,<3.0.0",
27 |   "langchain-ollama>=0.3.2,<0.4.0",
28 |   "langchain-huggingface>=0.1.2,<0.2.0",
29 |   "clickhouse_driver>=0.2.9,<0.3.0",
30 |   "plotly",
31 |   "matplotlib",
32 |   "ipython",
33 |   "kaleido",
34 |   "numpy<2.0",
35 |   "snowflake-connector-python>=3.15.0,<4.0.0",
36 |   "databricks-sql-connector>=4.0.3,<5.0.0",
37 |   "oracledb>=3.1.0,<4.0.0",
38 |   "mysql-connector-python>=9.3.0,<10.0.0",
39 |   "duckdb>=1.2.2,<2.0.0",
40 |   "psycopg2-binary>=2.9.10,<3.0.0",
41 |   "psycopg[binary]>=3.2,<4.0",
42 |   "pyodbc>=5.1.0,<6.0.0",
43 |   "crate>=0.29.0,<1.0.0",
44 |   "pyhive>=0.6.6,<1.0.0",
45 |   "google-cloud-bigquery>=3.20.1,<4.0.0",
46 |   "pgvector==0.3.6",
47 |   "langchain-postgres==0.0.15",
48 |   "trino>=0.329.0,<1.0.0",
49 | ]
50 | 
51 | [project.scripts]
52 | lang2sql = "cli.__init__:cli"
53 | 
54 | [project.urls]
55 | Homepage = "https://github.com/CausalInferenceLab/Lang2SQL"
56 | Repository = "https://github.com/CausalInferenceLab/Lang2SQL"
57 | 
58 | [tool.hatch.version]
59 | path = "version.py"
60 | 
61 | [tool.hatch.build]
62 | include = [
63 |   "version.py",
64 |   "prompt/*.md",
65 | ]
66 | 
67 | [tool.hatch.build.targets.wheel]
68 | packages = [
69 |   "cli",
70 |   "interface",
71 |   "engine",
72 |   "infra",
73 |   "prompt",
74 |   "utils",
75 | ]
76 | 
77 | [tool.uv]
78 | dev-dependencies = [
79 |   "pre_commit==4.1.0",
80 |   "pytest>=8.3.5",
81 | ]
82 | 


--------------------------------------------------------------------------------
/interface/app_pages/sidebar_components/chatbot_session_controller.py:
--------------------------------------------------------------------------------
 1 | """ChatBot 세션 제어를 위한 사이드바 컴포넌트"""
 2 | 
 3 | import streamlit as st
 4 | import uuid
 5 | 
 6 | 
 7 | def render_sidebar_chatbot_session_controller() -> str:
 8 |     """ChatBot 세션 관리 및 대화 기록 표시 (사이드바 전용)
 9 | 
10 |     Returns:
11 |         str: 현재 thread_id
12 |     """
13 |     # 세션 ID 자동 생성 (처음 방문 시에만)
14 |     if "chatbot_thread_id" not in st.session_state:
15 |         st.session_state.chatbot_thread_id = str(uuid.uuid4())[:8]  # 8자리 짧은 ID
16 | 
17 |     thread_id = st.session_state.chatbot_thread_id
18 | 
19 |     # 세션 관리 섹션
20 |     st.markdown("### 📋 세션 관리")
21 | 
22 |     # 세션 정보 표시
23 |     st.markdown(f"**현재 세션:** `{thread_id}`")
24 |     st.caption("대화 기록을 구분하는 고유 ID입니다.")
25 | 
26 |     # 새 세션 시작 버튼
27 |     if st.button(
28 |         "🔄 새 세션 시작",
29 |         use_container_width=True,
30 |         help="새로운 대화 세션을 시작합니다.",
31 |     ):
32 |         st.session_state.chatbot_thread_id = str(uuid.uuid4())[:8]
33 |         st.session_state.chatbot_messages = []
34 |         st.rerun()
35 | 
36 |     # 대화 기록 섹션
37 |     if st.session_state.get("chatbot_messages"):
38 |         st.divider()
39 |         st.markdown("### 💬 대화 기록")
40 | 
41 |         # 메시지 개수 표시
42 |         message_count = len(st.session_state.chatbot_messages)
43 |         st.caption(f"총 {message_count}개의 메시지")
44 | 
45 |         # 대화 기록 표시 (접힌 상태)
46 |         with st.expander("📄 전체 기록 보기 (JSON)", expanded=False):
47 |             st.json(st.session_state.chatbot_messages)
48 | 
49 |         # 최근 메시지 미리보기
50 |         if message_count > 0:
51 |             with st.expander("👀 최근 메시지 미리보기", expanded=False):
52 |                 recent_messages = st.session_state.chatbot_messages[-3:]  # 최근 3개
53 |                 for msg in recent_messages:
54 |                     role_icon = "👤" if msg["role"] == "user" else "🤖"
55 |                     role_text = "사용자" if msg["role"] == "user" else "AI"
56 |                     content_preview = (
57 |                         msg["content"][:50] + "..."
58 |                         if len(msg["content"]) > 50
59 |                         else msg["content"]
60 |                     )
61 |                     st.caption(f"{role_icon} {role_text}: {content_preview}")
62 | 
63 |     return thread_id
64 | 


--------------------------------------------------------------------------------
/utils/databases/connector/duckdb_connector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | DuckDB 데이터베이스 커넥터 모듈.
 3 | 
 4 | 이 모듈은 DuckDB 데이터베이스에 연결하여 SQL 쿼리를 실행하고,
 5 | 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
 6 | """
 7 | 
 8 | import duckdb
 9 | import pandas as pd
10 | 
11 | from utils.databases.config import DBConfig
12 | from utils.databases.connector.base_connector import BaseConnector
13 | from utils.databases.logger import logger
14 | 
15 | 
class DuckDBConnector(BaseConnector):
    """
    Connector for DuckDB databases.

    Opens a DuckDB connection on construction, runs SQL and returns the result
    as a pandas DataFrame, and closes the connection on request.
    """

    connection = None

    def __init__(self, config: DBConfig):
        """
        Initialize the connector and open the connection.

        Args:
            config (DBConfig): DuckDB connection settings. The ``path`` key
                names the database file; when it is missing or ``None`` an
                in-memory database (``:memory:``) is used.
        """
        # ``config.get("path", ":memory:")`` would hand an explicit None to
        # duckdb.connect, so treat a None value like a missing key.
        path = config.get("path")
        self.database = ":memory:" if path is None else path
        self.connect()

    def connect(self) -> None:
        """
        Open the DuckDB connection for ``self.database``.

        Raises:
            Exception: Whatever ``duckdb.connect`` raised; it is logged and
                re-raised unchanged.
        """
        try:
            self.connection = duckdb.connect(database=self.database)
            logger.info("Successfully connected to DuckDB.")
        except Exception as e:
            logger.error("Failed to connect to DuckDB: %s", e)
            raise

    def run_sql(self, sql: str) -> pd.DataFrame:
        """
        Execute a SQL query and return the result as a pandas DataFrame.

        If the connection was closed earlier, it is re-opened first, matching
        the behaviour of the SQLite and ClickHouse connectors.

        Args:
            sql (str): SQL text to execute.

        Returns:
            pd.DataFrame: The query result.

        Raises:
            Exception: Whatever the execution raised; it is logged and
                re-raised unchanged.
        """
        if self.connection is None:
            self.connect()

        try:
            return self.connection.execute(sql).fetchdf()
        except Exception as e:
            logger.error("Failed to execute SQL query: %s", e)
            raise

    def close(self) -> None:
        """
        Close the DuckDB connection if one is open and reset it to ``None``.
        """
        if self.connection:
            self.connection.close()
            logger.info("Connection to DuckDB closed.")
        self.connection = None
80 | 


--------------------------------------------------------------------------------
/interface/core/config/models.py:
--------------------------------------------------------------------------------
 1 | """설정 및 각 레지스트리에서 사용하는 데이터 모델(dataclass) 정의 모듈입니다."""
 2 | 
 3 | from dataclasses import dataclass, field
 4 | from typing import List, Optional, Any, Dict
 5 | 
 6 | 
 7 | @dataclass
 8 | class Config:
 9 |     datahub_server: str
10 |     vectordb_type: str
11 |     vectordb_location: str
12 |     data_source_mode: str | None = None  # "datahub" | "vectordb" | None
13 | 
14 | 
@dataclass
class DataHubSource:
    """A named DataHub source entry: its URL plus an optional FAISS path and note."""

    name: str
    url: str
    faiss_path: Optional[str] = None
    note: Optional[str] = None


@dataclass
class VectorDBSource:
    """A named vector DB source entry: its type and location plus optional extras."""

    name: str
    type: str  # 'faiss' | 'pgvector'
    location: str
    collection_prefix: Optional[str] = None
    note: Optional[str] = None


@dataclass
class DataSourcesRegistry:
    """Container for the registered DataHub and vector DB sources (both default to empty lists)."""

    datahub: List[DataHubSource] = field(default_factory=list)
    vectordb: List[VectorDBSource] = field(default_factory=list)
36 | 
37 | 
@dataclass
class DBConnectionProfile:
    """A named database connection profile; every field except name and type is optional."""

    name: str
    type: str  # 'postgresql' | 'mysql' | 'mariadb' | 'oracle' | 'clickhouse' | 'duckdb' | 'sqlite' | 'databricks' | 'snowflake' | 'trino'
    host: Optional[str] = None
    port: Optional[int] = None
    user: Optional[str] = None
    password: Optional[str] = None
    database: Optional[str] = None
    extra: Optional[Dict[str, Any]] = None  # non-secret
    note: Optional[str] = None


@dataclass
class DBConnectionsRegistry:
    """Container for the registered DB connection profiles (defaults to an empty list)."""

    connections: List[DBConnectionProfile] = field(default_factory=list)
54 | 
55 | 
@dataclass
class LLMProfile:
    """A named LLM profile: provider id plus a string-to-string map of provider fields."""

    name: str
    provider: (
        str  # 'openai' | 'azure' | 'bedrock' | 'gemini' | 'ollama' | 'huggingface'
    )
    fields: Dict[str, str] = field(default_factory=dict)  # includes secrets
    note: Optional[str] = None


@dataclass
class LLMRegistry:
    """Container for the registered LLM profiles (defaults to an empty list)."""

    profiles: List[LLMProfile] = field(default_factory=list)
69 | 
70 | 
@dataclass
class EmbeddingProfile:
    """A named embedding profile: provider id plus a string-to-string map of provider fields."""

    name: str
    provider: (
        str  # 'openai' | 'azure' | 'bedrock' | 'gemini' | 'ollama' | 'huggingface'
    )
    fields: Dict[str, str] = field(default_factory=dict)
    note: Optional[str] = None


@dataclass
class EmbeddingRegistry:
    """Container for the registered embedding profiles (defaults to an empty list)."""

    profiles: List[EmbeddingProfile] = field(default_factory=list)
84 | 


--------------------------------------------------------------------------------
/infra/monitoring/check_server.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 서버 상태 확인 및 연결 관련 기능을 제공하는 유틸리티 클래스입니다.
 3 | 
 4 | 이 모듈은 HTTP 기반의 서버에 대해 다음과 같은 기능을 제공합니다:
 5 | - `/health` 엔드포인트를 통한 서버 헬스 체크
 6 | - 향후 서버 연결 또는 상태 점검과 관련된 기능 추가 예정
 7 | 
 8 | 각 기능은 요청 실패, 타임아웃, 연결 오류 등의 다양한 예외 상황을 포괄적으로 처리하며,
 9 | 로깅을 통해 상세한 실패 원인을 기록하고 결과를 boolean 또는 적절한 형태로 반환합니다.
10 | """
11 | 
12 | import logging
13 | from urllib.parse import urljoin
14 | 
15 | import requests
16 | 
17 | logging.basicConfig(
18 |     level=logging.INFO,
19 |     format="%(asctime)s [%(levelname)s] %(message)s",
20 |     datefmt="%Y-%m-%d %H:%M:%S",
21 | )
22 | logger = logging.getLogger(__name__)
23 | 
24 | 
class CheckServer:
    """
    Utility methods for checking server status and connectivity.

    Currently this holds a single health check against the `/health` endpoint
    of a GMS server. Failures are logged and reported to the caller as a
    boolean rather than raised.
    """

    @staticmethod
    def is_gms_server_healthy(*, url: str) -> bool:
        """
        Check a GMS server by requesting its `/health` endpoint.

        A GET request with a 3-second timeout is sent to ``/health`` joined
        onto ``url``. Timeouts, connection failures, HTTP error statuses and
        other request errors are logged and reported as ``False``.

        Args:
            url (str): Base URL of the GMS server (e.g. "http://localhost:8080").

        Returns:
            bool: True when the request succeeds without an HTTP error status,
                False when any handled request error occurs.
        """
        health_url = urljoin(url, "/health")
        timeout_errors = (
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
        )

        try:
            requests.get(health_url, timeout=3).raise_for_status()
        except timeout_errors as exc:
            # Must precede ConnectionError: ConnectTimeout is also a ConnectionError.
            logger.error(
                "Timeout while connecting to GMS server: %s | %s", health_url, exc
            )
            return False
        except requests.exceptions.ConnectionError as exc:
            logger.error("Failed to connect to GMS server: %s | %s", health_url, exc)
            return False
        except requests.exceptions.HTTPError as exc:
            logger.error("GMS server returned HTTP error: %s | %s", health_url, exc)
            return False
        except requests.exceptions.RequestException:
            # Any other requests failure: log it together with the traceback.
            logger.exception("Unexpected request error to GMS server: %s", health_url)
            return False

        logger.info("GMS server is healthy: %s", url)
        return True
75 | 


--------------------------------------------------------------------------------
/infra/observability/README.md:
--------------------------------------------------------------------------------
 1 | # observability
 2 | 
 3 | LLM 응답 메시지에서 토큰 사용량을 집계하고 관찰(observability)하기 위한 유틸리티 모듈을 제공하는 디렉토리입니다.
 4 | 
 5 | ## 디렉토리 구조
 6 | 
 7 | ```
observability/
├── README.md
└── token_usage.py
11 | ```
12 | 
13 | ## 파일 설명
14 | 
15 | ### token_usage.py
16 | 
17 | LLM 응답 메시지에서 토큰 사용량을 집계하기 위한 유틸리티 모듈입니다.
18 | 
19 | #### 주요 내용
20 | 
21 | - **TokenUtils 클래스**: LLM 토큰 사용량 집계 유틸리티 클래스
22 |   - `get_token_usage_summary()`: 메시지 데이터에서 input/output/total 토큰 사용량을 각각 집계하는 정적 메서드
23 |   - `usage_metadata` 필드를 기반으로 입력 토큰, 출력 토큰, 총 토큰 사용량을 계산
24 |   - Streamlit, LangChain 등 LLM 응답을 다루는 애플리케이션에서 비용 분석, 사용량 추적 등에 활용 가능
25 | 
26 | #### 반환 형식
27 | 
28 | ```python
29 | {
30 |     "input_tokens": int,
31 |     "output_tokens": int,
32 |     "total_tokens": int
33 | }
34 | ```
35 | 
36 | ## 사용 방법
37 | 
38 | ### Import
39 | 
40 | 이 모듈은 다음과 같이 import되어 사용됩니다:
41 | 
42 | ```python
43 | from infra.observability.token_usage import TokenUtils
44 | ```
45 | 
46 | ### 실제 사용 예시
47 | 
48 | #### 1. interface/core/result_renderer.py
49 | 
50 | `TokenUtils`는 Lang2SQL 결과 표시 모듈에서 토큰 사용량을 계산하고 Streamlit UI에 표시하는 데 사용됩니다.
51 | 
```python
# interface/core/result_renderer.py (lines 75-85)
53 |     if should_show("show_token_usage"):
54 |         st.markdown("---")
55 |         token_summary = TokenUtils.get_token_usage_summary(data=res["messages"])
56 |         st.write("**토큰 사용량:**")
57 |         st.markdown(
58 |             f"""
59 |             - Input tokens: `{token_summary['input_tokens']}`
60 |             - Output tokens: `{token_summary['output_tokens']}`
61 |             - Total tokens: `{token_summary['total_tokens']}`
62 |             """
63 |         )
64 | ```
65 | 
66 | **사용 컨텍스트**:
67 | - `display_result()` 함수 내에서 LLM 실행 결과(`res`)의 `messages` 리스트를 전달받아 토큰 사용량을 집계
68 | - Streamlit UI에서 토큰 사용량 정보를 마크다운 형식으로 표시
69 | - 사용자가 설정에서 토큰 사용량 표시 옵션(`show_token_usage`)을 활성화한 경우에만 표시
70 | 
71 | **입력 데이터 형식**:
72 | - `data` 파라미터는 각 항목이 `usage_metadata` 속성을 포함할 수 있는 객체 리스트입니다
73 | - 예: LangChain의 `AIMessage` 객체 리스트
74 | 
75 | #### 사용 패턴
76 | 
77 | ```python
78 | # 기본 사용법
79 | token_summary = TokenUtils.get_token_usage_summary(data=messages)
80 | 
81 | # 반환된 딕셔너리 접근
82 | input_tokens = token_summary["input_tokens"]
83 | output_tokens = token_summary["output_tokens"]
84 | total_tokens = token_summary["total_tokens"]
85 | ```
86 | 
87 | ## 로깅
88 | 
89 | 이 모듈은 Python 표준 `logging` 모듈을 사용하여 토큰 사용량 정보를 기록합니다:
90 | 
91 | - **DEBUG 레벨**: 각 메시지별 토큰 사용량 상세 정보
92 | - **INFO 레벨**: 전체 토큰 사용량 요약 정보
93 | 
94 | ## 참고사항
95 | 
96 | - `usage_metadata` 필드가 없는 객체는 토큰 사용량이 0으로 처리됩니다
97 | - 각 메시지의 토큰 사용량은 누적되어 최종 합계를 반환합니다
98 | 
99 | 


--------------------------------------------------------------------------------
/utils/databases/connector/clickhouse_connector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ClickHouse 데이터베이스 커넥터 모듈.
 3 | 
 4 | 이 모듈은 ClickHouse 서버에 연결하여 SQL 쿼리를 실행하고,
 5 | 그 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
 6 | """
 7 | 
 8 | import pandas as pd
 9 | from clickhouse_driver import Client
10 | 
11 | from utils.databases.config import DBConfig
12 | from utils.databases.connector.base_connector import BaseConnector
13 | from utils.databases.logger import logger
14 | 
15 | 
class ClickHouseConnector(BaseConnector):
    """
    Connector for ClickHouse databases.

    Creates a ClickHouse client on construction, runs SQL and returns the
    result as a pandas DataFrame, and disconnects on request.
    """

    client = None

    def __init__(self, config: DBConfig):
        """
        Store the connection settings and create the client.

        Args:
            config (DBConfig): ClickHouse connection settings; the ``host``,
                ``port``, ``user``, ``password`` and ``database`` keys are
                all required.
        """
        self.host, self.port = config["host"], config["port"]
        self.user, self.password = config["user"], config["password"]
        self.database = config["database"]
        self.connect()

    def connect(self) -> None:
        """
        Create the ClickHouse client from the stored settings.

        Raises:
            Exception: Whatever creating the client raised; it is logged and
                re-raised unchanged.
        """
        client_settings = {
            "host": self.host,
            "port": self.port,
            "user": self.user,
            "password": self.password,
            "database": self.database,
        }
        try:
            self.client = Client(**client_settings)
        except Exception as exc:
            logger.error("Failed to connect to ClickHouse: %s", exc)
            raise
        else:
            logger.info("Successfully connected to ClickHouse.")

    def run_sql(self, sql: str) -> pd.DataFrame:
        """
        Execute a SQL query and return the result as a DataFrame.

        The client is re-created first if it was closed earlier.

        Args:
            sql (str): SQL text to execute.

        Returns:
            pd.DataFrame: The query result.

        Raises:
            Exception: Whatever the execution raised; it is logged and
                re-raised unchanged.
        """
        if self.client is None:
            self.connect()

        try:
            return self.client.query_dataframe(sql)
        except Exception as exc:
            logger.error("Failed to execute SQL query: %s", exc)
            raise

    def close(self) -> None:
        """
        Disconnect the client if one exists and reset it to ``None``.
        """
        active_client = self.client
        if active_client:
            active_client.disconnect()
            logger.info("Connection to ClickHouse closed.")
        self.client = None
92 | 


--------------------------------------------------------------------------------
/utils/databases/connector/sqlite_connector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | SQLite 데이터베이스 커넥터 모듈.
 3 | 
 4 | 이 모듈은 SQLite 데이터베이스에 연결하여 SQL 쿼리를 실행하고,
 5 | 그 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
 6 | """
 7 | 
 8 | import sqlite3
 9 | 
10 | import pandas as pd
11 | 
12 | from utils.databases.config import DBConfig
13 | from utils.databases.connector.base_connector import BaseConnector
14 | from utils.databases.logger import logger
15 | 
16 | 
class SQLiteConnector(BaseConnector):
    """
    Connector for SQLite databases.

    Connects to a SQLite file or an in-memory database, runs SQL and returns
    the result as a pandas DataFrame, and closes the connection on request.
    """

    connection = None

    def __init__(self, config: DBConfig):
        """
        Initialize the connector and open the connection.

        Args:
            config (DBConfig): SQLite connection settings.
                - The ``path`` key names the SQLite file.
                - When it is missing, ``None`` or ":memory:", an in-memory
                  database is used.
        """
        # ``config.get("path", ":memory:")`` returns None when the key is
        # present with a None value; honour the documented None -> in-memory
        # contract explicitly.
        path = config.get("path")
        self.database = ":memory:" if path is None else path
        self.connect()

    def connect(self) -> None:
        """
        Open the SQLite connection for ``self.database``.

        Raises:
            Exception: Whatever ``sqlite3.connect`` raised; it is logged and
                re-raised unchanged.
        """
        try:
            self.connection = sqlite3.connect(self.database)
            logger.info("Successfully connected to SQLite (%s).", self.database)
        except Exception as e:
            logger.error("Failed to connect to SQLite: %s", e)
            raise

    def run_sql(self, sql: str) -> pd.DataFrame:
        """
        Execute a SQL query and return the result as a pandas DataFrame.

        The connection is re-opened first if it was closed earlier.

        Args:
            sql (str): SQL text to execute.

        Returns:
            pd.DataFrame: The query result, with columns named after the
                cursor description.

        Raises:
            Exception: Whatever the execution raised; it is logged and
                re-raised unchanged.
        """
        if self.connection is None:
            self.connect()

        # Pre-bind the cursor so the ``finally`` clause cannot hit an unbound
        # name (and mask the real error) when creating the cursor fails.
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            columns = [col[0] for col in cursor.description]
            rows = cursor.fetchall()
            return pd.DataFrame(rows, columns=columns)
        except Exception as e:
            logger.error("Failed to execute SQL query: %s", e)
            raise
        finally:
            if cursor is not None:
                cursor.close()

    def close(self) -> None:
        """
        Close the SQLite connection if one is open and reset it to ``None``.
        """
        if self.connection:
            self.connection.close()
            logger.info("Connection to SQLite closed.")
        self.connection = None
91 | 


--------------------------------------------------------------------------------
/infra/observability/token_usage.py:
--------------------------------------------------------------------------------
 1 | """
 2 | token_usage.py
 3 | 
 4 | LLM 응답 메시지에서 토큰 사용량을 집계하기 위한 유틸리티 모듈입니다.
 5 | 
 6 | 이 모듈은 LLM의 `usage_metadata` 필드를 기반으로 입력 토큰, 출력 토큰, 총 토큰 사용량을 계산하는 기능을 제공합니다.
 7 | Streamlit, LangChain 등 LLM 응답을 다루는 애플리케이션에서 비용 분석, 사용량 추적 등에 활용할 수 있습니다.
 8 | """
 9 | 
10 | import logging
11 | from typing import Any, List
12 | 
13 | logging.basicConfig(
14 |     level=logging.INFO,
15 |     format="%(asctime)s [%(levelname)s] %(message)s",
16 |     datefmt="%Y-%m-%d %H:%M:%S",
17 | )
18 | logger = logging.getLogger(__name__)
19 | 
20 | 
class TokenUtils:
    """
    Utility class for aggregating LLM token usage.

    Reads the ``usage_metadata`` attribute of each item in a list of LLM
    response messages and sums ``input_tokens``, ``output_tokens`` and
    ``total_tokens``.

    Example:
        >>> from infra.observability.token_usage import TokenUtils
        >>> summary = TokenUtils.get_token_usage_summary(data=messages)
        >>> print(summary["total_tokens"])

    Return format:
        {
            "input_tokens": int,
            "output_tokens": int,
            "total_tokens": int,
        }
    """

    @staticmethod
    def get_token_usage_summary(*, data: List[Any]) -> dict:
        """
        Sum the input/output/total token usage over the given messages.

        Args:
            data (List[Any]): Objects that may carry a ``usage_metadata``
                mapping. Items without the attribute, or whose attribute is
                ``None`` or empty, count as zero usage.

        Returns:
            dict: {
                "input_tokens": int,
                "output_tokens": int,
                "total_tokens": int
            }
        """

        input_tokens = 0
        output_tokens = 0
        total_tokens = 0

        for idx, item in enumerate(data):
            # The attribute may exist but be None; ``or {}`` keeps ``.get``
            # safe in that case.
            # NOTE(review): LangChain's AIMessage is believed to default
            # usage_metadata to None - confirm against the pinned version.
            token_usage = getattr(item, "usage_metadata", None) or {}
            in_tok = token_usage.get("input_tokens", 0)
            out_tok = token_usage.get("output_tokens", 0)
            total_tok = token_usage.get("total_tokens", 0)

            logger.debug(
                "Message[%d] → input=%d, output=%d, total=%d",
                idx,
                in_tok,
                out_tok,
                total_tok,
            )

            input_tokens += in_tok
            output_tokens += out_tok
            total_tokens += total_tok

        logger.info(
            "Token usage summary → input: %d, output: %d, total: %d",
            input_tokens,
            output_tokens,
            total_tokens,
        )

        return {
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "total_tokens": total_tokens,
        }
94 | 


--------------------------------------------------------------------------------
/utils/databases/connector/mysql_connector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | MySQL 데이터베이스 커넥터 모듈.
 3 | 
 4 | 이 모듈은 MySQL 서버에 연결하여 SQL 쿼리를 실행하고,
 5 | 그 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
 6 | """
 7 | 
 8 | import mysql.connector
 9 | import pandas as pd
10 | 
11 | from utils.databases.config import DBConfig
12 | from utils.databases.connector.base_connector import BaseConnector
13 | from utils.databases.logger import logger
14 | 
15 | 
class MySQLConnector(BaseConnector):
    """
    Connector for MySQL databases.

    Connects to a MySQL server on construction, runs SQL and returns the
    result as a pandas DataFrame, and closes the connection on request.
    """

    connection = None

    def __init__(self, config: DBConfig):
        """
        Store the connection settings and open the connection.

        Args:
            config (DBConfig): MySQL connection settings. ``host``, ``user``,
                ``password`` and ``database`` are required; ``port`` falls
                back to 3306 when the key is missing.
        """
        self.host = config["host"]
        self.port = config.get("port", 3306)
        self.user = config["user"]
        self.password = config["password"]
        self.database = config["database"]
        self.connect()

    def connect(self) -> None:
        """
        Open the MySQL connection from the stored settings.

        Raises:
            Exception: Whatever ``mysql.connector.connect`` raised; it is
                logged and re-raised unchanged.
        """
        try:
            self.connection = mysql.connector.connect(
                host=self.host,
                port=self.port,
                user=self.user,
                password=self.password,
                database=self.database,
            )
            logger.info("Successfully connected to MySQL.")
        except Exception as e:
            logger.error("Failed to connect to MySQL: %s", e)
            raise

    def run_sql(self, sql: str) -> pd.DataFrame:
        """
        Execute a SQL query and return the result as a pandas DataFrame.

        The connection is re-opened first if it was closed earlier, matching
        the behaviour of the SQLite and ClickHouse connectors.

        Args:
            sql (str): SQL text to execute.

        Returns:
            pd.DataFrame: The query result, with columns named after the
                cursor description.

        Raises:
            Exception: Whatever the execution raised; it is logged and
                re-raised unchanged.
        """
        if self.connection is None:
            self.connect()

        # Pre-bind the cursor so the ``finally`` clause cannot hit an unbound
        # name (and mask the real error) when creating the cursor fails.
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            columns = [column[0] for column in cursor.description]
            rows = cursor.fetchall()
            return pd.DataFrame(rows, columns=columns)
        except Exception as e:
            logger.error("Failed to execute SQL query: %s", e)
            raise
        finally:
            if cursor is not None:
                cursor.close()

    def close(self) -> None:
        """
        Close the MySQL connection if one is open and reset it to ``None``.
        """
        if self.connection:
            self.connection.close()
            logger.info("Connection to MySQL closed.")
        self.connection = None
94 | 


--------------------------------------------------------------------------------
/utils/databases/connector/postgres_connector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | PostgreSQL 데이터베이스 커넥터 모듈.
 3 | 
 4 | 이 모듈은 PostgreSQL 서버에 연결하여 SQL 쿼리를 실행하고,
 5 | 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
 6 | """
 7 | 
 8 | import pandas as pd
 9 | import psycopg2
10 | 
11 | from utils.databases.config import DBConfig
12 | from utils.databases.connector.base_connector import BaseConnector
13 | from utils.databases.logger import logger
14 | 
15 | 
class PostgresConnector(BaseConnector):
    """
    Connector for PostgreSQL databases.

    Opens a connection with ``psycopg2`` when constructed, executes SQL
    statements and returns their result sets as pandas DataFrames, and closes
    the connection on request.
    """

    connection = None

    def __init__(self, config: DBConfig):
        """
        Store the connection settings and connect immediately.

        Args:
            config (DBConfig): Connection settings. ``host``, ``port``,
                ``user``, ``password`` and ``database`` are all read as
                required keys.
        """
        self.host = config["host"]
        self.port = config["port"]
        self.user = config["user"]
        self.password = config["password"]
        self.database = config["database"]
        self.connect()

    def connect(self) -> None:
        """
        Open the connection to the PostgreSQL server.

        Raises:
            Exception: Whatever ``psycopg2.connect`` raises is logged and
                re-raised unchanged.
        """
        try:
            self.connection = psycopg2.connect(
                host=self.host,
                port=self.port,
                user=self.user,
                password=self.password,
                dbname=self.database,
            )
            logger.info("Successfully connected to PostgreSQL.")
        except Exception as e:
            logger.error("Failed to connect to PostgreSQL: %s", e)
            raise

    def run_sql(self, sql: str) -> pd.DataFrame:
        """
        Execute a SQL statement and return its result set as a DataFrame.

        Args:
            sql (str): SQL statement to execute.

        Returns:
            pd.DataFrame: One column per result column. An empty DataFrame is
                returned when the statement produces no result set. No commit
                is issued here.

        Raises:
            Exception: Any error raised while executing the statement is
                logged and re-raised unchanged, after the open transaction
                has been rolled back.
        """
        # Reconnect lazily so the connector stays usable after close().
        if self.connection is None:
            self.connect()

        # Bound before the try so ``finally`` never hits an unbound name when
        # cursor creation itself fails (which would mask the real error).
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            if cursor.description is None:
                # DB-API: description is None when there is no result set.
                return pd.DataFrame()
            columns = [desc[0] for desc in cursor.description]
            rows = cursor.fetchall()
            return pd.DataFrame(rows, columns=columns)
        except Exception as e:
            logger.error("Failed to execute SQL query: %s", e)
            # A failed statement leaves the psycopg2 transaction aborted;
            # without a rollback every later query on this connection fails.
            self._rollback_quietly()
            raise
        finally:
            if cursor is not None:
                cursor.close()

    def _rollback_quietly(self) -> None:
        """Roll back the open transaction, logging instead of raising on failure."""
        if self.connection is None:
            return
        try:
            self.connection.rollback()
        except Exception as rollback_error:
            # Never let cleanup hide the error that triggered the rollback.
            logger.error(
                "Failed to roll back PostgreSQL transaction: %s", rollback_error
            )

    def close(self) -> None:
        """
        Close the connection to the PostgreSQL server.

        Does nothing beyond resetting ``connection`` when no connection is open.
        """
        if self.connection:
            self.connection.close()
            logger.info("Connection to PostgreSQL closed.")
        self.connection = None
94 | 


--------------------------------------------------------------------------------
/utils/llm/vectordb/pgvector_db.py:
--------------------------------------------------------------------------------
 1 | """
 2 | pgvector VectorDB 구현
 3 | """
 4 | 
 5 | import os
 6 | from typing import Optional
 7 | 
 8 | import psycopg2
 9 | from langchain_postgres.vectorstores import PGVector
10 | 
11 | from utils.llm.core import get_embeddings
12 | from utils.llm.tools import get_info_from_db
13 | 
14 | 
def _check_collection_exists(connection_string: str, collection_name: str) -> bool:
    """Return True when the embedding table has rows for ``collection_name``.

    Args:
        connection_string: Connection string handed to ``psycopg2.connect``.
        collection_name: Collection name to look up.

    Returns:
        True when the count query reports at least one row; False when it
        reports none or when any error occurs (the error is printed).
    """
    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(connection_string)
        cursor = conn.cursor()

        # NOTE(review): langchain_postgres appears to keep the collection name
        # in langchain_pg_collection.name and link embeddings via
        # collection_id. If so, this query errors and the function always
        # returns False — confirm against the installed schema.
        cursor.execute(
            "SELECT COUNT(*) FROM langchain_pg_embedding WHERE collection_name = %s",
            (collection_name,),
        )
        result = cursor.fetchone()
        count = result[0] if result else 0

        return count > 0
    except Exception as e:
        print(f"Collection 존재 여부 확인 중 오류: {e}")
        return False
    finally:
        # Release the cursor and the connection on every path, including
        # failures in execute()/fetchone() that previously leaked them.
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as e:
                # Keep the "never raises" contract of this helper.
                print(f"Collection 존재 여부 확인 중 오류: {e}")
37 | 
38 | 
def get_pgvector_db(
    connection_string: Optional[str] = None, collection_name: Optional[str] = None
):
    """Load the pgvector-backed vector store, (re)indexing it when needed.

    Args:
        connection_string: Connection URL for the store. When None, it is
            built from the ``PGVECTOR_HOST`` / ``PGVECTOR_PORT`` /
            ``PGVECTOR_USER`` / ``PGVECTOR_PASSWORD`` / ``PGVECTOR_DATABASE``
            environment variables (defaults: localhost, 5432, postgres,
            postgres, postgres).
        collection_name: Collection to use. When None, taken from
            ``PGVECTOR_COLLECTION`` (default ``lang2sql_table_info_db``).

    Returns:
        A ``PGVector`` store. If opening the collection fails or a probe
        search returns nothing, the documents from ``get_info_from_db()`` are
        indexed into the collection and that store is returned instead.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    embeddings = get_embeddings()

    if connection_string is None:
        # Read the connection settings from the environment.
        host = os.getenv("PGVECTOR_HOST", "localhost")
        port = os.getenv("PGVECTOR_PORT", "5432")
        user = os.getenv("PGVECTOR_USER", "postgres")
        password = os.getenv("PGVECTOR_PASSWORD", "postgres")
        database = os.getenv("PGVECTOR_DATABASE", "postgres")
        # Percent-encode the credentials: characters such as "@", ":" or "/"
        # in a password would otherwise corrupt the URL structure.
        connection_string = (
            f"postgresql://{quote(user, safe='')}:{quote(password, safe='')}"
            f"@{host}:{port}/{database}"
        )

    if collection_name is None:
        collection_name = os.getenv("PGVECTOR_COLLECTION", "lang2sql_table_info_db")
    try:
        vector_store = PGVector(
            embeddings=embeddings,
            collection_name=collection_name,
            connection=connection_string,
        )

        # One probe search is enough to tell whether the collection has data;
        # a second identical search only costs another embedding call.
        if not vector_store.similarity_search("test", k=1):
            raise RuntimeError(f"Collection '{collection_name}' is empty")

        return vector_store

    except Exception as e:
        print(f"exception: {e}")
        # The collection is missing, empty, or could not be loaded:
        # index the documents again.
        documents = get_info_from_db()
        return PGVector.from_documents(
            documents=documents,
            embedding=embeddings,
            connection=connection_string,
            collection_name=collection_name,
        )
82 | 


--------------------------------------------------------------------------------
/utils/data/datahub_services/base_client.py:
--------------------------------------------------------------------------------
 1 | """
 2 | DataHub 기본 클라이언트 모듈
 3 | 
 4 | DataHub GMS 서버와의 기본 연결 및 통신 기능을 제공합니다.
 5 | """
 6 | 
 7 | import requests
 8 | from datahub.emitter.rest_emitter import DatahubRestEmitter
 9 | 
10 | 
class DataHubBaseClient:
    """Base client providing connectivity to a DataHub GMS server.

    Validates the server with a GraphQL health query, builds a
    ``DatahubRestEmitter`` and its graph client, and offers a helper for
    running raw GraphQL queries.
    """

    # Seconds to wait on each raw HTTP request before giving up; without a
    # timeout ``requests`` can block indefinitely on an unresponsive server.
    request_timeout = 30

    def __init__(self, gms_server="http://localhost:8080", extra_headers=None):
        """
        Validate the server address and create the DataHub clients.

        Args:
            gms_server (str): DataHub GMS server URL.
            extra_headers (dict, optional): Additional HTTP headers. ``None``
                (the default) means no extra headers; a fresh dict is created
                per instance instead of sharing one mutable default.

        Raises:
            ValueError: If the health query against ``gms_server`` fails.
        """
        # Set before validation so the health check can send these headers.
        self.extra_headers = {} if extra_headers is None else extra_headers

        if not self._is_valid_gms_server(gms_server):
            raise ValueError(f"유효하지 않은 GMS 서버 주소: {gms_server}")

        self.gms_server = gms_server

        self.emitter = DatahubRestEmitter(
            gms_server=gms_server, extra_headers=self.extra_headers
        )
        self.datahub_graph = self.emitter.to_graph()

    def _request_headers(self):
        """Return the headers for raw GraphQL requests (extras plus JSON type)."""
        extra = getattr(self, "extra_headers", None) or {}
        # Content-Type goes last so the JSON body is always declared correctly.
        return {**extra, "Content-Type": "application/json"}

    def _is_valid_gms_server(self, gms_server):
        """
        Check whether the GMS server answers a GraphQL health query.

        Args:
            gms_server (str): GMS server URL to check.

        Returns:
            bool: True when the server responds with HTTP 200; False on any
                other status or on a ``requests`` error (including timeout).
        """
        query = {"query": "{ health { status } }"}

        try:
            response = requests.post(
                f"{gms_server}/api/graphql",
                json=query,
                headers=self._request_headers(),
                timeout=self.request_timeout,
            )
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def execute_graphql_query(self, query, variables=None):
        """
        Execute a GraphQL query against the GMS server.

        Args:
            query (str): GraphQL query string.
            variables (dict, optional): Query variables; omitted from the
                payload when falsy.

        Returns:
            dict: The decoded JSON response on HTTP 200; otherwise a dict with
                ``error`` (True), ``status_code`` and ``message`` keys.
        """
        payload = {"query": query}

        if variables:
            payload["variables"] = variables

        response = requests.post(
            f"{self.gms_server}/api/graphql",
            json=payload,
            headers=self._request_headers(),
            timeout=self.request_timeout,
        )

        if response.status_code == 200:
            return response.json()
        return {
            "error": True,
            "status_code": response.status_code,
            "message": response.text,
        }

    def get_datahub_graph(self):
        """Return the DataHub graph client created in ``__init__``."""
        return self.datahub_graph

    def get_urns(self):
        """Return the result of ``get_urns_by_filter()`` on the graph client."""
        return self.datahub_graph.get_urns_by_filter()
95 | 


--------------------------------------------------------------------------------
/interface/app_pages/sidebar_components/llm_selector.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import streamlit as st
 3 | 
 4 | from interface.core.config import (
 5 |     update_llm_settings,
 6 |     get_llm_registry,
 7 | )
 8 | 
 9 | 
def render_sidebar_llm_selector() -> None:
    """Render the LLM picker in the Streamlit sidebar.

    When the LLM registry has no profiles, a plain provider dropdown is shown
    and a provider that differs from the current default is applied
    immediately. Otherwise a profile dropdown plus an apply button is shown.
    """
    st.sidebar.markdown("### LLM 선택")

    registry = get_llm_registry()

    if not registry.profiles:
        st.sidebar.info(
            "저장된 LLM 프로파일이 없습니다. 설정 > LLM에서 프로파일을 저장하세요."
        )
        # Legacy fallback: choose a bare provider instead of a profile.
        provider_options = [
            "openai",
            "azure",
            "bedrock",
            "gemini",
            "ollama",
            "huggingface",
        ]
        configured = (
            st.session_state.get("LLM_PROVIDER")
            or os.getenv("LLM_PROVIDER")
            or "openai"
        )
        default_llm = configured.lower()
        # Unknown providers fall back to the first option.
        if default_llm in provider_options:
            initial_index = provider_options.index(default_llm)
        else:
            initial_index = 0

        selected_provider = st.sidebar.selectbox(
            "LLM 공급자",
            options=provider_options,
            index=initial_index,
            key="sidebar_llm_provider_fallback",
        )
        if selected_provider == default_llm:
            return
        try:
            update_llm_settings(provider=selected_provider, values={})
            st.sidebar.success(
                f"LLM 공급자가 '{selected_provider}'로 변경되었습니다."
            )
        except Exception as exc:
            st.sidebar.error(f"LLM 공급자 변경 실패: {exc}")
        return

    profile_names = [profile.name for profile in registry.profiles]

    # Preselect the first profile whose provider matches the session/env value.
    current_provider = (
        st.session_state.get("LLM_PROVIDER") or os.getenv("LLM_PROVIDER") or ""
    ).lower()
    default_index = 0
    if current_provider:
        default_index = next(
            (
                position
                for position, profile in enumerate(registry.profiles)
                if profile.provider == current_provider
            ),
            0,
        )

    sel_name = st.sidebar.selectbox(
        "LLM 프로파일",
        options=profile_names,
        index=default_index,
        key="sidebar_llm_profile",
    )

    selected = None
    for profile in registry.profiles:
        if profile.name == sel_name:
            selected = profile
            break
    if selected is None:
        st.sidebar.error("선택한 LLM 프로파일을 찾을 수 없습니다.")
        return

    if st.sidebar.button("적용", key="sidebar_apply_llm_profile"):
        try:
            # Apply the provider together with the profile's stored fields
            # (presumably only non-sensitive values — confirm in the registry).
            update_llm_settings(provider=selected.provider, values=selected.fields)
            st.sidebar.success(f"LLM 프로파일 적용됨: {selected.name}")
        except Exception as exc:
            st.sidebar.error(f"LLM 프로파일 적용 실패: {exc}")

    # Embeddings-related UI is handled in embedding_selector.py
78 | 


--------------------------------------------------------------------------------
/utils/databases/connector/oracle_connector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Oracle 데이터베이스 커넥터 모듈.
 3 | 
 4 | 이 모듈은 Oracle 데이터베이스에 연결하여 SQL 쿼리를 실행하고,
 5 | 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
 6 | """
 7 | 
 8 | import oracledb
 9 | import pandas as pd
10 | 
11 | from utils.databases.config import DBConfig
12 | from utils.databases.connector.base_connector import BaseConnector
13 | from utils.databases.logger import logger
14 | 
15 | 
class OracleConnector(BaseConnector):
    """
    Connector for Oracle databases.

    Opens a connection with ``oracledb`` when constructed, executes SQL
    statements and returns their result sets as pandas DataFrames, and closes
    the connection on request.
    """

    connection = None

    def __init__(self, config: DBConfig):
        """
        Store the connection settings and connect immediately.

        Args:
            config (DBConfig): Connection settings.
                - Required keys: host, port, user, password
                - Optional key: extra.service_name (default: "orcl")
        """
        self.host = config["host"]
        self.port = config["port"]
        self.user = config["user"]
        self.password = config["password"]
        # "extra" is optional: a missing or None value must not break the
        # lookup of the optional service name.
        extra = config.get("extra") or {}
        self.service_name = extra.get("service_name", "orcl")
        self.connect()

    def connect(self) -> None:
        """
        Open the connection to the Oracle database.

        The DSN is built as ``host:port/service_name``.

        Raises:
            Exception: Whatever ``oracledb.connect`` raises is logged and
                re-raised unchanged.
        """
        try:
            self.connection = oracledb.connect(
                user=self.user,
                password=self.password,
                dsn=f"{self.host}:{self.port}/{self.service_name}",
            )
            logger.info("Successfully connected to Oracle.")
        except Exception as e:
            logger.error("Failed to connect to Oracle: %s", e)
            raise

    def run_sql(self, sql: str) -> pd.DataFrame:
        """
        Execute a SQL statement and return its result set as a DataFrame.

        Args:
            sql (str): SQL statement to execute.

        Returns:
            pd.DataFrame: One column per result column. An empty DataFrame is
                returned when the statement produces no result set. No commit
                is issued here.

        Raises:
            Exception: Any error raised while executing the statement is
                logged and re-raised unchanged.
        """
        # Reconnect lazily so the connector stays usable after close().
        if self.connection is None:
            self.connect()

        # Bound before the try so ``finally`` never hits an unbound name when
        # cursor creation itself fails (which would mask the real error).
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            if cursor.description is None:
                # DB-API: description is None when there is no result set.
                return pd.DataFrame()
            columns = [desc[0] for desc in cursor.description]
            rows = cursor.fetchall()
            return pd.DataFrame(rows, columns=columns)
        except Exception as e:
            logger.error("Failed to execute SQL query: %s", e)
            raise
        finally:
            if cursor is not None:
                cursor.close()

    def close(self) -> None:
        """
        Close the Oracle database connection.

        Does nothing beyond resetting ``connection`` when no connection is open.
        """
        if self.connection:
            self.connection.close()
            logger.info("Connection to Oracle closed.")
        self.connection = None
94 | 


--------------------------------------------------------------------------------
/utils/databases/connector/mariadb_connector.py:
--------------------------------------------------------------------------------
 1 | """
 2 | MariaDB 데이터베이스 커넥터 모듈.
 3 | 
 4 | 이 모듈은 mysql-connector-python을 사용하여 MariaDB 서버에 연결하고,
 5 | SQL 쿼리를 실행하여 pandas DataFrame 형태로 결과를 반환하는 기능을 제공합니다.
 6 | """
 7 | 
 8 | import mysql.connector
 9 | import pandas as pd
10 | 
11 | from utils.databases.config import DBConfig
12 | from utils.databases.connector.base_connector import BaseConnector
13 | from utils.databases.logger import logger
14 | 
15 | 
class MariaDBConnector(BaseConnector):
    """
    Connector for MariaDB databases.

    Uses ``mysql.connector`` to open a connection when constructed, executes
    SQL statements and returns their result sets as pandas DataFrames, and
    closes the connection on request.
    """

    connection = None

    def __init__(self, config: DBConfig):
        """
        Store the connection settings and connect immediately.

        Args:
            config (DBConfig): Connection settings. ``host``, ``user``,
                ``password`` and ``database`` are read as required keys;
                ``port`` falls back to 3306 when the key is absent.
        """
        self.host = config["host"]
        self.port = config.get("port", 3306)
        self.user = config["user"]
        self.password = config["password"]
        self.database = config["database"]
        self.connect()

    def connect(self) -> None:
        """
        Open the connection to the MariaDB server via ``mysql.connector``.

        Raises:
            Exception: Whatever ``mysql.connector.connect`` raises is logged
                and re-raised unchanged.
        """
        try:
            self.connection = mysql.connector.connect(
                host=self.host,
                port=self.port,
                user=self.user,
                password=self.password,
                database=self.database,
            )
            logger.info("Successfully connected to MariaDB.")
        except Exception as e:
            logger.error("Failed to connect to MariaDB: %s", e)
            raise

    def run_sql(self, sql: str) -> pd.DataFrame:
        """
        Execute a SQL statement and return its result set as a DataFrame.

        Args:
            sql (str): SQL statement to execute.

        Returns:
            pd.DataFrame: One column per result column. An empty DataFrame is
                returned when the statement produces no result set. No commit
                is issued here.

        Raises:
            Exception: Any error raised while executing the statement is
                logged and re-raised unchanged.
        """
        # Reconnect lazily so the connector stays usable after close().
        if self.connection is None:
            self.connect()

        # Bound before the try so ``finally`` never hits an unbound name when
        # cursor creation itself fails (which would mask the real error).
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            if cursor.description is None:
                # DB-API: description is None when there is no result set.
                return pd.DataFrame()
            columns = [column[0] for column in cursor.description]
            rows = cursor.fetchall()
            return pd.DataFrame(rows, columns=columns)
        except Exception as e:
            logger.error("Failed to execute SQL query: %s", e)
            raise
        finally:
            if cursor is not None:
                cursor.close()

    def close(self) -> None:
        """
        Close the connection to the MariaDB server.

        Does nothing beyond resetting ``connection`` when no connection is open.
        """
        if self.connection:
            self.connection.close()
            logger.info("Connection to MariaDB closed.")
        self.connection = None
95 | 


--------------------------------------------------------------------------------
/interface/app_pages/sidebar_components/embedding_selector.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import streamlit as st
 3 | 
 4 | from interface.core.config import (
 5 |     update_embedding_settings,
 6 |     get_embedding_registry,
 7 | )
 8 | 
 9 | 
def render_sidebar_embedding_selector() -> None:
    """Render the embeddings picker in the Streamlit sidebar.

    When the embedding registry has no profiles, a plain provider dropdown is
    shown and a provider that differs from the current default is applied
    immediately. Otherwise a profile dropdown plus an apply button is shown.
    """
    st.sidebar.markdown("### Embeddings 선택")

    e_reg = get_embedding_registry()

    if not e_reg.profiles:
        st.sidebar.info(
            "저장된 Embeddings 프로파일이 없습니다. 설정 > LLM에서 저장하세요."
        )
        # Fallback: keep a simple provider-only selection.
        provider_options = [
            "openai",
            "azure",
            "bedrock",
            "gemini",
            "ollama",
            "huggingface",
        ]
        configured = (
            st.session_state.get("EMBEDDING_PROVIDER")
            or os.getenv("EMBEDDING_PROVIDER")
            or "openai"
        )
        default_emb = configured.lower()
        # Unknown providers fall back to the first option.
        if default_emb in provider_options:
            initial_index = provider_options.index(default_emb)
        else:
            initial_index = 0

        selected = st.sidebar.selectbox(
            "Embeddings 공급자",
            options=provider_options,
            index=initial_index,
            key="sidebar_embedding_provider_fallback",
        )
        if selected == default_emb:
            return
        try:
            update_embedding_settings(provider=selected, values={})
            st.sidebar.success(
                f"Embeddings 공급자가 '{selected}'로 변경되었습니다."
            )
        except Exception as exc:
            st.sidebar.error(f"Embeddings 공급자 변경 실패: {exc}")
        return

    e_names = [profile.name for profile in e_reg.profiles]

    # Preselect the first profile whose provider matches the session/env value.
    current_emb_provider = (
        st.session_state.get("EMBEDDING_PROVIDER")
        or os.getenv("EMBEDDING_PROVIDER")
        or ""
    ).lower()
    e_default_index = 0
    if current_emb_provider:
        e_default_index = next(
            (
                position
                for position, profile in enumerate(e_reg.profiles)
                if profile.provider == current_emb_provider
            ),
            0,
        )

    e_sel_name = st.sidebar.selectbox(
        "Embeddings 프로파일",
        options=e_names,
        index=e_default_index,
        key="sidebar_embedding_profile",
    )

    e_selected = None
    for profile in e_reg.profiles:
        if profile.name == e_sel_name:
            e_selected = profile
            break
    if e_selected is None:
        st.sidebar.error("선택한 Embeddings 프로파일을 찾을 수 없습니다.")
        return

    if st.sidebar.button("Embeddings 적용", key="sidebar_apply_embedding_profile"):
        try:
            update_embedding_settings(
                provider=e_selected.provider, values=e_selected.fields
            )
            st.sidebar.success(f"Embeddings 프로파일 적용됨: {e_selected.name}")
        except Exception as exc:
            st.sidebar.error(f"Embeddings 프로파일 적용 실패: {exc}")
82 | 


--------------------------------------------------------------------------------
/test/test_llm_utils/test_llm_response_parser.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LLMResponseParser 클래스의 기능을 테스트하는 단위 테스트 모듈입니다.
  3 | 
  4 | 주요 테스트 항목:
  5 | - <SQL> 블록에서 SQL 쿼리 추출 성공/실패
  6 | - <해석> 블록에서 자연어 설명 추출 성공/실패
  7 | - 다양한 입력 형식(들여쓰기, 공백 등)에 대한 정규식 대응 여부 확인
  8 | """
  9 | 
 10 | import unittest
 11 | 
 12 | from utils.llm.llm_response_parser import LLMResponseParser
 13 | 
 14 | 
class TestLLMResponseParser(unittest.TestCase):
    """
    Test cases for the static extraction methods of LLMResponseParser.

    Each test checks that SQL / interpretation block extraction returns the
    expected text, or reacts as expected when the code block is missing.
    """

    def test_extract_sql_success(self):
        """
        Verify that the SQL query is extracted from a string containing a
        <SQL> block with a ```sql``` code block.
        """

        # NOTE(review): the closing fence of the sql block below has four
        # backticks — presumably a typo in the fixture; confirm the parser
        # is meant to tolerate it.
        text = """
        <SQL>
        ```sql
        SELECT * FROM users;
        ````

        <해석>

        ```plaintext
        사용자 테이블의 모든 데이터를 조회합니다.
        ```

        """
        expected_sql = "SELECT * FROM users;"
        result = LLMResponseParser.extract_sql(text)
        self.assertEqual(result, expected_sql)

    def test_extract_sql_missing(self):
        """
        Verify that ValueError is raised when the <SQL> tag is present but
        no code block follows it.
        """

        text = "<SQL> no code block here"
        with self.assertRaises(ValueError):
            LLMResponseParser.extract_sql(text)

    def test_extract_interpretation_success(self):
        """
        Verify that the interpretation text is extracted from a string
        containing a <해석> block with a ```plaintext``` code block.
        """

        text = """
        ```

        <SQL>
        ```sql
        SELECT * FROM users;
        ```
        <해석>
        ```plaintext
        사용자 테이블의 모든 데이터를 조회합니다.
        ```
        """
        expected = "사용자 테이블의 모든 데이터를 조회합니다."
        result = LLMResponseParser.extract_interpretation(text)
        self.assertEqual(result, expected)

    def test_extract_interpretation_empty(self):
        """
        Verify that an empty string is returned when the <해석> tag is
        present but no code block follows it.
        """

        text = "<해석> 블록이 없습니다."
        result = LLMResponseParser.extract_interpretation(text)
        self.assertEqual(result, "")

    def test_extract_sql_with_leading_whitespace(self):
        """
        Verify that the SQL query is still extracted when the code block
        under <SQL> is indented.
        """

        text = """
        ```

        <SQL>
            ```sql
            SELECT id FROM orders;
            ```
        <해석>
        ```plaintext
        주문 테이블에서 ID 조회
        ```
        """
        expected = "SELECT id FROM orders;"
        result = LLMResponseParser.extract_sql(text)
        self.assertEqual(result, expected.strip())
108 | 


--------------------------------------------------------------------------------
/utils/databases/connector/databricks_connector.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Databricks SQL Warehouse 커넥터 모듈.
  3 | 
  4 | 이 모듈은 Databricks SQL Warehouse에 연결하여 SQL 쿼리를 실행하고,
  5 | 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
  6 | """
  7 | 
  8 | import pandas as pd
  9 | from databricks import sql
 10 | 
 11 | from utils.databases.config import DBConfig
 12 | from utils.databases.connector.base_connector import BaseConnector
 13 | from utils.databases.logger import logger
 14 | 
 15 | 
 16 | class DatabricksConnector(BaseConnector):
 17 |     """
 18 |     Databricks SQL Warehouse 커넥터 클래스.
 19 | 
 20 |     Databricks SQL 엔드포인트에 연결하여 쿼리를 실행하고,
 21 |     결과를 DataFrame으로 반환하는 기능을 제공합니다.
 22 |     """
 23 | 
 24 |     connection = None
 25 | 
 26 |     def __init__(self, config: DBConfig):
 27 |         """
 28 |         DatabricksConnector 인스턴스를 초기화합니다.
 29 | 
 30 |         Args:
 31 |             config (DBConfig): Databricks 연결 정보를 담은 설정 객체.
 32 |                 - 필수 키: host, extra.http_path, extra.access_token
 33 |                 - 선택 키: extra.catalog, extra.schema
 34 |         """
 35 |         self.server_hostname = config["host"]
 36 |         self.http_path = config["extra"]["http_path"]
 37 |         self.access_token = config["extra"]["access_token"]
 38 |         self.catalog = config.get("extra", {}).get("catalog")
 39 |         self.schema = config.get("extra", {}).get("schema")
 40 |         self.connect()
 41 | 
 42 |     def connect(self) -> None:
 43 |         """
 44 |         Databricks SQL Warehouse에 연결을 설정합니다.
 45 | 
 46 |         Raises:
 47 |             ConnectionError: 연결 설정 중 오류가 발생한 경우.
 48 |         """
 49 |         try:
 50 |             self.connection = sql.connect(
 51 |                 server_hostname=self.server_hostname,
 52 |                 http_path=self.http_path,
 53 |                 access_token=self.access_token,
 54 |                 catalog=self.catalog,
 55 |                 schema=self.schema,
 56 |             )
 57 |             logger.info("Successfully connected to Databricks.")
 58 |         except Exception as e:
 59 |             logger.error("Failed to connect to Databricks: %s", e)
 60 |             raise
 61 | 
 62 |     def run_sql(self, sql: str) -> pd.DataFrame:
 63 |         """
 64 |         SQL 쿼리를 실행하고 결과를 pandas DataFrame으로 반환합니다.
 65 | 
 66 |         Args:
 67 |             sql (str): 실행할 SQL 쿼리 문자열.
 68 | 
 69 |         Returns:
 70 |             pd.DataFrame: 쿼리 결과를 담은 DataFrame 객체.
 71 | 
 72 |         Raises:
 73 |             RuntimeError: SQL 실행 중 오류가 발생한 경우.
 74 |         """
 75 |         if self.connection is None:
 76 |             self.connect()
 77 | 
 78 |         try:
 79 |             cursor = self.connection.cursor()
 80 |             cursor.execute(sql)
 81 |             columns = [desc[0] for desc in cursor.description]
 82 |             rows = cursor.fetchall()
 83 |             return pd.DataFrame(rows, columns=columns)
 84 |         except Exception as e:
 85 |             logger.error("Failed to execute SQL query: %s", e)
 86 |             raise
 87 |         finally:
 88 |             cursor.close()
 89 | 
 90 |     def close(self) -> None:
 91 |         """
 92 |         Databricks SQL Warehouse와의 연결을 종료합니다.
 93 | 
 94 |         연결이 존재할 경우 안전하게 닫고 리소스를 해제합니다.
 95 |         """
 96 |         if self.connection:
 97 |             self.connection.close()
 98 |             logger.info("Connection to Databricks closed.")
 99 |         self.connection = None
100 | 


--------------------------------------------------------------------------------
/interface/app_pages/sidebar_components/data_source_selector.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | 
 3 | from interface.core.config import (
 4 |     load_config,
 5 |     get_data_sources_registry,
 6 |     update_datahub_server,
 7 |     update_vectordb_settings,
 8 |     update_data_source_mode,
 9 | )
10 | 
11 | 
def render_sidebar_data_source_selector(config=None) -> None:
    """Render the sidebar controls for choosing the active data source.

    Shows a DataHub/VectorDB radio switch, a select box listing the registered
    instances of the chosen kind, and an apply button. Pressing the button
    pushes the chosen instance into ``config`` through the ``update_*``
    helpers and reports success or failure in the sidebar.

    Args:
        config: Configuration object to read from and update. When ``None``
            it is obtained from ``load_config()``.
    """
    cfg = load_config() if config is None else config
    sources = get_data_sources_registry()
    sidebar = st.sidebar

    sidebar.markdown("### 데이터 소스")

    # Pre-select the radio option that matches the stored mode
    # (a missing mode is treated as "datahub").
    current_mode = (cfg.data_source_mode or "datahub").lower()
    default_index = 0 if current_mode == "datahub" else 1
    chosen_mode = sidebar.radio(
        "소스 종류",
        options=["DataHub", "VectorDB"],
        index=default_index,
        horizontal=True,
    )

    if chosen_mode != "DataHub":
        # ---- VectorDB branch ----
        vectordb_names = [entry.name for entry in sources.vectordb]
        if not vectordb_names:
            sidebar.warning(
                "등록된 VectorDB가 없습니다. 설정 > 데이터 소스에서 추가하세요."
            )
            return
        picked_vdb = sidebar.selectbox(
            "VectorDB 인스턴스", options=vectordb_names, key="sidebar_vdb_select"
        )
        if not sidebar.button("소스 적용", key="sidebar_apply_vdb"):
            return
        target = next(
            (entry for entry in sources.vectordb if entry.name == picked_vdb), None
        )
        if target is None:
            sidebar.error("선택한 VectorDB를 찾을 수 없습니다.")
            return
        try:
            update_vectordb_settings(
                cfg,
                vectordb_type=target.type,
                vectordb_location=target.location,
            )
            update_data_source_mode(cfg, "vectordb")
            sidebar.success(f"VectorDB 적용됨: {target.name}")
        except Exception as exc:
            sidebar.error(f"적용 실패: {exc}")
        return

    # ---- DataHub branch ----
    datahub_names = [entry.name for entry in sources.datahub]
    if not datahub_names:
        sidebar.warning(
            "등록된 DataHub가 없습니다. 설정 > 데이터 소스에서 추가하세요."
        )
        return
    picked_dh = sidebar.selectbox(
        "DataHub 인스턴스", options=datahub_names, key="sidebar_dh_select"
    )
    if not sidebar.button("소스 적용", key="sidebar_apply_dh"):
        return
    target = next((entry for entry in sources.datahub if entry.name == picked_dh), None)
    if target is None:
        sidebar.error("선택한 DataHub를 찾을 수 없습니다.")
        return
    try:
        update_datahub_server(cfg, target.url)
        # When the DataHub entry defines a FAISS path, also apply it as the
        # default VectorDB location. A failure here only produces a warning.
        if target.faiss_path:
            try:
                update_vectordb_settings(
                    cfg,
                    vectordb_type="faiss",
                    vectordb_location=target.faiss_path,
                )
            except Exception as faiss_exc:
                sidebar.warning(f"FAISS 경로 적용 경고: {faiss_exc}")
        update_data_source_mode(cfg, "datahub")
        sidebar.success(f"DataHub 적용됨: {target.name}")
    except Exception as exc:
        sidebar.error(f"적용 실패: {exc}")
81 | 


--------------------------------------------------------------------------------
/cli/commands/quary.py:
--------------------------------------------------------------------------------
  1 | """자연어 질문을 SQL 쿼리로 변환하는 CLI 명령어 정의 모듈.
  2 | 
  3 | 이 모듈은 사용자가 입력한 자연어 질문을 SQL 쿼리로 변환하여 출력하는
  4 | `query` CLI 명령어를 제공합니다.
  5 | """
  6 | 
  7 | import os
  8 | 
  9 | import click
 10 | 
 11 | from cli.utils.logger import configure_logging
 12 | 
 13 | logger = configure_logging()
 14 | 
 15 | 
 16 | @click.command(name="query")
 17 | @click.argument("question", type=str)
 18 | @click.option(
 19 |     "--database-env",
 20 |     default="clickhouse",
 21 |     help="사용할 데이터베이스 환경 (기본값: clickhouse)",
 22 | )
 23 | @click.option(
 24 |     "--retriever-name",
 25 |     default="기본",
 26 |     help="테이블 검색기 이름 (기본값: 기본)",
 27 | )
 28 | @click.option(
 29 |     "--top-n",
 30 |     type=int,
 31 |     default=5,
 32 |     help="검색된 상위 테이블 수 제한 (기본값: 5)",
 33 | )
 34 | @click.option(
 35 |     "--device",
 36 |     default="cpu",
 37 |     help="LLM 실행에 사용할 디바이스 (기본값: cpu)",
 38 | )
 39 | @click.option(
 40 |     "--use-enriched-graph",
 41 |     is_flag=True,
 42 |     help="확장된 그래프(프로파일 추출 + 컨텍스트 보강) 사용 여부",
 43 | )
 44 | @click.option(
 45 |     "--vectordb-type",
 46 |     type=click.Choice(["faiss", "pgvector"]),
 47 |     default="faiss",
 48 |     help="사용할 벡터 데이터베이스 타입 (기본값: faiss)",
 49 | )
 50 | @click.option(
 51 |     "--vectordb-location",
 52 |     help=(
 53 |         "VectorDB 위치 설정\n"
 54 |         "- FAISS: 디렉토리 경로 (예: ./my_vectordb)\n"
 55 |         "- pgvector: 연결 문자열 (예: postgresql://user:pass@host:port/db)\n"
 56 |         "기본값: FAISS는 './dev/table_info_db', pgvector는 환경변수 사용"
 57 |     ),
 58 | )
 59 | def query_command(
 60 |     question: str,
 61 |     database_env: str,
 62 |     retriever_name: str,
 63 |     top_n: int,
 64 |     device: str,
 65 |     use_enriched_graph: bool,
 66 |     vectordb_type: str = "faiss",
 67 |     vectordb_location: str = None,
 68 | ) -> None:
 69 |     """자연어 질문을 SQL 쿼리로 변환하여 출력합니다.
 70 | 
 71 |     Args:
 72 |         question (str): SQL로 변환할 자연어 질문
 73 |         database_env (str): 사용할 데이터베이스 환경
 74 |         retriever_name (str): 테이블 검색기 이름
 75 |         top_n (int): 검색된 상위 테이블 수 제한
 76 |         device (str): LLM 실행 디바이스
 77 |         use_enriched_graph (bool): 확장된 그래프 사용 여부
 78 |         vectordb_type (str): 벡터 데이터베이스 타입 ("faiss" 또는 "pgvector")
 79 |         vectordb_location (Optional[str]): 벡터DB 경로 또는 연결 URL
 80 |     """
 81 |     try:
 82 |         from engine.query_executor import execute_query, extract_sql_from_result
 83 | 
 84 |         os.environ["VECTORDB_TYPE"] = vectordb_type
 85 | 
 86 |         if vectordb_location:
 87 |             os.environ["VECTORDB_LOCATION"] = vectordb_location
 88 | 
 89 |         res = execute_query(
 90 |             query=question,
 91 |             database_env=database_env,
 92 |             retriever_name=retriever_name,
 93 |             top_n=top_n,
 94 |             device=device,
 95 |             use_enriched_graph=use_enriched_graph,
 96 |         )
 97 | 
 98 |         sql = extract_sql_from_result(res)
 99 |         if sql:
100 |             print(sql)
101 |         else:
102 |             generated_query = res.get("generated_query")
103 |             if generated_query:
104 |                 query_text = (
105 |                     generated_query.content
106 |                     if hasattr(generated_query, "content")
107 |                     else str(generated_query)
108 |                 )
109 |                 print(query_text)
110 | 
111 |     except Exception as e:
112 |         logger.error("쿼리 처리 중 오류 발생: %s", e)
113 |         raise
114 | 


--------------------------------------------------------------------------------
/utils/databases/connector/snowflake_connector.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Snowflake 데이터베이스 커넥터 모듈.
  3 | 
  4 | 이 모듈은 Snowflake 데이터베이스에 연결하여 SQL 쿼리를 실행하고,
  5 | 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
  6 | """
  7 | 
  8 | import pandas as pd
  9 | from snowflake import connector
 10 | 
 11 | from utils.databases.config import DBConfig
 12 | from utils.databases.connector.base_connector import BaseConnector
 13 | from utils.databases.logger import logger
 14 | 
 15 | 
 16 | class SnowflakeConnector(BaseConnector):
 17 |     """
 18 |     Snowflake 데이터베이스 커넥터 클래스.
 19 | 
 20 |     Snowflake 서버에 연결하여 SQL 쿼리를 실행하거나 연결을 종료하는 기능을 제공합니다.
 21 |     """
 22 | 
 23 |     connection = None
 24 | 
 25 |     def __init__(self, config: DBConfig):
 26 |         """
 27 |         SnowflakeConnector 인스턴스를 초기화합니다.
 28 | 
 29 |         Args:
 30 |             config (DBConfig): Snowflake 연결 정보를 담은 설정 객체.
 31 |                 - 필수 키: user, password, extra.account
 32 |                 - 선택 키: extra.warehouse, database, extra.schema
 33 |         """
 34 |         self.user = config["user"]
 35 |         self.password = config["password"]
 36 |         self.account = config["extra"]["account"]
 37 |         self.warehouse = config.get("extra", {}).get("warehouse")
 38 |         self.database = config.get("database")
 39 |         self.schema = config.get("extra", {}).get("schema")
 40 |         self.connect()
 41 | 
 42 |     def connect(self) -> None:
 43 |         """
 44 |         Snowflake 데이터베이스에 연결을 설정합니다.
 45 | 
 46 |         Raises:
 47 |             ConnectionError: Snowflake 서버 연결에 실패한 경우 발생합니다.
 48 |         """
 49 |         try:
 50 |             self.connection = connector.connect(
 51 |                 user=self.user,
 52 |                 password=self.password,
 53 |                 account=self.account,
 54 |                 warehouse=self.warehouse,
 55 |                 database=self.database,
 56 |                 schema=self.schema,
 57 |             )
 58 |             logger.info("Successfully connected to Snowflake.")
 59 |             self.cursor = self.connection.cursor()
 60 |         except Exception as e:
 61 |             logger.error("Failed to connect to Snowflake: %s", e)
 62 |             raise
 63 | 
 64 |     def run_sql(self, sql: str) -> pd.DataFrame:
 65 |         """
 66 |         SQL 쿼리를 실행하고 결과를 pandas DataFrame으로 반환합니다.
 67 | 
 68 |         Args:
 69 |             sql (str): 실행할 SQL 쿼리 문자열.
 70 | 
 71 |         Returns:
 72 |             pd.DataFrame: 쿼리 결과를 담은 DataFrame 객체.
 73 | 
 74 |         Raises:
 75 |             RuntimeError: SQL 실행 중 오류가 발생한 경우.
 76 |         """
 77 |         if self.connection is None:
 78 |             self.connect()
 79 | 
 80 |         cursor = self.connection.cursor()
 81 | 
 82 |         try:
 83 |             self.cursor.execute(sql)
 84 |             columns = [col[0] for col in self.cursor.description]
 85 |             data = self.cursor.fetchall()
 86 |             return pd.DataFrame(data, columns=columns)
 87 |         except Exception as e:
 88 |             logger.error("Failed to execute SQL query: %s", e)
 89 |             raise
 90 |         finally:
 91 |             cursor.close()
 92 | 
 93 |     def close(self) -> None:
 94 |         """
 95 |         Snowflake 데이터베이스 연결을 종료합니다.
 96 | 
 97 |         연결이 존재할 경우 안전하게 닫고 리소스를 해제합니다.
 98 |         """
 99 |         if self.connection:
100 |             self.connection.close()
101 |             logger.info("Connection to Snowflake closed.")
102 |         self.connection = None
103 | 


--------------------------------------------------------------------------------
/utils/llm/output_schema/README.md:
--------------------------------------------------------------------------------
  1 | # output_schema 모듈
  2 | 
  3 | LLM 구조화 출력을 위한 Pydantic 모델 정의 모듈입니다.
  4 | 
  5 | ## 디렉토리 구조
  6 | 
  7 | ```
  8 | output_schema/
  9 | ├── __pycache__/
 10 | ├── document_suitability.py
 11 | └── question_suitability.py
 12 | ```
 13 | 
 14 | ## 파일 목록 및 설명
 15 | 
 16 | ### document_suitability.py
 17 | 
 18 | **목적**: LLM 구조화 출력으로부터 테이블별 적합성 평가 결과를 표현하는 Pydantic 모델을 정의합니다.
 19 | 
 20 | **주요 클래스**:
 21 | 
 22 | - `DocumentSuitability`: 단일 테이블에 대한 적합성 평가 결과를 표현하는 모델
 23 |   - `table_name` (str): 테이블명
 24 |   - `score` (float): 0.0~1.0 사이의 적합도 점수
 25 |   - `reason` (str): 한국어 한두 문장 근거
 26 |   - `matched_columns` (List[str]): 질문과 직접 연관된 컬럼명 목록
 27 |   - `missing_entities` (List[str]): 부족한 엔티티/지표/기간 등
 28 | 
 29 | - `DocumentSuitabilityList`: 문서 적합성 평가 결과 리스트 래퍼
 30 |   - `results` (List[DocumentSuitability]): 평가 결과 목록
 31 |   - OpenAI Structured Outputs 호환을 위해 명시적 최상위 키(`results`)를 제공
 32 | 
 33 | ### question_suitability.py
 34 | 
 35 | **목적**: LLM 구조화 출력으로부터 SQL 적합성 판단 결과를 표현하는 Pydantic 모델을 정의합니다.
 36 | 
 37 | **주요 클래스**:
 38 | 
 39 | - `QuestionSuitability`: SQL 생성 적합성 결과 모델
 40 |   - `reason` (str): 보완/설명 사유 요약
 41 |   - `missing_entities` (list[str]): 질문에서 누락된 핵심 엔터티/기간 등
 42 |   - `requires_data_science` (bool): SQL을 넘어 ML/통계 분석이 필요한지 여부
 43 | 
 44 | ## 사용 방법
 45 | 
 46 | ### Import 및 사용 위치
 47 | 
 48 | 이 모듈의 클래스들은 `utils/llm/chains.py`에서 import되어 사용됩니다:
 49 | 
 50 | ```python
 51 | from utils.llm.output_schema.document_suitability import DocumentSuitabilityList
 52 | from utils.llm.output_schema.question_suitability import QuestionSuitability
 53 | ```
 54 | 
 55 | ### 사용 예시
 56 | 
 57 | #### 1. QuestionSuitability 사용
 58 | 
 59 | `create_question_gate_chain()` 함수에서 질문 적합성을 판단하는 체인을 생성할 때 사용됩니다:
 60 | 
 61 | ```python
 62 | def create_question_gate_chain(llm):
 63 |     """
 64 |     질문 적합성(Question Gate) 체인을 생성합니다.
 65 |     
 66 |     Returns:
 67 |         Runnable: invoke({"question": str}) -> QuestionSuitability
 68 |     """
 69 |     prompt = get_prompt_template("question_gate_prompt")
 70 |     gate_prompt = ChatPromptTemplate.from_messages(
 71 |         [SystemMessagePromptTemplate.from_template(prompt)]
 72 |     )
 73 |     return gate_prompt | llm.with_structured_output(QuestionSuitability)
 74 | ```
 75 | 
 76 | **사용 흐름**:
 77 | 1. 사용자 질문을 입력으로 받음
 78 | 2. LLM이 구조화된 출력으로 `QuestionSuitability` 객체를 반환
 79 | 3. SQL 생성이 적합한지 여부와 필요 보완 사항을 판단
 80 | 
 81 | #### 2. DocumentSuitabilityList 사용
 82 | 
 83 | `create_document_suitability_chain()` 함수에서 문서(테이블) 적합성을 평가하는 체인을 생성할 때 사용됩니다:
 84 | 
 85 | ```python
 86 | def create_document_suitability_chain(llm):
 87 |     """
 88 |     문서 적합성 평가 체인을 생성합니다.
 89 |     
 90 |     Returns:
 91 |         Runnable: invoke({"question": str, "tables": dict}) -> {"results": DocumentSuitability[]}
 92 |     """
 93 |     prompt = get_prompt_template("document_suitability_prompt")
 94 |     doc_prompt = ChatPromptTemplate.from_messages(
 95 |         [SystemMessagePromptTemplate.from_template(prompt)]
 96 |     )
 97 |     return doc_prompt | llm.with_structured_output(DocumentSuitabilityList)
 98 | ```
 99 | 
100 | **사용 흐름**:
101 | 1. 사용자 질문과 검색된 테이블 메타데이터를 입력으로 받음
102 | 2. LLM이 각 테이블에 대한 적합도 점수와 평가 결과를 포함한 `DocumentSuitabilityList` 객체를 반환
103 | 3. 가장 적합한 테이블을 선택하거나 적합도가 낮은 경우 사용자에게 알림
104 | 
105 | ### 구조화 출력 활용
106 | 
107 | 두 모델 모두 LangChain의 `with_structured_output()` 메서드와 함께 사용되어 LLM의 출력을 자동으로 Pydantic 모델로 변환합니다. 이를 통해:
108 | 
109 | - 타입 안전성 보장
110 | - 자동 검증 및 직렬화
111 | - 명확한 API 계약
112 | 
113 | 을 제공합니다.
114 | 
115 | 


--------------------------------------------------------------------------------
/cli/utils/env_loader.py:
--------------------------------------------------------------------------------
  1 | """환경 변수 유틸리티 모듈.
  2 | 
  3 | .env 파일 로드, 프롬프트 디렉토리 설정,
  4 | VectorDB 타입 및 위치 설정을 제공합니다.
  5 | """
  6 | 
  7 | import os
  8 | from pathlib import Path
  9 | from typing import Optional
 10 | 
 11 | import click
 12 | import dotenv
 13 | 
 14 | 
 15 | def load_env(
 16 |     *,
 17 |     env_file_path: Optional[str] = None,
 18 | ) -> None:
 19 |     """환경 변수 파일(.env)을 로드합니다.
 20 | 
 21 |     Args:
 22 |         env_file_path (Optional[str]): .env 파일 경로. None이면 기본 경로 사용.
 23 |     """
 24 |     try:
 25 |         if env_file_path:
 26 |             loaded = dotenv.load_dotenv(env_file_path, override=True)
 27 |             if loaded:
 28 |                 click.secho(f".env 파일 로드 성공: {env_file_path}", fg="green")
 29 |             else:
 30 |                 click.secho(f".env 파일을 찾을 수 없음: {env_file_path}", fg="yellow")
 31 |         else:
 32 |             dotenv.load_dotenv(override=True)
 33 |             click.secho("기본 .env 파일 로드 시도", fg="blue")
 34 |     except Exception as e:
 35 |         click.secho(f".env 파일 로드 중 오류 발생: {e}", fg="red")
 36 |         raise
 37 | 
 38 | 
 39 | def set_prompt_dir(
 40 |     *,
 41 |     prompt_dir_path: Optional[str],
 42 | ) -> None:
 43 |     """프롬프트 템플릿 디렉토리 경로를 설정합니다.
 44 | 
 45 |     Args:
 46 |         prompt_dir_path (Optional[str]): 디렉토리 경로. None이면 설정하지 않음.
 47 | 
 48 |     Raises:
 49 |         ValueError: 경로가 유효하지 않을 경우.
 50 |     """
 51 |     if not prompt_dir_path:
 52 |         click.secho(
 53 |             "프롬프트 디렉토리 경로가 지정되지 않아 설정을 건너뜁니다.", fg="yellow"
 54 |         )
 55 |         return
 56 | 
 57 |     path_obj = Path(prompt_dir_path)
 58 |     if not path_obj.exists() or not path_obj.is_dir():
 59 |         click.secho(f"유효하지 않은 디렉토리 경로: {prompt_dir_path}", fg="red")
 60 |         raise ValueError(f"Invalid prompt directory path: {prompt_dir_path}")
 61 | 
 62 |     os.environ["PROMPT_TEMPLATES_DIR"] = str(path_obj.resolve())
 63 |     click.secho(f"프롬프트 디렉토리 환경변수 설정됨: {path_obj.resolve()}", fg="green")
 64 | 
 65 | 
 66 | def set_vectordb(
 67 |     *,
 68 |     vectordb_type: str,
 69 |     vectordb_location: Optional[str] = None,
 70 | ) -> None:
 71 |     """VectorDB 타입과 위치를 설정합니다.
 72 | 
 73 |     Args:
 74 |         vectordb_type (str): VectorDB 타입 ("faiss" 또는 "pgvector").
 75 |         vectordb_location (Optional[str]): 경로 또는 연결 URL.
 76 | 
 77 |     Raises:
 78 |         ValueError: 잘못된 타입이나 경로/URL일 경우.
 79 |     """
 80 | 
 81 |     if vectordb_type not in ("faiss", "pgvector"):
 82 |         raise ValueError(f"지원하지 않는 VectorDB 타입: {vectordb_type}")
 83 | 
 84 |     os.environ["VECTORDB_TYPE"] = vectordb_type
 85 |     click.secho(f"VectorDB 타입 설정됨: {vectordb_type}", fg="green")
 86 | 
 87 |     if vectordb_location:
 88 |         if vectordb_type == "faiss":
 89 |             path = Path(vectordb_location)
 90 |             if not path.exists() or not path.is_dir():
 91 |                 raise ValueError(
 92 |                     f"유효하지 않은 FAISS 디렉토리 경로: {vectordb_location}"
 93 |                 )
 94 |         elif vectordb_type == "pgvector":
 95 |             if not vectordb_location.startswith("postgresql://"):
 96 |                 raise ValueError(
 97 |                     f"pgvector URL은 'postgresql://'로 시작해야 합니다: {vectordb_location}"
 98 |                 )
 99 | 
100 |         os.environ["VECTORDB_LOCATION"] = vectordb_location
101 |         click.secho(f"VectorDB 경로 설정됨: {vectordb_location}", fg="green")
102 |     else:
103 |         click.secho("VectorDB 경로가 지정되지 않아 기본값을 사용합니다.", fg="yellow")
104 | 


--------------------------------------------------------------------------------
/interface/core/config/__init__.py:
--------------------------------------------------------------------------------
  1 | """config 패키지의 공개 API를 재노출하여 기존 import 호환성을 유지합니다.
  2 | 모델, 경로/지속성, 레지스트리, 설정 업데이트 유틸을 한 곳에서 제공합니다.
  3 | """
  4 | 
  5 | from .models import (
  6 |     Config,
  7 |     DataHubSource,
  8 |     VectorDBSource,
  9 |     DataSourcesRegistry,
 10 |     DBConnectionProfile,
 11 |     DBConnectionsRegistry,
 12 |     LLMProfile,
 13 |     LLMRegistry,
 14 |     EmbeddingProfile,
 15 |     EmbeddingRegistry,
 16 | )
 17 | 
 18 | from .settings import (
 19 |     load_config,
 20 |     update_datahub_server,
 21 |     update_data_source_mode,
 22 |     update_vectordb_settings,
 23 |     update_llm_settings,
 24 |     update_embedding_settings,
 25 |     update_db_settings,
 26 | )
 27 | 
 28 | from .registry_data_sources import (
 29 |     get_data_sources_registry,
 30 |     add_datahub_source,
 31 |     update_datahub_source,
 32 |     delete_datahub_source,
 33 |     add_vectordb_source,
 34 |     update_vectordb_source,
 35 |     delete_vectordb_source,
 36 | )
 37 | 
 38 | from .registry_db import (
 39 |     get_db_connections_registry,
 40 |     add_db_connection,
 41 |     update_db_connection,
 42 |     delete_db_connection,
 43 | )
 44 | 
 45 | from .registry_llm import (
 46 |     get_llm_registry,
 47 |     save_llm_profile,
 48 |     get_embedding_registry,
 49 |     save_embedding_profile,
 50 | )
 51 | 
 52 | from .paths import (
 53 |     get_registry_file_path,
 54 |     get_db_registry_file_path,
 55 |     get_llm_registry_file_path,
 56 |     get_embedding_registry_file_path,
 57 |     ensure_parent_dir,
 58 | )
 59 | 
 60 | from .persist import (
 61 |     save_registry_to_disk,
 62 |     save_db_registry_to_disk,
 63 |     save_llm_registry_to_disk,
 64 |     save_embedding_registry_to_disk,
 65 |     load_registry_from_disk,
 66 |     load_db_registry_from_disk,
 67 |     load_llm_registry_from_disk,
 68 |     load_embedding_registry_from_disk,
 69 | )
 70 | 
 71 | __all__ = [
 72 |     # Models
 73 |     "Config",
 74 |     "DataHubSource",
 75 |     "VectorDBSource",
 76 |     "DataSourcesRegistry",
 77 |     "DBConnectionProfile",
 78 |     "DBConnectionsRegistry",
 79 |     "LLMProfile",
 80 |     "LLMRegistry",
 81 |     "EmbeddingProfile",
 82 |     "EmbeddingRegistry",
 83 |     # Settings APIs
 84 |     "load_config",
 85 |     "update_datahub_server",
 86 |     "update_data_source_mode",
 87 |     "update_vectordb_settings",
 88 |     "update_llm_settings",
 89 |     "update_embedding_settings",
 90 |     "update_db_settings",
 91 |     # Registries - data sources
 92 |     "get_data_sources_registry",
 93 |     "add_datahub_source",
 94 |     "update_datahub_source",
 95 |     "delete_datahub_source",
 96 |     "add_vectordb_source",
 97 |     "update_vectordb_source",
 98 |     "delete_vectordb_source",
 99 |     # Registries - db connections
100 |     "get_db_connections_registry",
101 |     "add_db_connection",
102 |     "update_db_connection",
103 |     "delete_db_connection",
104 |     # Registries - llm/embedding
105 |     "get_llm_registry",
106 |     "save_llm_profile",
107 |     "get_embedding_registry",
108 |     "save_embedding_profile",
109 |     # Persistence helpers and paths (for backward compatibility)
110 |     "get_registry_file_path",
111 |     "get_db_registry_file_path",
112 |     "get_llm_registry_file_path",
113 |     "get_embedding_registry_file_path",
114 |     "ensure_parent_dir",
115 |     "save_registry_to_disk",
116 |     "save_db_registry_to_disk",
117 |     "save_llm_registry_to_disk",
118 |     "save_embedding_registry_to_disk",
119 |     "load_registry_from_disk",
120 |     "load_db_registry_from_disk",
121 |     "load_llm_registry_from_disk",
122 |     "load_embedding_registry_from_disk",
123 | ]
124 | 


--------------------------------------------------------------------------------
/interface/core/config/registry_db.py:
--------------------------------------------------------------------------------
  1 | """DB 연결 프로파일 레지스트리를 세션+디스크에 관리하는 모듈입니다.
  2 | get/add/update/delete 연산과 Streamlit 세션 연동을 제공합니다.
  3 | """
  4 | 
  5 | from typing import Any, Dict, Optional
  6 | 
  7 | try:
  8 |     import streamlit as st  # type: ignore
  9 | except Exception:  # pragma: no cover
 10 |     st = None  # type: ignore
 11 | 
 12 | from .models import DBConnectionsRegistry, DBConnectionProfile
 13 | from .persist import load_db_registry_from_disk, save_db_registry_to_disk
 14 | 
 15 | 
 16 | def get_db_connections_registry() -> DBConnectionsRegistry:
 17 |     if st is not None and "db_connections_registry" in st.session_state:
 18 |         reg = st.session_state["db_connections_registry"]
 19 |         return reg  # stored as DBConnectionsRegistry
 20 |     try:
 21 |         registry = load_db_registry_from_disk()
 22 |     except Exception:
 23 |         registry = DBConnectionsRegistry()
 24 |     if st is not None:
 25 |         st.session_state["db_connections_registry"] = registry
 26 |     return registry
 27 | 
 28 | 
 29 | def _save_db_registry(registry: DBConnectionsRegistry) -> None:
 30 |     if st is not None:
 31 |         st.session_state["db_connections_registry"] = registry
 32 |     try:
 33 |         save_db_registry_to_disk(registry)
 34 |     except Exception:
 35 |         # fail-soft; UI will still have session copy
 36 |         pass
 37 | 
 38 | 
 39 | def add_db_connection(
 40 |     *,
 41 |     name: str,
 42 |     db_type: str,
 43 |     host: Optional[str] = None,
 44 |     port: Optional[int] = None,
 45 |     user: Optional[str] = None,
 46 |     password: Optional[str] = None,
 47 |     database: Optional[str] = None,
 48 |     extra: Optional[Dict[str, Any]] = None,
 49 |     note: Optional[str] = None,
 50 | ) -> None:
 51 |     db_type_norm = (db_type or "").lower()
 52 |     registry = get_db_connections_registry()
 53 |     if any(c.name == name for c in registry.connections):
 54 |         raise ValueError(f"이미 존재하는 DB 프로파일 이름입니다: {name}")
 55 |     registry.connections.append(
 56 |         DBConnectionProfile(
 57 |             name=name,
 58 |             type=db_type_norm,
 59 |             host=host,
 60 |             port=port,
 61 |             user=user,
 62 |             password=password,
 63 |             database=database,
 64 |             extra=extra or None,
 65 |             note=note or None,
 66 |         )
 67 |     )
 68 |     _save_db_registry(registry)
 69 | 
 70 | 
 71 | def update_db_connection(
 72 |     *,
 73 |     name: str,
 74 |     db_type: str,
 75 |     host: Optional[str],
 76 |     port: Optional[int],
 77 |     user: Optional[str],
 78 |     password: Optional[str],
 79 |     database: Optional[str],
 80 |     extra: Optional[Dict[str, Any]],
 81 |     note: Optional[str],
 82 | ) -> None:
 83 |     db_type_norm = (db_type or "").lower()
 84 |     registry = get_db_connections_registry()
 85 |     for idx, c in enumerate(registry.connections):
 86 |         if c.name == name:
 87 |             registry.connections[idx] = DBConnectionProfile(
 88 |                 name=name,
 89 |                 type=db_type_norm,
 90 |                 host=host,
 91 |                 port=port,
 92 |                 user=user,
 93 |                 password=password,
 94 |                 database=database,
 95 |                 extra=extra or None,
 96 |                 note=note or None,
 97 |             )
 98 |             _save_db_registry(registry)
 99 |             return
100 |     raise ValueError(f"존재하지 않는 DB 프로파일 이름입니다: {name}")
101 | 
102 | 
def delete_db_connection(*, name: str) -> None:
    """Remove every profile called ``name`` from the registry and save it.

    No error is raised when no profile matches; the registry is saved either
    way.
    """
    registry = get_db_connections_registry()
    kept_profiles = []
    for profile in registry.connections:
        if profile.name != name:
            kept_profiles.append(profile)
    registry.connections = kept_profiles
    _save_db_registry(registry)
107 | 


--------------------------------------------------------------------------------
/cli/__init__.py:
--------------------------------------------------------------------------------
  1 | """Lang2SQL CLI 프로그램입니다.
  2 | 이 프로그램은 환경 초기화와 Streamlit 실행을 제공합니다.
  3 | 
  4 | 주의: --datahub_server 옵션은 더 이상 사용되지 않습니다(deprecated).
  5 | DataHub 설정은 UI의 설정 > 데이터 소스 탭에서 관리하세요.
  6 | """
  7 | 
  8 | import click
  9 | 
 10 | from cli.commands.quary import query_command
 11 | from cli.commands.run_streamlit import run_streamlit_cli_command
 12 | from cli.core.environment import initialize_environment
 13 | from cli.core.streamlit_runner import run_streamlit_command
 14 | from cli.utils.logger import configure_logging
 15 | 
 16 | from version import __version__
 17 | 
 18 | logger = configure_logging()
 19 | 
 20 | 
 21 | # pylint: disable=redefined-outer-name,broad-exception-caught
 22 | @click.group()
 23 | @click.version_option(version=__version__)
 24 | @click.pass_context
 25 | @click.option(
 26 |     "--datahub_server",
 27 |     default=None,
 28 |     help=("[Deprecated] DataHub GMS URL. 이제는 UI 설정 > 데이터 소스에서 관리하세요."),
 29 | )
 30 | @click.option(
 31 |     "--run-streamlit",
 32 |     is_flag=True,
 33 |     help=(
 34 |         "이 옵션을 지정하면 CLI 실행 시 Streamlit 애플리케이션을 바로 실행합니다. "
 35 |         "별도의 명령어 입력 없이 웹 인터페이스를 띄우고 싶을 때 사용합니다."
 36 |     ),
 37 | )
 38 | @click.option(
 39 |     "-p",
 40 |     "--port",
 41 |     type=int,
 42 |     default=8501,
 43 |     help=(
 44 |         "Streamlit 서버가 바인딩될 포트 번호를 지정합니다. "
 45 |         "기본 포트는 8501이며, 포트 충돌을 피하거나 여러 인스턴스를 실행할 때 변경할 수 있습니다."
 46 |     ),
 47 | )
 48 | @click.option(
 49 |     "--env-file-path",
 50 |     type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True),
 51 |     help="환경 변수를 로드할 .env 파일의 경로를 지정합니다. 지정하지 않으면 기본 경로를 사용합니다.",
 52 | )
 53 | @click.option(
 54 |     "--prompt-dir-path",
 55 |     type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True),
 56 |     help="프롬프트 템플릿(.md 파일)이 저장된 디렉토리 경로를 지정합니다. 지정하지 않으면 기본 경로를 사용합니다.",
 57 | )
 58 | @click.option(
 59 |     "--vectordb-type",
 60 |     default=None,
 61 |     help="[Deprecated] VectorDB 타입. 이제는 UI 설정 > 데이터 소스에서 관리하세요.",
 62 | )
 63 | @click.option(
 64 |     "--vectordb-location",
 65 |     default=None,
 66 |     help="[Deprecated] VectorDB 위치. 이제는 UI 설정 > 데이터 소스에서 관리하세요.",
 67 | )
 68 | def cli(
 69 |     ctx: click.Context,
 70 |     datahub_server: str | None,
 71 |     run_streamlit: bool,
 72 |     port: int,
 73 |     env_file_path: str | None = None,
 74 |     prompt_dir_path: str | None = None,
 75 |     vectordb_type: str | None = None,
 76 |     vectordb_location: str | None = None,
 77 | ) -> None:
 78 |     """Lang2SQL CLI 엔트리포인트.
 79 | 
 80 |     - 환경 변수 및 VectorDB 설정 초기화
 81 |     - 필요 시 Streamlit 애플리케이션 실행
 82 |     """
 83 | 
 84 |     try:
 85 |         initialize_environment(
 86 |             env_file_path=env_file_path, prompt_dir_path=prompt_dir_path
 87 |         )
 88 |     except Exception:
 89 |         logger.error("Initialization failed.", exc_info=True)
 90 |         ctx.exit(1)
 91 | 
 92 |     logger.info(
 93 |         "Initialization started: run_streamlit = %s, port = %d",
 94 |         run_streamlit,
 95 |         port,
 96 |     )
 97 | 
 98 |     # Deprecated 안내: CLI에서 DataHub 설정은 더 이상 처리하지 않습니다
 99 |     if datahub_server:
100 |         click.secho(
101 |             "[Deprecated] --datahub_server 옵션은 더 이상 사용되지 않습니다. 설정 > 데이터 소스 탭에서 설정하세요.",
102 |             fg="yellow",
103 |         )
104 | 
105 |     # Deprecated 안내: CLI에서 VectorDB 설정은 더 이상 처리하지 않습니다
106 |     if vectordb_type or vectordb_location:
107 |         click.secho(
108 |             "[Deprecated] --vectordb-type/--vectordb-location 옵션은 더 이상 사용되지 않습니다. 설정 > 데이터 소스 탭에서 설정하세요.",
109 |             fg="yellow",
110 |         )
111 | 
112 |     if run_streamlit:
113 |         run_streamlit_command(port)
114 | 
115 | 
116 | cli.add_command(run_streamlit_cli_command)
117 | cli.add_command(query_command)
118 | 


--------------------------------------------------------------------------------
/utils/llm/retrieval.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | from langchain.retrievers import ContextualCompressionRetriever
  4 | from langchain.retrievers.document_compressors import CrossEncoderReranker
  5 | from langchain_community.cross_encoders import HuggingFaceCrossEncoder
  6 | from transformers import AutoModelForSequenceClassification, AutoTokenizer
  7 | 
  8 | from utils.llm.vectordb import get_vector_db
  9 | 
 10 | 
 11 | def load_reranker_model(device: str = "cpu"):
 12 |     """한국어 reranker 모델을 로드하거나 다운로드합니다."""
 13 |     local_model_path = os.path.join(os.getcwd(), "ko_reranker_local")
 14 | 
 15 |     # 로컬에 저장된 모델이 있으면 불러오고, 없으면 다운로드 후 저장
 16 |     if os.path.exists(local_model_path) and os.path.isdir(local_model_path):
 17 |         print("🔄 ko-reranker 모델 로컬에서 로드 중...")
 18 |     else:
 19 |         print("⬇️ ko-reranker 모델 다운로드 및 저장 중...")
 20 |         model = AutoModelForSequenceClassification.from_pretrained(
 21 |             "Dongjin-kr/ko-reranker"
 22 |         )
 23 |         tokenizer = AutoTokenizer.from_pretrained("Dongjin-kr/ko-reranker")
 24 |         model.save_pretrained(local_model_path)
 25 |         tokenizer.save_pretrained(local_model_path)
 26 | 
 27 |     return HuggingFaceCrossEncoder(
 28 |         model_name=local_model_path,
 29 |         model_kwargs={"device": device},
 30 |     )
 31 | 
 32 | 
 33 | def get_retriever(retriever_name: str = "기본", top_n: int = 5, device: str = "cpu"):
 34 |     """검색기 타입에 따라 적절한 검색기를 생성합니다.
 35 | 
 36 |     Args:
 37 |         retriever_name: 사용할 검색기 이름 ("기본", "재순위", 등)
 38 |         top_n: 반환할 상위 결과 개수
 39 |     """
 40 |     print(device)
 41 |     retrievers = {
 42 |         "기본": lambda: get_vector_db().as_retriever(search_kwargs={"k": top_n}),
 43 |         "Reranker": lambda: ContextualCompressionRetriever(
 44 |             base_compressor=CrossEncoderReranker(
 45 |                 model=load_reranker_model(device), top_n=top_n
 46 |             ),
 47 |             base_retriever=get_vector_db().as_retriever(search_kwargs={"k": top_n}),
 48 |         ),
 49 |     }
 50 | 
 51 |     if retriever_name not in retrievers:
 52 |         print(
 53 |             f"경고: '{retriever_name}' 검색기를 찾을 수 없습니다. 기본 검색기를 사용합니다."
 54 |         )
 55 |         retriever_name = "기본"
 56 | 
 57 |     return retrievers[retriever_name]()
 58 | 
 59 | 
 60 | def search_tables(
 61 |     query: str, retriever_name: str = "기본", top_n: int = 5, device: str = "cpu"
 62 | ):
 63 |     """쿼리에 맞는 테이블 정보를 검색합니다."""
 64 |     if retriever_name == "기본":
 65 |         db = get_vector_db()
 66 |         doc_res = db.similarity_search(query, k=top_n)
 67 |     else:
 68 |         retriever = get_retriever(
 69 |             retriever_name=retriever_name, top_n=top_n, device=device
 70 |         )
 71 |         doc_res = retriever.invoke(query)
 72 | 
 73 |     # 결과를 사전 형태로 변환
 74 |     documents_dict = {}
 75 |     for doc in doc_res:
 76 |         lines = doc.page_content.split("\n")
 77 | 
 78 |         # 테이블명 및 설명 추출
 79 |         table_name, table_desc = lines[0].split(": ", 1)
 80 | 
 81 |         # 섹션별로 정보 추출 (테이블/컬럼만 사용)
 82 |         columns = {}
 83 |         current_section = None
 84 | 
 85 |         for i, line in enumerate(lines[1:], 1):
 86 |             line = line.strip()
 87 | 
 88 |             # 섹션 헤더 확인
 89 |             if line == "Columns:":
 90 |                 current_section = "columns"
 91 |                 continue
 92 | 
 93 |             # 각 섹션의 내용 파싱
 94 |             if current_section == "columns" and ": " in line:
 95 |                 col_name, col_desc = line.split(": ", 1)
 96 |                 columns[col_name.strip()] = col_desc.strip()
 97 | 
 98 |         # 딕셔너리 저장
 99 |         documents_dict[table_name] = {
100 |             "table_description": table_desc.strip(),
101 |             **columns,  # 컬럼 정보 추가
102 |         }
103 | 
104 |     return documents_dict
105 | 


--------------------------------------------------------------------------------
/cli/core/README.md:
--------------------------------------------------------------------------------
  1 | # CLI Core 모듈
  2 | 
  3 | Lang2SQL CLI의 핵심 기능을 제공하는 모듈입니다.
  4 | 
  5 | ## 디렉토리 구조
  6 | 
  7 | ```
  8 | cli/core/
  9 | ├── environment.py      # 환경 변수 초기화 모듈
 10 | └── streamlit_runner.py # Streamlit 실행 유틸리티 모듈
 11 | ```
 12 | 
 13 | ## 모듈 설명
 14 | 
 15 | ### 1. environment.py
 16 | 
 17 | 환경 변수 초기화를 담당하는 모듈입니다. VectorDB 설정은 UI에서 관리합니다.
 18 | 
 19 | #### 주요 기능
 20 | 
 21 | - `initialize_environment()`: 환경 변수를 초기화하는 함수
 22 | 
 23 | #### 함수 상세
 24 | 
 25 | ##### `initialize_environment(env_file_path, prompt_dir_path)`
 26 | 
 27 | 환경 변수를 초기화합니다. VectorDB 설정은 UI에서 관리합니다.
 28 | 
 29 | **매개변수:**
 30 | - `env_file_path` (Optional[str]): 로드할 .env 파일 경로. None이면 기본값 사용.
 31 | - `prompt_dir_path` (Optional[str]): 프롬프트 템플릿 디렉토리 경로. None이면 설정하지 않음.
 32 | 
 33 | **예외:**
 34 | - `Exception`: 초기화 과정에서 오류가 발생한 경우.
 35 | 
 36 | **내부 동작:**
 37 | - `cli.utils.env_loader.load_env()`: .env 파일을 로드합니다.
 38 | - `cli.utils.env_loader.set_prompt_dir()`: 프롬프트 템플릿 디렉토리 경로를 환경 변수로 설정합니다.
 39 | 
 40 | #### 사용 예시
 41 | 
 42 | ```python
 43 | from cli.core.environment import initialize_environment
 44 | 
 45 | # 기본 경로로 초기화
 46 | initialize_environment(env_file_path=None, prompt_dir_path=None)
 47 | 
 48 | # 사용자 정의 경로로 초기화
 49 | initialize_environment(
 50 |     env_file_path="/path/to/.env",
 51 |     prompt_dir_path="/path/to/prompts"
 52 | )
 53 | ```
 54 | 
 55 | #### import 및 사용 위치
 56 | 
 57 | 이 모듈의 `initialize_environment` 함수는 다음과 같이 사용됩니다:
 58 | 
 59 | - **`cli/__init__.py`** (85-90번째 줄): CLI 진입점에서 환경 초기화 시 호출
 60 |   ```python
 61 |   from cli.core.environment import initialize_environment
 62 |   
 63 |   initialize_environment(
 64 |       env_file_path=env_file_path, 
 65 |       prompt_dir_path=prompt_dir_path
 66 |   )
 67 |   ```
 68 | 
 69 | ### 2. streamlit_runner.py
 70 | 
 71 | Streamlit 애플리케이션 실행을 담당하는 유틸리티 모듈입니다.
 72 | 
 73 | #### 주요 기능
 74 | 
 75 | - `run_streamlit_command()`: 지정된 포트에서 Streamlit 애플리케이션을 실행하는 함수
 76 | 
 77 | #### 함수 상세
 78 | 
 79 | ##### `run_streamlit_command(port)`
 80 | 
 81 | 지정된 포트에서 Streamlit 애플리케이션을 실행합니다.
 82 | 
 83 | **매개변수:**
 84 | - `port` (int): 바인딩할 포트 번호.
 85 | 
 86 | **예외:**
 87 | - `subprocess.CalledProcessError`: 실행 실패 시 발생.
 88 | 
 89 | **내부 동작:**
 90 | - `subprocess.run()`을 사용하여 `streamlit run` 명령을 실행합니다.
 91 | - 실행 대상: `interface/streamlit_app.py`
 92 | - 서버 주소: `0.0.0.0`
 93 | - 포트: 사용자 지정 값 (기본값: 8501)
 94 | - 로깅: `cli.utils.logger.configure_logging()`을 통해 로그 출력
 95 | 
 96 | #### 사용 예시
 97 | 
 98 | ```python
 99 | from cli.core.streamlit_runner import run_streamlit_command
100 | 
101 | # 기본 포트(8501)로 실행
102 | run_streamlit_command(port=8501)
103 | 
104 | # 사용자 정의 포트로 실행
105 | run_streamlit_command(port=8080)
106 | ```
107 | 
108 | #### import 및 사용 위치
109 | 
110 | 이 모듈의 `run_streamlit_command` 함수는 다음과 같이 사용됩니다:
111 | 
112 | 1. **`cli/__init__.py`** (113번째 줄): CLI의 `--run-streamlit` 옵션이 활성화된 경우 호출
113 |    ```python
114 |    from cli.core.streamlit_runner import run_streamlit_command
115 |    
116 |    if run_streamlit:
117 |        run_streamlit_command(port)
118 |    ```
119 | 
120 | 2. **`cli/commands/run_streamlit.py`** (29번째 줄): `run-streamlit` CLI 명령 실행 시 호출
121 |    ```python
122 |    from cli.core.streamlit_runner import run_streamlit_command
123 |    
124 |    @click.command(name="run-streamlit")
125 |    def run_streamlit_cli_command(port: int):
126 |        logger.info("Executing 'run-streamlit' command on port %d...", port)
127 |        run_streamlit_command(port)
128 |    ```
129 | 
130 | ## 의존성
131 | 
132 | ### 내부 의존성
133 | 
134 | - `cli.utils.env_loader`: 환경 변수 로드 및 프롬프트 디렉토리 설정
135 | - `cli.utils.logger`: 로깅 설정
136 | 
137 | ### 외부 의존성
138 | 
139 | - `subprocess`: 프로세스 실행 (streamlit_runner.py)
140 | 
141 | ## 주요 특징
142 | 
143 | 1. **환경 관리**: CLI 진입점에서 일관된 환경 변수 초기화 보장
144 | 2. **UI 중심 설계**: VectorDB 설정은 UI에서 관리하여 사용자 편의성 향상
145 | 3. **유연한 실행**: 다양한 포트에서 Streamlit 애플리케이션 실행 지원
146 | 4. **로깅 지원**: 실행 상태 및 오류 추적 가능
147 | 
148 | 


--------------------------------------------------------------------------------
/utils/databases/factory.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 데이터베이스 커넥터 팩토리 모듈.
  3 | 
  4 | 이 모듈은 DB 타입에 따라 알맞은 커넥터 클래스를 동적으로 로드하여
  5 | 해당 DB에 연결할 수 있는 인스턴스를 생성하는 기능을 제공합니다.
  6 | 환경변수로부터 접속 설정을 자동으로 로드하는 유틸리티 함수도 포함합니다.
  7 | """
  8 | 
  9 | import importlib
 10 | import inspect
 11 | import os
 12 | from typing import Optional
 13 | 
 14 | from utils.databases.config import DBConfig
 15 | from utils.databases.logger import logger
 16 | 
 17 | 
 18 | class DatabaseFactory:
 19 |     """
 20 |     데이터베이스 커넥터 팩토리 클래스.
 21 | 
 22 |     DB 타입에 따라 알맞은 Connector 클래스를 동적으로 로드하고,
 23 |     해당 인스턴스를 반환하는 기능을 제공합니다.
 24 |     """
 25 | 
 26 |     @staticmethod
 27 |     def get_connector(db_type: Optional[str] = None, config: Optional[DBConfig] = None):
 28 |         """
 29 |         주어진 DB 타입에 해당하는 Connector 인스턴스를 반환합니다.
 30 | 
 31 |         지정된 DB 타입에 맞는 커넥터 모듈을 동적으로 로드하고,
 32 |         해당 모듈 내의 Connector 클래스를 탐색하여 인스턴스를 생성합니다.
 33 |         DB 타입이 지정되지 않은 경우 환경 변수(DB_TYPE)에서 자동으로 가져옵니다.
 34 | 
 35 |         Args:
 36 |             db_type (Optional[str]): 데이터베이스 타입 문자열 (예: 'postgres', 'mysql', 'trino').
 37 |             config (Optional[DBConfig]): 데이터베이스 연결 설정 객체.
 38 |                 지정되지 않은 경우 환경 변수에서 자동으로 로드됩니다.
 39 | 
 40 |         Returns:
 41 |             BaseConnector: 주어진 DB 타입에 해당하는 Connector 인스턴스.
 42 | 
 43 |         Raises:
 44 |             ValueError: DB_TYPE이 지정되지 않았거나,
 45 |                 지원되지 않는 DB 타입이거나,
 46 |                 모듈 또는 Connector 클래스를 찾을 수 없는 경우.
 47 |         """
 48 |         if not db_type:
 49 |             db_type = os.getenv("DB_TYPE")
 50 |             if not db_type:
 51 |                 raise ValueError("DB_TYPE이 환경변수 또는 인자로 제공되어야 합니다.")
 52 |         db_type = db_type.lower()
 53 | 
 54 |         if not config:
 55 |             config = load_config_from_env(db_type.upper())
 56 | 
 57 |         try:
 58 |             module_name = f"utils.databases.connector.{db_type}_connector"
 59 |             module = importlib.import_module(module_name)
 60 | 
 61 |             connector_class = None
 62 |             for name, cls in inspect.getmembers(module, inspect.isclass):
 63 |                 if name.lower() == f"{db_type}connector":
 64 |                     connector_class = cls
 65 |                     break
 66 |             if connector_class is None:
 67 |                 raise ValueError(f"No matching Connector class found for {db_type}")
 68 |         except (ImportError, AttributeError) as e:
 69 |             logger.error(
 70 |                 "지원되지 않는 DB 타입이거나 모듈을 로드할 수 없습니다: %s",
 71 |                 db_type,
 72 |             )
 73 |             raise ValueError(f"Unsupported DB type: {db_type}") from e
 74 | 
 75 |         return connector_class(config)
 76 | 
 77 | 
 78 | def load_config_from_env(prefix: str) -> DBConfig:
 79 |     """
 80 |     환경변수에서 데이터베이스 접속 설정을 로드합니다.
 81 | 
 82 |     Args:
 83 |         prefix (str): 환경변수 접두어 (예: 'POSTGRES', 'MYSQL').
 84 | 
 85 |     Returns:
 86 |         DBConfig: 환경변수에서 로드된 설정 정보를 담은 DBConfig 객체.
 87 |     """
 88 |     base_keys = {
 89 |         "HOST",
 90 |         "PORT",
 91 |         "USER",
 92 |         "PASSWORD",
 93 |         "DATABASE",
 94 |     }
 95 |     config = {
 96 |         "host": os.getenv(f"{prefix}_HOST"),
 97 |         "port": (
 98 |             int(os.getenv(f"{prefix}_PORT")) if os.getenv(f"{prefix}_PORT") else None
 99 |         ),
100 |         "user": os.getenv(f"{prefix}_USER"),
101 |         "password": os.getenv(f"{prefix}_PASSWORD"),
102 |         "database": os.getenv(f"{prefix}_DATABASE"),
103 |     }
104 | 
105 |     extra = {}
106 |     for key, value in os.environ.items():
107 |         if (
108 |             key.startswith(f"{prefix}_")
109 |             and key.split("_", 1)[1].upper() not in base_keys
110 |         ):
111 |             extra[key[len(prefix) + 1 :].lower()] = value
112 |     if extra:
113 |         config["extra"] = extra
114 | 
115 |     return DBConfig(**config)
116 | 


--------------------------------------------------------------------------------
/utils/visualization/README.md:
--------------------------------------------------------------------------------
  1 | ## utils.visualization 개요
  2 | 
  3 | Lang2SQL 파이프라인에서 SQL 쿼리 결과를 시각화하기 위한 유틸리티 모듈입니다. LLM을 활용하여 적절한 차트를 자동 생성하고 Plotly를 통해 렌더링합니다.
  4 | 
  5 | ### 파일 구조
  6 | 
  7 | ```
  8 | utils/visualization/
  9 | └── display_chart.py    # SQL 결과를 Plotly 차트로 변환하는 핵심 모듈
 10 | ```
 11 | 
 12 | ### 각 파일 상세 설명
 13 | 
 14 | #### display_chart.py
 15 | 
 16 | **목적**: SQL 쿼리 실행 결과를 다양한 형태의 Plotly 차트로 자동 변환하는 모듈
 17 | 
 18 | **주요 클래스**:
 19 | 
 20 | - **`DisplayChart`**: SQL 결과 시각화를 담당하는 메인 클래스
 21 |   - `question` (str): 사용자가 입력한 자연어 질문
 22 |   - `sql` (str): 실행된 SQL 쿼리
 23 |   - `df_metadata` (str): 데이터프레임의 메타데이터 정보
 24 | 
 25 | **주요 메서드**:
 26 | 
 27 | 1. **`llm_model_for_chart(message_log)`**
 28 |    - 환경변수 `LLM_PROVIDER`가 "openai"일 경우 ChatOpenAI로 차트 코드 생성
 29 |    - 필요 환경변수: `OPEN_AI_KEY`, `OPEN_AI_LLM_MODEL` (기본: "gpt-4o")
 30 |    - 반환: 생성된 차트 코드 또는 None
 31 | 
 32 | 2. **`generate_plotly_code()`**
 33 |    - 사용자 질문, SQL 쿼리, 데이터프레임 메타데이터를 프롬프트로 구성
 34 |    - LLM이 데이터에 맞는 적절한 Plotly 코드 생성
 35 |    - 반환: Python 코드 문자열
 36 | 
 37 | 3. **`get_plotly_figure(plotly_code, df, dark_mode=True)`**
 38 |    - 생성된 Plotly 코드를 실행하여 Figure 객체 생성
 39 |    - 에러 발생 시 데이터 타입 기반 fallback 차트 생성:
 40 |      - 숫자 컬럼 2개 이상 → scatter plot
 41 |      - 숫자 1개 + 범주 1개 → bar plot
 42 |      - 범주 1개 (고유값 < 10) → pie chart
 43 |      - 기타 → line plot
 44 |    - dark_mode=True 시 "plotly_dark" 템플릿 적용
 45 |    - 반환: Plotly Figure 객체 또는 None
 46 | 
 47 | 4. **내부 헬퍼 메서드**:
 48 |    - `_extract_python_code(markdown_string)`: 마크다운에서 Python 코드 블록 추출
 49 |    - `_sanitize_plotly_code(raw_plotly_code)`: 불필요한 `fig.show()` 문 제거
 50 | 
 51 | **의존성**:
 52 | - `pandas`: 데이터프레임 처리
 53 | - `plotly.express` (px): 간단한 차트 생성
 54 | - `plotly.graph_objects` (go): 고급 차트 구성
 55 | - `langchain_openai.ChatOpenAI`: LLM 차트 코드 생성
 56 | - `langchain_core.messages`: SystemMessage, HumanMessage
 57 | 
 58 | ### 사용 방법
 59 | 
 60 | #### 1. 기본 사용법 (interface/core/result_renderer.py에서 실제 사용)
 61 | 
 62 | ```python
 63 | from utils.visualization.display_chart import DisplayChart
 64 | import pandas as pd
 65 | 
 66 | # DisplayChart 인스턴스 생성
 67 | display_code = DisplayChart(
 68 |     question="지난달 매출 추이를 보여줘",
 69 |     sql="SELECT date, revenue FROM sales WHERE ...",
 70 |     df_metadata=f"Running df.dtypes gives:\n{df.dtypes}"
 71 | )
 72 | 
 73 | # Plotly 코드 생성
 74 | plotly_code = display_code.generate_plotly_code()
 75 | 
 76 | # Figure 객체 생성
 77 | fig = display_code.get_plotly_figure(plotly_code=plotly_code, df=df)
 78 | 
 79 | # Streamlit에서 차트 표시
 80 | st.plotly_chart(fig)
 81 | ```
 82 | 
 83 | #### 2. 통합 흐름 (Lang2SQL 파이프라인 내)
 84 | 
 85 | `interface/core/result_renderer.py`의 `display_result()` 함수에서 사용:
 86 | 
 87 | 1. SQL 쿼리 실행 후 pandas DataFrame 반환
 88 | 2. `DisplayChart` 초기화 (질문, SQL, 메타데이터)
 89 | 3. `generate_plotly_code()`로 LLM 기반 차트 코드 생성
 90 | 4. `get_plotly_figure()`로 실행 및 Figure 객체 획득
 91 | 5. `st.plotly_chart()`로 Streamlit UI에 렌더링
 92 | 
 93 | **경로**: `interface/core/result_renderer.py` (200-211번째 줄)
 94 | 
 95 | ### import 관계
 96 | 
 97 | **import하는 파일**:
 98 | - `interface/core/result_renderer.py`: `from utils.visualization.display_chart import DisplayChart`
 99 | 
100 | **외부 의존성**:
101 | - `langchain_openai.ChatOpenAI`: OpenAI LLM API 호출
102 | - `plotly`: 차트 렌더링 및 Figure 객체 관리
103 | - `pandas`: 데이터프레임 처리
104 | - 환경변수: `LLM_PROVIDER`, `OPEN_AI_KEY`, `OPEN_AI_LLM_MODEL`
105 | 
106 | ### 환경 변수 요약
107 | 
108 | - **`LLM_PROVIDER`**: LLM 공급자 지정 (현재 "openai"만 지원)
109 | - **`OPEN_AI_KEY`**: OpenAI API 키
110 | - **`OPEN_AI_LLM_MODEL`**: 사용할 모델 (기본값: "gpt-4o")
111 | 
112 | ### 주요 특징
113 | 
114 | 1. **LLM 기반 지능형 차트 생성**: 데이터 구조와 질문 내용에 맞춰 적절한 차트 유형 자동 선택
115 | 2. **Fallback 메커니즘**: LLM 코드 생성 실패 시 데이터 타입 기반 대체 차트 제공
116 | 3. **다크 모드 지원**: 기본적으로 plotly_dark 템플릿 적용
117 | 4. **에러 안전성**: 코드 실행 중 예외 발생 시에도 항상 유효한 Figure 객체 반환
118 | 
119 | ### 개선 가능 영역
120 | 
121 | - 다른 LLM 공급자 지원 (현재 OpenAI만 지원)
122 | - 더 다양한 차트 유형 지원
123 | - 컬러 스킴 및 스타일 커스터마이징 옵션
124 | - 성능 최적화 (코드 생성 시간 단축)
125 | 
126 | 


--------------------------------------------------------------------------------
/interface/core/config/registry_llm.py:
--------------------------------------------------------------------------------
  1 | """LLM/Embedding 프로파일 레지스트리를 세션+디스크에 관리하는 모듈입니다.
  2 | 프로파일 저장(upsert)과 Streamlit 세션 연동을 제공합니다.
  3 | """
  4 | 
  5 | try:
  6 |     import streamlit as st  # type: ignore
  7 | except Exception:  # pragma: no cover
  8 |     st = None  # type: ignore
  9 | 
 10 | from .models import (
 11 |     LLMRegistry,
 12 |     LLMProfile,
 13 |     EmbeddingRegistry,
 14 |     EmbeddingProfile,
 15 | )
 16 | from .persist import (
 17 |     load_llm_registry_from_disk,
 18 |     save_llm_registry_to_disk,
 19 |     load_embedding_registry_from_disk,
 20 |     save_embedding_registry_to_disk,
 21 | )
 22 | 
 23 | 
 24 | def get_llm_registry() -> LLMRegistry:
 25 |     if st is not None and "llm_registry" in st.session_state:
 26 |         return st.session_state["llm_registry"]
 27 |     try:
 28 |         registry = load_llm_registry_from_disk()
 29 |     except Exception:
 30 |         registry = LLMRegistry()
 31 |     if st is not None:
 32 |         st.session_state["llm_registry"] = registry
 33 |     return registry
 34 | 
 35 | 
 36 | def _save_llm_registry(registry: LLMRegistry) -> None:
 37 |     if st is not None:
 38 |         st.session_state["llm_registry"] = registry
 39 |     try:
 40 |         save_llm_registry_to_disk(registry)
 41 |     except Exception:
 42 |         pass
 43 | 
 44 | 
 45 | def get_embedding_registry() -> EmbeddingRegistry:
 46 |     if st is not None and "embedding_registry" in st.session_state:
 47 |         return st.session_state["embedding_registry"]
 48 |     try:
 49 |         registry = load_embedding_registry_from_disk()
 50 |     except Exception:
 51 |         registry = EmbeddingRegistry()
 52 |     if st is not None:
 53 |         st.session_state["embedding_registry"] = registry
 54 |     return registry
 55 | 
 56 | 
 57 | def _save_embedding_registry(registry: EmbeddingRegistry) -> None:
 58 |     if st is not None:
 59 |         st.session_state["embedding_registry"] = registry
 60 |     try:
 61 |         save_embedding_registry_to_disk(registry)
 62 |     except Exception:
 63 |         pass
 64 | 
 65 | 
 66 | def save_llm_profile(
 67 |     *, name: str, provider: str, values: dict[str, str | None], note: str | None = None
 68 | ) -> None:
 69 |     provider_norm = (provider or "").lower()
 70 |     stored_fields: dict[str, str] = {}
 71 |     for k, v in (values or {}).items():
 72 |         if v is None:
 73 |             continue
 74 |         stored_fields[k] = str(v)
 75 | 
 76 |     reg = get_llm_registry()
 77 |     # upsert by name
 78 |     for idx, p in enumerate(reg.profiles):
 79 |         if p.name == name:
 80 |             reg.profiles[idx] = LLMProfile(
 81 |                 name=name, provider=provider_norm, fields=stored_fields, note=note
 82 |             )
 83 |             _save_llm_registry(reg)
 84 |             return
 85 |     reg.profiles.append(
 86 |         LLMProfile(name=name, provider=provider_norm, fields=stored_fields, note=note)
 87 |     )
 88 |     _save_llm_registry(reg)
 89 | 
 90 | 
 91 | def save_embedding_profile(
 92 |     *, name: str, provider: str, values: dict[str, str | None], note: str | None = None
 93 | ) -> None:
 94 |     provider_norm = (provider or "").lower()
 95 |     stored_fields: dict[str, str] = {}
 96 |     for k, v in (values or {}).items():
 97 |         if v is None:
 98 |             continue
 99 |         stored_fields[k] = str(v)
100 | 
101 |     reg = get_embedding_registry()
102 |     for idx, p in enumerate(reg.profiles):
103 |         if p.name == name:
104 |             reg.profiles[idx] = EmbeddingProfile(
105 |                 name=name, provider=provider_norm, fields=stored_fields, note=note
106 |             )
107 |             _save_embedding_registry(reg)
108 |             return
109 |     reg.profiles.append(
110 |         EmbeddingProfile(
111 |             name=name, provider=provider_norm, fields=stored_fields, note=note
112 |         )
113 |     )
114 |     _save_embedding_registry(reg)
115 | 


--------------------------------------------------------------------------------
/infra/monitoring/README.md:
--------------------------------------------------------------------------------
  1 | # infra/monitoring 패키지
  2 | 
  3 | 서버 상태 확인 및 헬스 체크 기능을 제공하는 모니터링 패키지입니다.
  4 | 
  5 | ## 디렉토리 구조
  6 | 
  7 | ```
  8 | infra/monitoring/
  9 | ├── __init__.py
 10 | ├── __pycache__/
 11 | └── check_server.py
 12 | ```
 13 | 
 14 | ## 파일 설명
 15 | 
 16 | ### `__init__.py`
 17 | 
 18 | 모니터링/헬스체크 패키지의 초기화 파일입니다.
 19 | 
 20 | **내용:**
 21 | - 패키지 문서화 문자열: "모니터링/헬스체크 패키지"
 22 | 
 23 | **역할:**
 24 | - `infra.monitoring` 패키지를 Python 패키지로 인식시키는 초기화 파일
 25 | 
 26 | ---
 27 | 
 28 | ### `check_server.py`
 29 | 
 30 | 서버 상태 확인 및 연결 관련 기능을 제공하는 유틸리티 클래스입니다.
 31 | 
 32 | **주요 구성 요소:**
 33 | 
 34 | 1. **HTTP 기반 서버 헬스 체크**
 35 |    - `/health` 엔드포인트를 통한 서버 상태 확인
 36 |    - 향후 서버 연결 또는 상태 점검 기능 확장 가능한 구조
 37 | 
 38 | 2. **예외 처리 및 로깅**
 39 |    - 요청 실패, 타임아웃, 연결 오류 등의 다양한 예외 상황 처리
 40 |    - 로깅을 통해 상세한 실패 원인 기록
 41 |    - 결과를 boolean 값으로 반환
 42 | 
 43 | **주요 클래스:**
 44 | 
 45 | #### `CheckServer`
 46 | 
 47 | 서버의 상태를 확인하거나 연결을 테스트하는 유틸리티 메서드를 제공하는 클래스입니다.
 48 | 
 49 | 현재는 GMS 서버의 `/health` 엔드포인트에 대한 헬스 체크 기능을 포함하고 있으며, 향후에는 다양한 서버 연결 확인 및 상태 점검 기능이 추가될 수 있도록 확장 가능한 구조로 설계되었습니다.
 50 | 
 51 | **메서드:**
 52 | 
 53 | - `is_gms_server_healthy(*, url: str) -> bool` (정적 메서드):
 54 |   - 지정된 GMS 서버의 `/health` 엔드포인트에 요청을 보내 상태를 확인합니다.
 55 |   - Parameters:
 56 |     - `url` (str): 헬스 체크를 수행할 GMS 서버의 기본 URL (예: "http://localhost:8080")
 57 |   - Returns:
 58 |     - `bool`: 서버가 정상적으로 응답하면 `True`, 예외 발생 시 `False`
 59 |   - 기능:
 60 |     - 서버 URL과 `/health` 경로를 결합하여 헬스 체크 엔드포인트 생성
 61 |     - 3초 타임아웃으로 GET 요청 수행
 62 |     - HTTP 200 응답 시 `True` 반환
 63 |     - 다음 예외 상황 처리:
 64 |       - `ConnectTimeout`, `ReadTimeout`: 타임아웃 오류 로깅
 65 |       - `ConnectionError`: 연결 실패 로깅
 66 |       - `HTTPError`: HTTP 오류 로깅
 67 |       - `RequestException`: 기타 요청 예외 로깅
 68 |     - 예외 발생 시 `False` 반환
 69 | 
 70 | **의존성:**
 71 | - `requests`: HTTP 요청 수행
 72 | - `urllib.parse.urljoin`: URL 경로 결합
 73 | - `logging`: 로깅 기능
 74 | 
 75 | **사용 예시:**
 76 | 
 77 | ```python
 78 | from infra.monitoring.check_server import CheckServer
 79 | 
 80 | # GMS 서버 헬스 체크
 81 | is_healthy = CheckServer.is_gms_server_healthy(url="http://localhost:8080")
 82 | 
 83 | if is_healthy:
 84 |     print("서버가 정상입니다.")
 85 | else:
 86 |     print("서버 연결에 문제가 있습니다.")
 87 | ```
 88 | 
 89 | ## Import 및 사용 현황
 90 | 
 91 | ### 사용 위치
 92 | 
 93 | **`interface/app_pages/settings_sections/data_source_section.py`**
 94 | 
 95 | 이 모듈에서 `CheckServer` 클래스를 import하여 사용합니다.
 96 | 
 97 | **Import:**
 98 | ```python
 99 | from infra.monitoring.check_server import CheckServer
100 | ```
101 | 
102 | **사용 방법:**
103 | 
104 | 1. **DataHub 편집 시 헬스 체크** (117번째 줄)
105 |    ```python
106 |    if st.button("헬스 체크", key="dh_edit_health"):
107 |        ok = CheckServer.is_gms_server_healthy(url=new_url)
108 |        st.session_state["datahub_last_health"] = bool(ok)
109 |        if ok:
110 |            st.success("GMS 서버가 정상입니다.")
111 |        else:
112 |            st.error("GMS 서버 헬스 체크 실패. URL과 네트워크를 확인하세요.")
113 |    ```
114 | 
115 | 2. **DataHub 추가 시 헬스 체크** (160번째 줄)
116 |    ```python
117 |    if st.button("헬스 체크", key="dh_health_new"):
118 |        ok = CheckServer.is_gms_server_healthy(url=dh_url)
119 |        st.session_state["datahub_last_health"] = bool(ok)
120 |        if ok:
121 |            st.success("GMS 서버가 정상입니다.")
122 |        else:
123 |            st.error("GMS 서버 헬스 체크 실패. URL과 네트워크를 확인하세요.")
124 |    ```
125 | 
126 | **사용 목적:**
127 | - Streamlit UI에서 DataHub 서버 설정 시, 사용자가 입력한 URL이 유효한지 확인
128 | - 헬스 체크 결과를 세션 상태에 저장하여 상태 배너에 표시
129 | - 서버 연결 성공/실패에 따라 사용자에게 적절한 피드백 제공
130 | 
131 | ## 로깅
132 | 
133 | 모듈은 Python의 `logging` 모듈을 사용하여 다음 정보를 로깅합니다:
134 | - 서버가 정상일 때: INFO 레벨로 성공 메시지
135 | - 타임아웃 발생 시: ERROR 레벨로 타임아웃 오류 메시지
136 | - 연결 실패 시: ERROR 레벨로 연결 오류 메시지
137 | - HTTP 오류 발생 시: ERROR 레벨로 HTTP 오류 메시지
138 | - 기타 요청 예외 발생 시: ERROR 레벨로 예외 정보 로깅
139 | 
140 | 로깅 레벨은 `INFO`로 설정되어 있으며, 타임스탬프와 로그 레벨 정보가 포함됩니다.
141 | 
142 | **로깅 포맷:**
143 | ```
144 | %(asctime)s [%(levelname)s] %(message)s
145 | ```
146 | 
147 | 날짜 형식: `%Y-%m-%d %H:%M:%S`
148 | 
149 | 


--------------------------------------------------------------------------------
/interface/core/dialects.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 다이얼렉트 프리셋과 옵션 정의 모듈.
  3 | 
  4 | 이 모듈은 다음을 제공합니다:
  5 | 
  6 | - DialectOption: 각 SQL 엔진 특성 데이터클래스
  7 |   - name: 엔진 표시 이름 (예: "PostgreSQL", "ClickHouse")
  8 |   - supports_ilike: 대소문자 무시 비교(ILIKE) 지원 여부
  9 |   - hints: 자주 쓰이는/효과적인 함수의 간결 목록 + 짧은 메모
 10 |     - 예: ["DATE_TRUNC (날짜 절단)", "STRING_AGG (문자 집계)"]
 11 | 
 12 | - PRESET_DIALECTS: 대표 SQL 엔진들의 기본 프리셋 모음
 13 |   - PostgreSQL, ClickHouse, Trino, Snowflake, Redshift, BigQuery, MSSQL, Oracle, DuckDB
 14 | 
 15 | 주 사용처:
 16 | - Streamlit UI에서 프리셋 선택 및 커스텀 다이얼렉트 입력의 기준 데이터
 17 | - Lang2SQL 파이프라인에서 프롬프트/키워드 힌트 구성
 18 | 
 19 | 주의:
 20 | - hints는 프롬프트 가이드용이며 실행 보장을 의미하지 않습니다.
 21 | - 실제 문법/함수 지원은 엔진 버전 및 설정에 따라 달라질 수 있습니다.
 22 | """
 23 | 
 24 | from __future__ import annotations
 25 | 
 26 | from dataclasses import asdict, dataclass, field
 27 | from typing import Dict, List
 28 | 
 29 | 
 30 | @dataclass
 31 | class DialectOption:
 32 |     name: str
 33 |     supports_ilike: bool = False
 34 |     hints: List[str] = field(default_factory=list)
 35 | 
 36 |     def to_dict(self) -> Dict:
 37 |         return asdict(self)
 38 | 
 39 |     @staticmethod
 40 |     def from_dict(data: Dict) -> "DialectOption":
 41 |         return DialectOption(
 42 |             name=data.get("name", "Custom"),
 43 |             supports_ilike=bool(data.get("supports_ilike", False)),
 44 |             hints=list(data.get("hints", data.get("keyword_hints", []))),
 45 |         )
 46 | 
 47 | 
 48 | PRESET_DIALECTS: Dict[str, DialectOption] = {
 49 |     "PostgreSQL": DialectOption(
 50 |         name="PostgreSQL",
 51 |         supports_ilike=True,
 52 |         hints=[
 53 |             "COALESCE (널 대체)",
 54 |             "DATE_TRUNC (날짜 절단)",
 55 |             "STRING_AGG (문자 집계)",
 56 |             "GENERATE_SERIES (시퀀스 생성)",
 57 |         ],
 58 |     ),
 59 |     "ClickHouse": DialectOption(
 60 |         name="ClickHouse",
 61 |         supports_ilike=False,
 62 |         hints=[
 63 |             "toDate (날짜 변환)",
 64 |             "dateDiff (날짜 차이)",
 65 |             "arrayJoin (배열 펼치기)",
 66 |             "groupArray (배열 집계)",
 67 |         ],
 68 |     ),
 69 |     "Trino": DialectOption(
 70 |         name="Trino",
 71 |         supports_ilike=False,
 72 |         hints=[
 73 |             "date_trunc (날짜 절단)",
 74 |             "try_cast (안전 변환)",
 75 |             "coalesce (널 대체)",
 76 |             "regexp_like (정규식 매칭)",
 77 |         ],
 78 |     ),
 79 |     "Snowflake": DialectOption(
 80 |         name="Snowflake",
 81 |         supports_ilike=True,
 82 |         hints=[
 83 |             "IFF (조건 분기)",
 84 |             "TO_DATE (날짜 변환)",
 85 |             "DATE_TRUNC (날짜 절단)",
 86 |             "LISTAGG (문자 집계)",
 87 |         ],
 88 |     ),
 89 |     "Redshift": DialectOption(
 90 |         name="Redshift",
 91 |         supports_ilike=True,
 92 |         hints=[
 93 |             "COALESCE (널 대체)",
 94 |             "DATE_TRUNC (날짜 절단)",
 95 |             "LISTAGG (문자 집계)",
 96 |             "REGEXP_REPLACE (정규식 치환)",
 97 |         ],
 98 |     ),
 99 |     "BigQuery": DialectOption(
100 |         name="BigQuery",
101 |         supports_ilike=False,
102 |         hints=[
103 |             "SAFE_CAST (안전 변환)",
104 |             "DATE_TRUNC (날짜 절단)",
105 |             "ARRAY_AGG (배열 집계)",
106 |             "REGEXP_CONTAINS (정규식 포함)",
107 |         ],
108 |     ),
109 |     "MSSQL": DialectOption(
110 |         name="MSSQL",
111 |         supports_ilike=False,
112 |         hints=[
113 |             "ISNULL (널 대체)",
114 |             "DATEADD (날짜 가감)",
115 |             "CONVERT (형 변환)",
116 |             "STRING_AGG (문자 집계)",
117 |         ],
118 |     ),
119 |     "Oracle": DialectOption(
120 |         name="Oracle",
121 |         supports_ilike=False,
122 |         hints=[
123 |             "NVL (널 대체)",
124 |             "TO_DATE (날짜 변환)",
125 |             "TRUNC (날짜 절단)",
126 |             "LISTAGG (문자 집계)",
127 |         ],
128 |     ),
129 |     "DuckDB": DialectOption(
130 |         name="DuckDB",
131 |         supports_ilike=True,
132 |         hints=[
133 |             "date_trunc (날짜 절단)",
134 |             "string_agg (문자 집계)",
135 |             "coalesce (널 대체)",
136 |             "regexp_replace (정규식 치환)",
137 |         ],
138 |     ),
139 | }
140 | 


--------------------------------------------------------------------------------
/interface/core/config/registry_data_sources.py:
--------------------------------------------------------------------------------
  1 | """DataHub/VectorDB 소스 레지스트리를 세션+디스크에 관리하는 모듈입니다.
  2 | get/add/update/delete 연산과 Streamlit 세션 연동을 제공합니다.
  3 | """
  4 | 
  5 | from typing import Optional
  6 | 
  7 | try:
  8 |     import streamlit as st  # type: ignore
  9 | except Exception:  # pragma: no cover
 10 |     st = None  # type: ignore
 11 | 
 12 | from .models import DataSourcesRegistry, DataHubSource, VectorDBSource
 13 | from .persist import (
 14 |     load_registry_from_disk,
 15 |     save_registry_to_disk,
 16 | )
 17 | 
 18 | 
 19 | def get_data_sources_registry() -> DataSourcesRegistry:
 20 |     if st is not None and "data_sources_registry" in st.session_state:
 21 |         reg = st.session_state["data_sources_registry"]
 22 |         return reg  # stored as DataSourcesRegistry
 23 |     # Try load from disk
 24 |     try:
 25 |         registry = load_registry_from_disk()
 26 |     except Exception:
 27 |         registry = DataSourcesRegistry()
 28 |     if st is not None:
 29 |         st.session_state["data_sources_registry"] = registry
 30 |     return registry
 31 | 
 32 | 
 33 | def _save_registry(registry: DataSourcesRegistry) -> None:
 34 |     if st is not None:
 35 |         st.session_state["data_sources_registry"] = registry
 36 |     try:
 37 |         save_registry_to_disk(registry)
 38 |     except Exception:
 39 |         # fail-soft; UI will still have session copy
 40 |         pass
 41 | 
 42 | 
 43 | def add_datahub_source(
 44 |     *, name: str, url: str, faiss_path: Optional[str] = None, note: Optional[str] = None
 45 | ) -> None:
 46 |     registry = get_data_sources_registry()
 47 |     if any(s.name == name for s in registry.datahub):
 48 |         raise ValueError(f"이미 존재하는 DataHub 이름입니다: {name}")
 49 |     registry.datahub.append(
 50 |         DataHubSource(name=name, url=url, faiss_path=faiss_path, note=note)
 51 |     )
 52 |     _save_registry(registry)
 53 | 
 54 | 
 55 | def update_datahub_source(
 56 |     *, name: str, url: str, faiss_path: Optional[str], note: Optional[str]
 57 | ) -> None:
 58 |     registry = get_data_sources_registry()
 59 |     for idx, s in enumerate(registry.datahub):
 60 |         if s.name == name:
 61 |             registry.datahub[idx] = DataHubSource(
 62 |                 name=name, url=url, faiss_path=faiss_path, note=note
 63 |             )
 64 |             _save_registry(registry)
 65 |             return
 66 |     raise ValueError(f"존재하지 않는 DataHub 이름입니다: {name}")
 67 | 
 68 | 
 69 | def delete_datahub_source(*, name: str) -> None:
 70 |     registry = get_data_sources_registry()
 71 |     registry.datahub = [s for s in registry.datahub if s.name != name]
 72 |     _save_registry(registry)
 73 | 
 74 | 
 75 | def add_vectordb_source(
 76 |     *,
 77 |     name: str,
 78 |     vtype: str,
 79 |     location: str,
 80 |     collection_prefix: Optional[str] = None,
 81 |     note: Optional[str] = None,
 82 | ) -> None:
 83 |     vtype = (vtype or "").lower()
 84 |     if vtype not in ("faiss", "pgvector"):
 85 |         raise ValueError("VectorDB 타입은 'faiss' 또는 'pgvector'여야 합니다")
 86 |     registry = get_data_sources_registry()
 87 |     if any(s.name == name for s in registry.vectordb):
 88 |         raise ValueError(f"이미 존재하는 VectorDB 이름입니다: {name}")
 89 |     registry.vectordb.append(
 90 |         VectorDBSource(
 91 |             name=name,
 92 |             type=vtype,
 93 |             location=location,
 94 |             collection_prefix=collection_prefix,
 95 |             note=note,
 96 |         )
 97 |     )
 98 |     _save_registry(registry)
 99 | 
100 | 
def update_vectordb_source(
    *,
    name: str,
    vtype: str,
    location: str,
    collection_prefix: Optional[str],
    note: Optional[str],
) -> None:
    """Replace the VectorDB source named ``name`` and save the registry.

    Args:
        name: Name of the source to replace.
        vtype: Backend type, compared case-insensitively; must be 'faiss' or
            'pgvector'.
        location: Location value stored on the source.
        collection_prefix: Collection prefix stored on the source.
        note: Note stored on the source.

    Raises:
        ValueError: If ``vtype`` is unsupported or ``name`` does not exist.
    """
    normalized_type = (vtype or "").lower()
    if normalized_type not in ("faiss", "pgvector"):
        raise ValueError("VectorDB 타입은 'faiss' 또는 'pgvector'여야 합니다")

    registry = get_data_sources_registry()
    # Index of the first source with this name, or None when absent.
    target_idx = next(
        (i for i, src in enumerate(registry.vectordb) if src.name == name),
        None,
    )
    if target_idx is None:
        raise ValueError(f"존재하지 않는 VectorDB 이름입니다: {name}")

    registry.vectordb[target_idx] = VectorDBSource(
        name=name,
        type=normalized_type,
        location=location,
        collection_prefix=collection_prefix,
        note=note,
    )
    _save_registry(registry)
125 | 
126 | 
def delete_vectordb_source(*, name: str) -> None:
    """Remove every VectorDB source named ``name`` and save the registry.

    A name that matches nothing is not an error; the registry is saved as is.
    """
    registry = get_data_sources_registry()
    remaining = list(filter(lambda src: src.name != name, registry.vectordb))
    registry.vectordb = remaining
    _save_registry(registry)
131 | 


--------------------------------------------------------------------------------
/utils/llm/chains.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LLM 체인 생성 모듈.
  3 | 
  4 | 이 모듈은 Lang2SQL에서 사용하는 다양한 LangChain 기반 체인을 정의합니다.
  5 | - Query Maker
  6 | - Query Enrichment
  7 | - Profile Extraction
  8 | - Question Gate (SQL 적합성 분류)
  9 | """
 10 | 
 11 | from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
 12 | from pydantic import BaseModel, Field
 13 | 
 14 | from prompt.template_loader import get_prompt_template
 15 | from utils.llm.core import get_llm
 16 | from utils.llm.output_schema.document_suitability import DocumentSuitabilityList
 17 | from utils.llm.output_schema.question_suitability import QuestionSuitability
 18 | 
# Shared LLM instance, created once at import time via get_llm(). It is passed
# to the chain factories at the bottom of this module.
llm = get_llm()
 20 | 
 21 | 
# Structured profile of a natural-language question. It is the structured-output
# schema used by create_profile_extraction_chain in this module.
# NOTE(review): the class docstring and the Field descriptions are presumably
# exported through the pydantic schema that with_structured_output hands to the
# LLM, so they are treated as runtime text and left untranslated — confirm
# before editing them.
class QuestionProfile(BaseModel):
    """
    자연어 질문의 특징을 구조화해 표현하는 프로파일 모델.

    이 프로파일은 이후 컨텍스트 보강 및 SQL 생성 시 힌트로 사용됩니다.
    """

    # Whether the question requires time-series analysis.
    is_timeseries: bool = Field(description="시계열 분석 필요 여부")
    # Whether aggregate functions are needed.
    is_aggregation: bool = Field(description="집계 함수 필요 여부")
    # Whether a conditional filter is needed.
    has_filter: bool = Field(description="조건 필터 필요 여부")
    # Whether grouping is needed.
    is_grouped: bool = Field(description="그룹화 필요 여부")
    # Whether sorting / ranking is needed.
    has_ranking: bool = Field(description="정렬/순위 필요 여부")
    # Whether the question compares time periods.
    has_temporal_comparison: bool = Field(description="기간 비교 포함 여부")
    # Main intent category of the question (free-form string).
    intent_type: str = Field(description="질문의 주요 의도 유형")
 36 | 
 37 | 
 38 | # QueryMakerChain
 39 | def create_query_maker_chain(llm):
 40 |     """
 41 |     SQL 쿼리 생성을 위한 체인을 생성합니다.
 42 | 
 43 |     Args:
 44 |         llm: LangChain 호환 LLM 인스턴스
 45 | 
 46 |     Returns:
 47 |         Runnable: 입력 프롬프트를 받아 SQL을 생성하는 체인
 48 |     """
 49 |     prompt = get_prompt_template("query_maker_prompt")
 50 |     query_maker_prompt = ChatPromptTemplate.from_messages(
 51 |         [
 52 |             SystemMessagePromptTemplate.from_template(prompt),
 53 |         ]
 54 |     )
 55 |     return query_maker_prompt | llm
 56 | 
 57 | 
 58 | def create_query_enrichment_chain(llm):
 59 |     """
 60 |     사용자 질문을 메타데이터로 보강하기 위한 체인을 생성합니다.
 61 | 
 62 |     Args:
 63 |         llm: LangChain 호환 LLM 인스턴스
 64 | 
 65 |     Returns:
 66 |         Runnable: 보강된 질문 텍스트를 반환하는 체인
 67 |     """
 68 |     prompt = get_prompt_template("query_enrichment_prompt")
 69 | 
 70 |     enrichment_prompt = ChatPromptTemplate.from_messages(
 71 |         [
 72 |             SystemMessagePromptTemplate.from_template(prompt),
 73 |         ]
 74 |     )
 75 | 
 76 |     chain = enrichment_prompt | llm
 77 |     return chain
 78 | 
 79 | 
 80 | def create_profile_extraction_chain(llm):
 81 |     """
 82 |     질문으로부터 `QuestionProfile`을 추출하는 체인을 생성합니다.
 83 | 
 84 |     Args:
 85 |         llm: LangChain 호환 LLM 인스턴스
 86 | 
 87 |     Returns:
 88 |         Runnable: `QuestionProfile` 구조화 출력을 반환하는 체인
 89 |     """
 90 |     prompt = get_prompt_template("profile_extraction_prompt")
 91 | 
 92 |     profile_prompt = ChatPromptTemplate.from_messages(
 93 |         [
 94 |             SystemMessagePromptTemplate.from_template(prompt),
 95 |         ]
 96 |     )
 97 | 
 98 |     chain = profile_prompt | llm.with_structured_output(QuestionProfile)
 99 |     return chain
100 | 
101 | 
def create_question_gate_chain(llm):
    """
    Build the question suitability (Question Gate) chain.

    The "question_gate_prompt" template becomes the single system message of a
    chat prompt, which is piped into ``llm`` configured for
    `QuestionSuitability` structured output.

    Args:
        llm: LangChain-compatible LLM instance.

    Returns:
        Runnable: invoke({"question": str}) -> QuestionSuitability
    """
    template_text = get_prompt_template("question_gate_prompt")
    system_message = SystemMessagePromptTemplate.from_template(template_text)
    chat_prompt = ChatPromptTemplate.from_messages([system_message])
    structured_llm = llm.with_structured_output(QuestionSuitability)
    return chat_prompt | structured_llm
121 | 
122 | 
def create_document_suitability_chain(llm):
    """
    Build the document suitability evaluation chain.

    The chain takes the question and the retrieved tables as input. The
    "document_suitability_prompt" template becomes the single system message
    of a chat prompt, which is piped into ``llm`` configured for
    `DocumentSuitabilityList` structured output.

    Args:
        llm: LangChain-compatible LLM instance.

    Returns:
        Runnable: invoke({"question": str, "tables": dict}) -> {"results": DocumentSuitability[]}
    """
    template_text = get_prompt_template("document_suitability_prompt")
    system_message = SystemMessagePromptTemplate.from_template(template_text)
    chat_prompt = ChatPromptTemplate.from_messages([system_message])
    structured_llm = llm.with_structured_output(DocumentSuitabilityList)
    return chat_prompt | structured_llm
139 | 
140 | 
# Module-level chain instances, each built once at import time from the shared
# module-level `llm` using the factory functions defined in this module.
query_maker_chain = create_query_maker_chain(llm)
profile_extraction_chain = create_profile_extraction_chain(llm)
query_enrichment_chain = create_query_enrichment_chain(llm)
question_gate_chain = create_question_gate_chain(llm)
document_suitability_chain = create_document_suitability_chain(llm)
146 | 


--------------------------------------------------------------------------------
/engine/query_executor.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Lang2SQL 쿼리 실행을 위한 공용 모듈입니다.
  3 | 
  4 | 이 모듈은 CLI와 Streamlit 인터페이스에서 공통으로 사용할 수 있는
  5 | 쿼리 실행 함수를 제공합니다.
  6 | """
  7 | 
  8 | import logging
  9 | from typing import Any, Dict, Optional, Union
 10 | 
 11 | from langchain_core.messages import HumanMessage
 12 | 
 13 | from utils.llm.graph_utils.basic_graph import builder as basic_builder
 14 | from utils.llm.graph_utils.enriched_graph import builder as enriched_builder
 15 | from utils.llm.llm_response_parser import LLMResponseParser
 16 | 
 17 | logger = logging.getLogger(__name__)
 18 | 
 19 | 
 20 | def execute_query(
 21 |     *,
 22 |     query: str,
 23 |     database_env: str,
 24 |     retriever_name: str = "기본",
 25 |     top_n: int = 5,
 26 |     device: str = "cpu",
 27 |     use_enriched_graph: bool = False,
 28 |     session_state: Optional[Union[Dict[str, Any], Any]] = None,
 29 | ) -> Dict[str, Any]:
 30 |     """
 31 |     자연어 쿼리를 SQL로 변환하고 실행 결과를 반환하는 공용 함수입니다.
 32 | 
 33 |     이 함수는 Lang2SQL 파이프라인(graph)을 사용하여 사용자의 자연어 질문을
 34 |     SQL 쿼리로 변환하고 관련 메타데이터와 함께 결과를 반환합니다.
 35 |     CLI와 Streamlit 인터페이스에서 공통으로 사용할 수 있습니다.
 36 | 
 37 |     Args:
 38 |         query (str): 사용자가 입력한 자연어 기반 질문.
 39 |         database_env (str): 사용할 데이터베이스 환경 이름 또는 키 (예: "dev", "prod").
 40 |         retriever_name (str, optional): 테이블 검색기 이름. 기본값은 "기본".
 41 |         top_n (int, optional): 검색된 상위 테이블 수 제한. 기본값은 5.
 42 |         device (str, optional): LLM 실행에 사용할 디바이스 ("cpu" 또는 "cuda"). 기본값은 "cpu".
 43 |         use_enriched_graph (bool, optional): 확장된 그래프 사용 여부. 기본값은 False.
 44 |         session_state (Optional[Union[Dict[str, Any], Any]], optional): Streamlit 세션 상태 (Streamlit에서만 사용).
 45 | 
 46 |     Returns:
 47 |         Dict[str, Any]: 다음 정보를 포함한 Lang2SQL 실행 결과 딕셔너리:
 48 |             - "generated_query": 생성된 SQL 쿼리 (`AIMessage`)
 49 |             - "messages": 전체 LLM 응답 메시지 목록
 50 |             - "searched_tables": 참조된 테이블 목록 등 추가 정보
 51 |     """
 52 |     logger.info("Processing query: %s", query)
 53 | 
 54 |     # 그래프 선택
 55 |     if use_enriched_graph:
 56 |         graph_type = "enriched"
 57 |         graph_builder = enriched_builder
 58 |     else:
 59 |         graph_type = "basic"
 60 |         graph_builder = basic_builder
 61 | 
 62 |     logger.info("Using %s graph", graph_type)
 63 | 
 64 |     # 그래프 선택 및 컴파일
 65 |     if session_state is not None:
 66 |         # Streamlit 환경: 세션 상태에서 그래프 재사용
 67 |         graph = session_state.get("graph")
 68 |         if graph is None:
 69 |             graph = graph_builder.compile()
 70 |             session_state["graph"] = graph
 71 |     else:
 72 |         # CLI 환경: 매번 새로운 그래프 컴파일
 73 |         graph = graph_builder.compile()
 74 | 
 75 |     # 그래프 실행
 76 |     res = graph.invoke(
 77 |         input={
 78 |             "messages": [HumanMessage(content=query)],
 79 |             "user_database_env": database_env,
 80 |             "best_practice_query": "",
 81 |             "retriever_name": retriever_name,
 82 |             "top_n": top_n,
 83 |             "device": device,
 84 |             # 다이얼렉트 정보 주입 (있다면 세션에서, 없으면 기본값)
 85 |             "dialect_name": (
 86 |                 session_state.get("selected_dialect_option", {}).get("name")
 87 |                 if session_state is not None
 88 |                 else database_env
 89 |             ),
 90 |             "supports_ilike": (
 91 |                 bool(
 92 |                     session_state.get("selected_dialect_option", {}).get(
 93 |                         "supports_ilike", False
 94 |                     )
 95 |                 )
 96 |                 if session_state is not None
 97 |                 else False
 98 |             ),
 99 |             "dialect_hints": (
100 |                 session_state.get("selected_dialect_option", {}).get("hints", [])
101 |                 if session_state is not None
102 |                 else []
103 |             ),
104 |         }
105 |     )
106 | 
107 |     return res
108 | 
109 | 
def extract_sql_from_result(res: Dict[str, Any]) -> Optional[str]:
    """
    Pull the SQL query string out of a Lang2SQL execution result.

    Args:
        res (Dict[str, Any]): Result returned by ``execute_query``.

    Returns:
        Optional[str]: The extracted SQL string, or None when the result has no
            "generated_query" or the parser raises ValueError.
    """
    message = res.get("generated_query")
    if not message:
        logger.error("생성된 쿼리가 없습니다.")
        return None

    # Message objects expose `.content`; anything else is stringified.
    if hasattr(message, "content"):
        raw_text = message.content
    else:
        raw_text = str(message)

    try:
        return LLMResponseParser.extract_sql(raw_text)
    except ValueError:
        logger.error("SQL을 추출할 수 없습니다.")
        return None
137 | 


--------------------------------------------------------------------------------
/utils/databases/connector/trino_connector.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Trino 데이터베이스 커넥터 모듈.
  3 | 
  4 | 이 모듈은 Trino 클러스터에 연결하여 SQL 쿼리를 실행하고,
  5 | 그 결과를 pandas DataFrame 형태로 반환하는 기능을 제공합니다.
  6 | """
  7 | 
  8 | import pandas as pd
  9 | 
 10 | from utils.databases.config import DBConfig
 11 | from utils.databases.connector.base_connector import BaseConnector
 12 | from utils.databases.logger import logger
 13 | 
 14 | 
 15 | class TrinoConnector(BaseConnector):
 16 |     """
 17 |     Trino 데이터베이스 커넥터 클래스.
 18 | 
 19 |     Trino 클러스터에 연결하여 SQL 쿼리를 실행하거나
 20 |     연결을 종료하는 기능을 제공합니다.
 21 |     """
 22 | 
 23 |     connection = None
 24 | 
 25 |     def __init__(self, config: DBConfig):
 26 |         """
 27 |         TrinoConnector 인스턴스를 초기화합니다.
 28 | 
 29 |         Args:
 30 |             config (DBConfig): Trino 연결 정보를 담은 설정 객체.
 31 |                 - 필수 키: host, port
 32 |                 - 선택 키: user, password, database, extra.catalog, extra.schema, extra.http_scheme
 33 |                 - database가 "catalog.schema" 형태일 경우 자동으로 분리되어 설정됩니다.
 34 |         """
 35 |         # pylint: disable=import-outside-toplevel
 36 |         try:
 37 |             import trino
 38 | 
 39 |             self.trino = trino
 40 |         except ImportError as e:
 41 |             logger.error(
 42 |                 "Trino 드라이버가 설치되어 있지 않습니다. pip install trino 명령을 실행하세요."
 43 |             )
 44 |             raise ImportError("Trino 라이브러리가 설치되어 있지 않습니다.") from e
 45 | 
 46 |         self.host = config["host"]
 47 |         self.port = config["port"] or 8080
 48 |         self.user = config.get("user") or "anonymous"
 49 |         self.password = config.get("password")
 50 |         self.database = config.get("database")  # e.g., catalog.schema
 51 |         self.extra = config.get("extra") or {}
 52 |         self.http_scheme = self.extra.get("http_scheme", "http")
 53 |         self.catalog = self.extra.get("catalog")
 54 |         self.schema = self.extra.get("schema")
 55 | 
 56 |         # If database given as "catalog.schema", split into fields
 57 |         if self.database and (not self.catalog or not self.schema):
 58 |             if "." in self.database:
 59 |                 db_catalog, db_schema = self.database.split(".", 1)
 60 |                 self.catalog = self.catalog or db_catalog
 61 |                 self.schema = self.schema or db_schema
 62 | 
 63 |         self.connect()
 64 | 
 65 |     def connect(self) -> None:
 66 |         """
 67 |         Trino 클러스터에 연결을 설정합니다.
 68 | 
 69 |         Raises:
 70 |             ImportError: trino 드라이버를 불러오지 못한 경우 발생합니다.
 71 |             ConnectionError: Trino 서버 연결에 실패한 경우 발생합니다.
 72 |         """
 73 |         try:
 74 |             auth = None
 75 |             if self.password and self.http_scheme == "https":
 76 |                 auth = self.trino.auth.BasicAuthentication(self.user, self.password)
 77 | 
 78 |             self.connection = self.trino.dbapi.connect(
 79 |                 host=self.host,
 80 |                 port=self.port,
 81 |                 user=self.user,
 82 |                 http_scheme=self.http_scheme,
 83 |                 catalog=self.catalog,
 84 |                 schema=self.schema,
 85 |                 auth=auth,
 86 |             )
 87 |             logger.info("Successfully connected to Trino.")
 88 |         except Exception as e:
 89 |             logger.error("Failed to connect to Trino: %s", e)
 90 |             raise
 91 | 
 92 |     def run_sql(self, sql: str) -> pd.DataFrame:
 93 |         """
 94 |         SQL 쿼리를 실행하고 결과를 pandas DataFrame으로 반환합니다.
 95 | 
 96 |         Args:
 97 |             sql (str): 실행할 SQL 쿼리 문자열.
 98 | 
 99 |         Returns:
100 |             pd.DataFrame: 쿼리 결과를 담은 DataFrame 객체.
101 | 
102 |         Raises:
103 |             RuntimeError: SQL 실행 중 오류가 발생한 경우.
104 |         """
105 |         try:
106 |             cursor = self.connection.cursor()
107 |             cursor.execute(sql)
108 |             columns = (
109 |                 [desc[0] for desc in cursor.description] if cursor.description else []
110 |             )
111 |             rows = cursor.fetchall() if cursor.description else []
112 |             return pd.DataFrame(rows, columns=columns)
113 |         except Exception as e:
114 |             logger.error("Failed to execute SQL query on Trino: %s", e)
115 |             raise
116 |         finally:
117 |             try:
118 |                 cursor.close()
119 |                 logger.info("Cursor closed successfully.")
120 |             except Exception as e:  # pylint: disable=broad-exception-caught
121 |                 logger.error("Failed to close cursor: %s", e)
122 | 
123 |     def close(self) -> None:
124 |         """
125 |         Trino 클러스터와의 연결을 종료합니다.
126 | 
127 |         연결이 존재할 경우 안전하게 닫고 리소스를 해제합니다.
128 |         """
129 |         if self.connection:
130 |             self.connection.close()
131 |             logger.info("Connection to Trino closed.")
132 |         self.connection = None
133 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
  1 | ###############################################
  2 | ############## LLM API SELECTION ##############
  3 | ###############################################
  4 | LLM_PROVIDER=openai
  5 | 
  6 | OPEN_AI_KEY=sk-proj-
  7 | LANGCHAIN_TRACING_V2=true
  8 | LANGCHAIN_PROJECT=langgraph_tutorial
  9 | LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
 10 | LANGCHAIN_API_KEY=lsv2_
 11 | 
 12 | 
 13 | 
 14 | # LLM_PROVIDER=openai
 15 | # OPEN_AI_KEY=sk-proj-----
 16 | OPEN_AI_LLM_MODEL=gpt-4.1
 17 | 
 18 | # LLM_PROVIDER=gemini
 19 | # GEMINI_API_KEY=
 20 | # GEMINI_LLM_MODEL=gemini-2.0-flash-lite
 21 | 
 22 | # LLM_PROVIDER=azure
 23 | # AZURE_OPENAI_LLM_ENDPOINT=https://-------.openai.azure.com/
 24 | # AZURE_OPENAI_LLM_KEY=-
 25 | # AZURE_OPENAI_LLM_MODEL=gpt4o
 26 | # AZURE_OPENAI_LLM_API_VERSION=2024-07-01-preview
 27 | 
 28 | # LLM_PROVIDER=ollama
 29 | # OLLAMA_LLM_BASE_URL=
 30 | # OLLAMA_LLM_MODEL=
 31 | 
 32 | # LLM_PROVIDER=huggingface
 33 | # HUGGING_FACE_LLM_REPO_ID=
 34 | # HUGGING_FACE_LLM_ENDPOINT=
 35 | # HUGGING_FACE_LLM_API_TOKEN=
 36 | 
 37 | # LLM_PROVIDER=bedrock
 38 | # AWS_BEDROCK_LLM_ACCESS_KEY_ID=
 39 | # AWS_BEDROCK_LLM_SECRET_ACCESS_KEY=
 40 | # AWS_BEDROCK_LLM_REGION=us-west-2
 41 | # AWS_BEDROCK_LLM_ENDPOINT_URL=https://bedrock.us-west-2.amazonaws.com
 42 | # AWS_BEDROCK_LLM_MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0
 43 | 
 44 | ###############################################
 45 | ########### Embedding API SELECTION ###########
 46 | ###############################################
 47 | # Only used if you are using an LLM that does not natively support embedding (openai or Azure)
 48 | EMBEDDING_PROVIDER='openai'
 49 | OPEN_AI_EMBEDDING_MODEL='text-embedding-ada-002'
 50 | 
 51 | # EMBEDDING_PROVIDER=azure
 52 | # AZURE_OPENAI_EMBEDDING_ENDPOINT=https://-------.openai.azure.com/openai/deployments
 53 | # AZURE_OPENAI_EMBEDDING_KEY=-
 54 | # AZURE_OPENAI_EMBEDDING_MODEL='textembeddingada002' # This is the "deployment" on Azure you want to use for embeddings. Not the base model. Valid base model is text-embedding-ada-002
 55 | # AZURE_OPENAI_EMBEDDING_API_VERSION=2023-09-15-preview
 56 | 
 57 | # EMBEDDING_PROVIDER='ollama'
 58 | # EMBEDDING_BASE_PATH='http://host.docker.internal:11434'
 59 | # EMBEDDING_MODEL='nomic-embed-text:latest'
 60 | # EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192
 61 | 
 62 | # EMBEDDING_PROVIDER='bedrock'
 63 | # AWS_BEDROCK_EMBEDDING_ACCESS_KEY_ID=--
 64 | # AWS_BEDROCK_EMBEDDING_SECRET_ACCESS_KEY=-/-+-+-
 65 | # AWS_BEDROCK_EMBEDDING_REGION=us-west-2
 66 | # AWS_BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
 67 | 
 68 | # EMBEDDING_PROVIDER='gemini'
 69 | # GEMINI_EMBEDDING_API_KEY=
 70 | # EMBEDDING_MODEL='text-embedding-004'
 71 | 
 72 | # EMBEDDING_PROVIDER='huggingface'
 73 | # HUGGING_FACE_EMBEDDING_REPO_ID=
 74 | # HUGGING_FACE_EMBEDDING_MODEL=
 75 | # HUGGING_FACE_EMBEDDING_API_TOKEN=
 76 | 
 77 | DATAHUB_SERVER='http://localhost:8080'
 78 | 
 79 | 
 80 | ###############################################
 81 | ######## Database Connector SELECTION #########
 82 | ###############################################
 83 | 
 84 | # clickhouse
 85 | DB_TYPE=clickhouse
 86 | CLICKHOUSE_HOST=localhost
 87 | CLICKHOUSE_PORT=9001
 88 | CLICKHOUSE_USER=clickhouse
 89 | CLICKHOUSE_PASSWORD=clickhouse
 90 | CLICKHOUSE_DATABASE=default
 91 | 
 92 | # databricks
 93 | # DB_TYPE=databricks
 94 | # DATABRICKS_HOST=_
 95 | # DATABRICKS_HTTP_PATH=_
 96 | # DATABRICKS_ACCESS_TOKEN=_
 97 | 
 98 | # duckdb
 99 | # DB_TYPE=duckdb
100 | # DUCKDB_PATH=./data/duckdb.db
101 | 
102 | # mariadb
103 | # DB_TYPE=mariadb
104 | # MARIADB_HOST=_
105 | # MARIADB_PORT=3306
106 | # MARIADB_USER=_
107 | # MARIADB_PASSWORD=_
108 | # MARIADB_DATABASE=_
109 | 
110 | # mysql
111 | # DB_TYPE=mysql
112 | # MYSQL_HOST=_
113 | # MYSQL_PORT=3306
114 | # MYSQL_USER=_
115 | # MYSQL_PASSWORD=_
116 | # MYSQL_DATABASE=_
117 | 
118 | # oracle
119 | # DB_TYPE=oracle
120 | # ORACLE_HOST=_
121 | # ORACLE_PORT=1521
122 | # ORACLE_USER=_
123 | # ORACLE_PASSWORD=_
124 | # ORACLE_DATABASE=_
125 | # ORACLE_SERVICE_NAME=_
126 | 
127 | # postgresql
128 | # DB_TYPE=postgresql
129 | # POSTGRESQL_HOST=_
130 | # POSTGRESQL_PORT=5432
131 | # POSTGRESQL_USER=_
132 | # POSTGRESQL_PASSWORD=_
133 | # POSTGRESQL_DATABASE=_
134 | 
135 | # snowflake
136 | # DB_TYPE=snowflake
137 | # SNOWFLAKE_USER=_
138 | # SNOWFLAKE_PASSWORD=_
139 | # SNOWFLAKE_ACCOUNT=_
140 | 
141 | # sqlite
142 | # DB_TYPE=sqlite
143 | # SQLITE_PATH=./data/sqlite.db
144 | 
145 | 
146 | # pgvector 설정 (VECTORDB_TYPE=pgvector일 때 사용)
147 | PGVECTOR_HOST=localhost
148 | PGVECTOR_PORT=5432
149 | PGVECTOR_USER=postgres
150 | PGVECTOR_PASSWORD=postgres
151 | PGVECTOR_DATABASE=postgres
152 | PGVECTOR_COLLECTION=table_info_db
153 | 
154 | # VectorDB 설정 (faiss 또는 pgvector)
155 | VECTORDB_TYPE=faiss
156 | 
157 | 
158 | # TRINO_HOST=localhost
159 | # TRINO_PORT=8080
160 | # TRINO_USER=admin
161 | # TRINO_PASSWORD=password
162 | # TRINO_CATALOG=delta
163 | # TRINO_SCHEMA=default
164 | 


--------------------------------------------------------------------------------
/cli/utils/README.md:
--------------------------------------------------------------------------------
  1 | # CLI Utils 모듈
  2 | 
  3 | CLI 애플리케이션에서 사용되는 유틸리티 함수들을 제공하는 모듈입니다.
  4 | 
  5 | ## 디렉토리 구조
  6 | 
  7 | ```
  8 | cli/utils/
  9 | ├── __pycache__/
 10 | ├── env_loader.py
 11 | ├── logger.py
 12 | └── README.md
 13 | ```
 14 | 
 15 | ## 파일 목록 및 설명
 16 | 
 17 | ### env_loader.py
 18 | 
 19 | 환경 변수 유틸리티 모듈입니다. `.env` 파일 로드, 프롬프트 디렉토리 설정, VectorDB 타입 및 위치 설정을 제공합니다.
 20 | 
 21 | **주요 함수:**
 22 | 
 23 | #### `load_env(env_file_path: Optional[str] = None) -> None`
 24 | 환경 변수 파일(.env)을 로드합니다.
 25 | 
 26 | **파라미터:**
 27 | - `env_file_path` (Optional[str]): .env 파일 경로. None이면 기본 경로 사용.
 28 | 
 29 | **동작:**
 30 | - 지정된 경로의 `.env` 파일을 로드하거나, 경로가 없으면 기본 경로의 `.env` 파일을 로드합니다.
 31 | - 성공/실패 메시지를 컬러로 출력합니다.
 32 | - 로드 실패 시 예외를 발생시킵니다.
 33 | 
 34 | **사용 예시:**
 35 | ```python
 36 | from cli.utils.env_loader import load_env
 37 | 
 38 | # 기본 .env 파일 로드
 39 | load_env()
 40 | 
 41 | # 특정 경로의 .env 파일 로드
 42 | load_env(env_file_path="/path/to/.env")
 43 | ```
 44 | 
 45 | #### `set_prompt_dir(prompt_dir_path: Optional[str]) -> None`
 46 | 프롬프트 템플릿 디렉토리 경로를 환경 변수로 설정합니다.
 47 | 
 48 | **파라미터:**
 49 | - `prompt_dir_path` (Optional[str]): 디렉토리 경로. None이면 설정하지 않음.
 50 | 
 51 | **환경 변수:**
 52 | - `PROMPT_TEMPLATES_DIR`: 설정된 프롬프트 디렉토리 경로
 53 | 
 54 | **Raises:**
 55 | - `ValueError`: 경로가 유효하지 않을 경우
 56 | 
 57 | **사용 예시:**
 58 | ```python
 59 | from cli.utils.env_loader import set_prompt_dir
 60 | 
 61 | set_prompt_dir(prompt_dir_path="/path/to/prompt/templates")
 62 | ```
 63 | 
 64 | #### `set_vectordb(vectordb_type: str, vectordb_location: Optional[str] = None) -> None`
 65 | VectorDB 타입과 위치를 환경 변수로 설정합니다.
 66 | 
 67 | **파라미터:**
 68 | - `vectordb_type` (str): VectorDB 타입 ("faiss" 또는 "pgvector")
 69 | - `vectordb_location` (Optional[str]): 경로 또는 연결 URL
 70 | 
 71 | **환경 변수:**
 72 | - `VECTORDB_TYPE`: 설정된 VectorDB 타입
 73 | - `VECTORDB_LOCATION`: 설정된 VectorDB 경로 또는 연결 URL (지정된 경우)
 74 | 
 75 | **Raises:**
 76 | - `ValueError`: 잘못된 타입이나 경로/URL일 경우
 77 | 
 78 | **사용 예시:**
 79 | ```python
 80 | from cli.utils.env_loader import set_vectordb
 81 | 
 82 | # FAISS 설정
 83 | set_vectordb(vectordb_type="faiss", vectordb_location="/path/to/faiss/db")
 84 | 
 85 | # pgvector 설정
 86 | set_vectordb(
 87 |     vectordb_type="pgvector",
 88 |     vectordb_location="postgresql://user:pass@host:port/db"
 89 | )
 90 | ```
 91 | 
 92 | **사용처:**
 93 | - `cli/core/environment.py` (5번 라인): `load_env`, `set_prompt_dir` 함수를 import하여 사용
 94 |   - `initialize_environment` 함수에서 환경 변수 초기화 시 사용
 95 |   ```python
 96 |   from cli.utils.env_loader import load_env, set_prompt_dir
 97 |   
 98 |   def initialize_environment(
 99 |       *,
100 |       env_file_path: Optional[str],
101 |       prompt_dir_path: Optional[str],
102 |   ) -> None:
103 |       load_env(env_file_path=env_file_path)
104 |       set_prompt_dir(prompt_dir_path=prompt_dir_path)
105 |   ```
106 | 
107 | ### logger.py
108 | 
109 | CLI 전용 로깅 유틸리티 모듈입니다. 로깅 설정을 구성하고 기본 로거 인스턴스를 반환합니다.
110 | 
111 | **주요 함수:**
112 | 
113 | #### `configure_logging(level: int = logging.INFO) -> logging.Logger`
114 | 로깅을 설정하고 기본 로거를 반환합니다.
115 | 
116 | **파라미터:**
117 | - `level` (int, optional): 로깅 레벨. 기본값은 `logging.INFO`.
118 | 
119 | **반환값:**
120 | - `logging.Logger`: 설정된 로거 인스턴스. 로거 이름은 "cli"입니다.
121 | 
122 | **로깅 설정:**
123 | - 레벨: 지정된 레벨 (기본값: `INFO`)
124 | - 포맷: `%(asctime)s [%(levelname)s] %(message)s`
125 | - 날짜 포맷: `%Y-%m-%d %H:%M:%S`
126 | 
127 | **사용 예시:**
128 | ```python
129 | from cli.utils.logger import configure_logging
130 | 
131 | # 기본 설정으로 로거 생성
132 | logger = configure_logging()
133 | 
134 | # DEBUG 레벨로 로거 생성
135 | logger = configure_logging(level=logging.DEBUG)
136 | 
137 | # 로깅 사용
138 | logger.info("Information message")
139 | logger.error("Error message")
140 | ```
141 | 
142 | **사용처:**
143 | 
144 | 1. **`cli/__init__.py`** (14번 라인)
145 |    - CLI 진입점에서 로거 초기화
146 |    ```python
147 |    from cli.utils.logger import configure_logging
148 |    
149 |    logger = configure_logging()
150 |    ```
151 |    - 사용 위치: 18번 라인에서 로거 인스턴스 생성, 89번, 92번 라인에서 에러 및 정보 로깅
152 | 
153 | 2. **`cli/commands/quary.py`** (11번 라인)
154 |    - query 명령어 실행 시 로거 초기화
155 |    ```python
156 |    from cli.utils.logger import configure_logging
157 |    
158 |    logger = configure_logging()
159 |    ```
160 |    - 사용 위치: 13번 라인에서 로거 인스턴스 생성, 112번 라인에서 에러 로깅
161 | 
162 | 3. **`cli/core/streamlit_runner.py`** (5번 라인)
163 |    - Streamlit 실행 모듈에서 로거 초기화
164 |    ```python
165 |    from cli.utils.logger import configure_logging
166 |    
167 |    logger = configure_logging()
168 |    ```
169 |    - 사용 위치: 7번 라인에서 로거 인스턴스 생성, 19번, 33번, 35번 라인에서 정보 및 에러 로깅
170 | 
171 | 4. **`cli/commands/run_streamlit.py`** (6번 라인)
172 |    - run-streamlit 명령어 실행 시 로거 초기화
173 |    ```python
174 |    from cli.utils.logger import configure_logging
175 |    
176 |    logger = configure_logging()
177 |    ```
178 |    - 사용 위치: 8번 라인에서 로거 인스턴스 생성, 28번 라인에서 정보 로깅
179 | 
180 | ## 의존성
181 | 
182 | - `click`: CLI 출력 및 메시지 표시
183 | - `dotenv`: `.env` 파일 로드
184 | - `logging`: 로깅 기능 (Python 표준 라이브러리)
185 | - `pathlib.Path`: 경로 처리
186 | - `os`: 환경 변수 설정
187 | 
188 | 


--------------------------------------------------------------------------------
/interface/app_pages/chatbot.py:
--------------------------------------------------------------------------------
  1 | """
  2 | AI ChatBot 페이지
  3 | LangGraph와 OpenAI를 활용한 대화형 인터페이스
  4 | """
  5 | 
  6 | import os
  7 | import streamlit as st
  8 | 
  9 | from utils.llm.chatbot import ChatBot
 10 | from interface.app_pages.sidebar_components import (
 11 |     render_sidebar_data_source_selector,
 12 |     render_sidebar_llm_selector,
 13 |     render_sidebar_embedding_selector,
 14 |     render_sidebar_db_selector,
 15 |     render_sidebar_chatbot_session_controller,
 16 | )
 17 | from interface.core.config import load_config
 18 | 
 19 | 
 20 | def initialize_session_state():
 21 |     """세션 상태 초기화 함수
 22 | 
 23 |     Streamlit의 session_state를 사용하여 앱의 상태를 유지합니다.
 24 |     LLM 설정을 sidebar의 llm_selector에서 선택한 값으로부터 가져옵니다.
 25 |     """
 26 |     # 채팅 메시지 기록 저장 (자동으로 시작)
 27 |     if "chatbot_messages" not in st.session_state:
 28 |         st.session_state.chatbot_messages = []
 29 | 
 30 |     # LLM 공급자 확인 (현재 ChatBot은 OpenAI만 지원)
 31 |     llm_provider = (
 32 |         st.session_state.get("LLM_PROVIDER") or os.getenv("LLM_PROVIDER") or "openai"
 33 |     ).lower()
 34 | 
 35 |     if llm_provider != "openai":
 36 |         st.error(
 37 |             f"⚠️ ChatBot은 현재 OpenAI만 지원합니다. 설정 > LLM에서 OpenAI 프로파일을 선택하거나 LLM_PROVIDER를 'openai'로 설정해주세요."
 38 |         )
 39 |         st.stop()
 40 | 
 41 |     # OpenAI API 키 확인
 42 |     openai_api_key = st.session_state.get("OPEN_AI_KEY") or os.getenv("OPEN_AI_KEY")
 43 | 
 44 |     if not openai_api_key:
 45 |         st.error(
 46 |             "⚠️ OpenAI API 키가 설정되지 않았습니다. 설정 > LLM에서 OpenAI API 키를 입력하거나, 사이드바에서 LLM 프로파일을 적용해주세요."
 47 |         )
 48 |         st.stop()
 49 | 
 50 |     # 사용할 모델명 가져오기 (llm_selector에서 설정한 값)
 51 |     model_name = (
 52 |         st.session_state.get("OPEN_AI_LLM_MODEL")
 53 |         or os.getenv("OPEN_AI_LLM_MODEL")
 54 |         or "gpt-4o-mini"
 55 |     )
 56 | 
 57 |     # DataHub 서버 URL 가져오기 (config에서 로드)
 58 |     config = load_config()
 59 |     gms_server = config.datahub_server
 60 | 
 61 |     # ChatBot 인스턴스 생성 또는 모델 업데이트
 62 |     if "chatbot_instance" not in st.session_state:
 63 |         st.session_state.chatbot_instance = ChatBot(
 64 |             openai_api_key, model_name=model_name, gms_server=gms_server
 65 |         )
 66 |     else:
 67 |         # 기존 인스턴스가 있는 경우, 모델이나 API 키, gms_server가 변경되었는지 확인
 68 |         existing_bot = st.session_state.chatbot_instance
 69 |         if (
 70 |             existing_bot.model_name != model_name
 71 |             or existing_bot.openai_api_key != openai_api_key
 72 |             or existing_bot.gms_server != gms_server
 73 |         ):
 74 |             st.session_state.chatbot_instance = ChatBot(
 75 |                 openai_api_key, model_name=model_name, gms_server=gms_server
 76 |             )
 77 | 
 78 | 
# Run the session state initialisation on every script execution.
initialize_session_state()

# Page title.
st.title("🤖 AI ChatBot")

st.markdown(
    """
    LangGraph 기반 AI ChatBot과 대화를 나눌 수 있습니다.
    - 데이터베이스 테이블 정보 검색
    - 용어집 조회
    - 쿼리 예제 조회
    - 대화를 통해 질문 구체화
    """
)

# Load the configuration.
config = load_config()

# Sidebar UI (same structure as lang2sql.py).
render_sidebar_data_source_selector(config)
st.sidebar.divider()
render_sidebar_llm_selector()
st.sidebar.divider()
render_sidebar_embedding_selector()
st.sidebar.divider()
render_sidebar_db_selector()
st.sidebar.divider()

# ChatBot-specific settings; the session controller's return value is the
# thread id passed to ChatBot.chat below.
with st.sidebar:
    st.markdown("### 🤖 ChatBot 설정")
    st.divider()
    thread_id = render_sidebar_chatbot_session_controller()


# Seed the history with a welcome message when it is empty.
if not st.session_state.chatbot_messages:
    hello_message = "안녕하세요! 무엇을 도와드릴까요? 🤖"
    st.session_state.chatbot_messages = [
        {"role": "assistant", "content": hello_message}
    ]

# Render every stored message in order.
for message in st.session_state.chatbot_messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle user input.
if prompt := st.chat_input("메시지를 입력하세요"):
    # Record and display the user's message.
    st.session_state.chatbot_messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate and display the AI response.
    with st.chat_message("assistant"):
        try:
            # Ask the ChatBot for a response on the current thread.
            response = st.session_state.chatbot_instance.chat(prompt, thread_id)

            # The reply text is the content of the last message in the response.
            response_content = response["messages"][-1].content

            # Show which model produced the answer.
            model_name = st.session_state.chatbot_instance.model_name
            st.caption(f"🤖 모델: {model_name}")

            # Show the response.
            st.markdown(response_content)

            # Record the AI response in the history.
            st.session_state.chatbot_messages.append(
                {"role": "assistant", "content": response_content}
            )
        except Exception as e:
            # Any failure is shown to the user and also stored in the history
            # as an assistant message.
            error_msg = f"오류가 발생했습니다: {str(e)}"
            st.error(error_msg)
            st.session_state.chatbot_messages.append(
                {"role": "assistant", "content": error_msg}
            )
160 | 


--------------------------------------------------------------------------------
/interface/app_pages/lang2sql.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Lang2SQL Streamlit 애플리케이션.
  3 | 
  4 | 자연어 질의를 SQL 쿼리로 변환하고 실행 결과를 시각화하는 인터페이스를 제공합니다.
  5 | 사용자는 데이터베이스 다이얼렉트 선택 및 편집, 검색기(retriever) 방식 지정, 토큰 사용량/결과 설명/시각화 등 다양한 출력 옵션을 설정할 수 있습니다.
  6 | 
  7 | 주요 기능:
  8 |     - 사용자 질의를 SQL 쿼리로 변환 후 실행
  9 |     - DB 다이얼렉트(PRESET_DIALECTS) 선택 및 편집 지원
 10 |     - 검색기 유형 및 Top-N 테이블 검색 개수 설정
 11 |     - 쿼리 실행 결과를 표와 차트로 시각화
 12 |     - 토큰 사용량, 문서 적합성 평가, AI 재해석 질의 등 추가 정보 표시
 13 | """
 14 | 
 15 | from copy import deepcopy
 16 | 
 17 | import streamlit as st
 18 | 
 19 | from interface.core.dialects import PRESET_DIALECTS, DialectOption
 20 | from interface.core.lang2sql_runner import run_lang2sql
 21 | from interface.core.result_renderer import display_result
 22 | from interface.core.session_utils import init_graph
 23 | from interface.core.config import load_config
 24 | from interface.app_pages.sidebar_components import (
 25 |     render_sidebar_data_source_selector,
 26 |     render_sidebar_llm_selector,
 27 |     render_sidebar_embedding_selector,
 28 |     render_sidebar_db_selector,
 29 | )
 30 | 
 31 | 
# Page title shown at the top of the Lang2SQL page.
TITLE = "Lang2SQL"
# Example question pre-filled into the query text area.
DEFAULT_QUERY = "고객 데이터를 기반으로 유니크한 유저 수를 카운트하는 쿼리"
# Maps each output toggle's widget key to the label shown next to its
# sidebar checkbox. The key is passed as the checkbox `key`, so the toggle
# state is stored in st.session_state under that name.
SIDEBAR_OPTIONS = {
    "show_token_usage": "Show Token Usage",
    "show_result_description": "Show Result Description",
    "show_sql": "Show SQL",
    "show_question_reinterpreted_by_ai": "Show User Question Reinterpreted by AI",
    "show_referenced_tables": "Show List of Referenced Tables",
    "show_question_gate_result": "Show Question Gate Result",
    "show_document_suitability": "Show Document Suitability",
    "show_table": "Show Table",
    "show_chart": "Show Chart",
}

st.title(TITLE)

# Load the application configuration once per script run; only the data
# source selector below receives it.
config = load_config()

# Sidebar selectors are rendered in this fixed order, separated by dividers:
# data source -> LLM -> embedding -> database.
render_sidebar_data_source_selector(config)
st.sidebar.divider()
render_sidebar_llm_selector()
st.sidebar.divider()
render_sidebar_embedding_selector()
st.sidebar.divider()
render_sidebar_db_selector()
st.sidebar.divider()

# One checkbox per output option, all enabled by default.
st.sidebar.title("Output Settings")
for key, label in SIDEBAR_OPTIONS.items():
    st.sidebar.checkbox(label, value=True, key=key)
 62 | 
 63 | st.sidebar.markdown("### 워크플로우 선택")
 64 | use_enriched = st.sidebar.checkbox(
 65 |     "프로파일 추출 & 컨텍스트 보강 워크플로우 사용", value=False
 66 | )
 67 | 
 68 | if (
 69 |     "graph" not in st.session_state
 70 |     or st.session_state.get("use_enriched") != use_enriched
 71 | ):
 72 |     GRAPH_TYPE = init_graph(use_enriched)
 73 |     st.info(f"Lang2SQL 시작됨. ({GRAPH_TYPE} 워크플로우)")
 74 | 
 75 | if st.sidebar.button("Lang2SQL 새로고침"):
 76 |     GRAPH_TYPE = init_graph(st.session_state.get("use_enriched", False))
 77 |     st.sidebar.success(
 78 |         f"Lang2SQL이 성공적으로 새로고침되었습니다. ({GRAPH_TYPE} 워크플로우)"
 79 |     )
 80 | 
# NOTE: the LLM selection UI is rendered by the sidebar component
# render_sidebar_llm_selector(), not by this part of the page.

# Free-form natural-language question, pre-filled with the default example.
user_query = st.text_area("쿼리를 입력하세요:", value=DEFAULT_QUERY)

# Seed the per-session dialect registry from the presets on first visit.
# Entries are stored as plain dicts (DialectOption.to_dict()).
if "dialects" not in st.session_state:
    st.session_state["dialects"] = {k: v.to_dict() for k, v in PRESET_DIALECTS.items()}

st.markdown("### DB 선택 및 관리")
cols = st.columns(2)
dialects = st.session_state["dialects"]
keys = list(dialects.keys())
# Previously selected dialect, falling back to the first registered one.
active = st.session_state.get("active_dialect", keys[0])

with cols[0]:
    user_database_env = st.selectbox(
        "사용할 DB를 선택하세요:", options=keys, index=keys.index(active)
    )
    # Persist both the selected dialect name and its option dict in the session.
    st.session_state["active_dialect"] = user_database_env
    st.session_state["selected_dialect_option"] = dialects[user_database_env]

with cols[1]:
    # NOTE(review): the caption mentions adding a new DB, but only editing an
    # existing entry is implemented below.
    st.caption("선택된 DB 설정을 편집하거나 새로 추가할 수 있습니다.")

with st.expander("DB 편집"):
    # The edit selector starts on the dialect that was active at the beginning
    # of this run and has its own widget key.
    edit_key = st.selectbox(
        "편집할 DB를 선택하세요:",
        options=keys,
        index=keys.index(active),
        key="dialect_edit_selector",
    )
    # Work on a copy so the stored entry is only replaced on an explicit save.
    current = deepcopy(dialects[edit_key])
    _supports_ilike = st.checkbox(
        "ILIKE 지원", value=bool(current.get("supports_ilike", False))
    )
    _hints_text = st.text_area(
        "hints (쉼표로 구분)", value=", ".join(current.get("hints", []))
    )
    if st.button("변경사항 저장", key="btn_save_dialect_edit"):
        # Rebuild the entry from the form values; hints are split on commas,
        # trimmed, and empty items are dropped.
        st.session_state["dialects"][edit_key] = DialectOption(
            name=edit_key,
            supports_ilike=_supports_ilike,
            hints=[s.strip() for s in _hints_text.split(",") if s.strip()],
        ).to_dict()
        st.success(f"{edit_key} DB가 업데이트되었습니다.")
125 | 
# Device choice forwarded to run_lang2sql; defaults to "cpu".
device = st.selectbox("모델 실행 장치", options=["cpu", "cuda"], index=0)
# Retriever identifier -> label shown in the select box.
retriever_options = {
    "기본": "벡터 검색 (기본)",
    "Reranker": "Reranker 검색 (정확도 향상)",
}
# The select box returns the identifier (dict key); format_func only changes
# the text displayed to the user.
user_retriever = st.selectbox(
    "검색기 유형을 선택하세요:",
    options=list(retriever_options.keys()),
    format_func=lambda x: retriever_options[x],
)
# Number of table documents to retrieve (1-20, default 5).
user_top_n = st.slider("검색할 테이블 정보 개수:", min_value=1, max_value=20, value=5)

# Run the Lang2SQL pipeline with the current form values and render the
# result when the user clicks the run button.
if st.button("쿼리 실행"):
    res = run_lang2sql(
        query=user_query,
        database_env=user_database_env,
        retriever_name=user_retriever,
        top_n=user_top_n,
        device=device,
        use_enriched=use_enriched,
    )
    display_result(res=res)
148 | 


--------------------------------------------------------------------------------
/utils/data/datahub_services/query_service.py:
--------------------------------------------------------------------------------
  1 | """
  2 | DataHub 쿼리 서비스 모듈
  3 | 
  4 | DataHub의 쿼리 관련 기능을 제공합니다.
  5 | """
  6 | 
  7 | from utils.data.datahub_services.base_client import DataHubBaseClient
  8 | from utils.data.queries import (
  9 |     GLOSSARY_TERMS_BY_URN_QUERY,
 10 |     LIST_QUERIES_QUERY,
 11 |     QUERIES_BY_URN_QUERY,
 12 | )
 13 | 
 14 | 
 15 | class QueryService:
 16 |     """쿼리 관련 서비스 클래스"""
 17 | 
 18 |     def __init__(self, client: DataHubBaseClient):
 19 |         """
 20 |         쿼리 서비스 초기화
 21 | 
 22 |         Args:
 23 |             client (DataHubBaseClient): DataHub 기본 클라이언트
 24 |         """
 25 |         self.client = client
 26 | 
 27 |     def get_queries(self, start=0, count=10, query="*", filters=None):
 28 |         """
 29 |         DataHub에서 쿼리 목록을 가져오는 함수
 30 | 
 31 |         Args:
 32 |             start (int): 시작 인덱스 (기본값=0)
 33 |             count (int): 반환할 쿼리 수 (기본값=10)
 34 |             query (str): 필터링에 사용할 쿼리 문자열 (기본값="*")
 35 |             filters (list): 추가 필터 (기본값=None)
 36 | 
 37 |         Returns:
 38 |             dict: 쿼리 목록 정보
 39 |         """
 40 |         # GraphQL 요청용 입력 변수 준비
 41 |         input_params = {"start": start, "count": count, "query": query}
 42 | 
 43 |         if filters:
 44 |             input_params["filters"] = filters
 45 | 
 46 |         variables = {"input": input_params}
 47 | 
 48 |         return self.client.execute_graphql_query(LIST_QUERIES_QUERY, variables)
 49 | 
 50 |     def process_queries(self, result):
 51 |         """
 52 |         쿼리 목록 결과를 처리하고 간소화된 형태로 반환하는 함수
 53 | 
 54 |         Args:
 55 |             result (dict): API 응답 결과
 56 | 
 57 |         Returns:
 58 |             dict: 처리된 쿼리 목록 데이터 (urn, name, description, statement만 포함)
 59 |         """
 60 |         if "error" in result:
 61 |             return result
 62 | 
 63 |         processed_result = {"total_queries": 0, "count": 0, "start": 0, "queries": []}
 64 | 
 65 |         if "data" in result and "listQueries" in result["data"]:
 66 |             list_queries = result["data"]["listQueries"]
 67 |             processed_result["total_queries"] = list_queries.get("total", 0)
 68 |             processed_result["count"] = list_queries.get("count", 0)
 69 |             processed_result["start"] = list_queries.get("start", 0)
 70 | 
 71 |             for query in list_queries.get("queries", []):
 72 |                 query_info = {"urn": query.get("urn")}
 73 | 
 74 |                 props = query.get("properties", {})
 75 |                 query_info["name"] = props.get("name")
 76 |                 query_info["description"] = props.get("description")
 77 |                 query_info["statement"] = props.get("statement", {}).get("value")
 78 | 
 79 |                 processed_result["queries"].append(query_info)
 80 | 
 81 |         return processed_result
 82 | 
 83 |     def get_query_data(self, start=0, count=10, query="*", filters=None):
 84 |         """
 85 |         DataHub에서 쿼리 목록을 가져와 처리하는 함수
 86 | 
 87 |         Args:
 88 |             start (int): 시작 인덱스 (기본값=0)
 89 |             count (int): 반환할 쿼리 수 (기본값=10)
 90 |             query (str): 필터링에 사용할 쿼리 문자열 (기본값="*")
 91 |             filters (list): 추가 필터 (기본값=None)
 92 | 
 93 |         Returns:
 94 |             dict: 처리된 쿼리 목록 데이터
 95 |         """
 96 |         # DataHub 서버에 연결하여 쿼리 목록 가져오기
 97 |         result = self.get_queries(start, count, query, filters)
 98 | 
 99 |         # 결과 처리
100 |         if result:
101 |             try:
102 |                 return self.process_queries(result)
103 |             except KeyError as e:
104 |                 return {"error": True, "message": f"결과 구조 파싱 중 오류 발생: {e}"}
105 |         else:
106 |             return {"error": True, "message": "쿼리 목록을 가져오지 못했습니다."}
107 | 
108 |     def get_queries_by_urn(self, dataset_urn):
109 |         """
110 |         특정 데이터셋 URN과 연관된 쿼리들을 조회하는 함수
111 | 
112 |         전체 쿼리를 가져온 후 클라이언트 사이드에서 필터링하는 방식 사용
113 | 
114 |         Args:
115 |             dataset_urn (str): 데이터셋 URN
116 | 
117 |         Returns:
118 |             dict: 연관된 쿼리 목록
119 |         """
120 |         # 먼저 전체 쿼리 목록을 가져옴
121 |         input_params = {"start": 0, "count": 1000, "query": "*"}  # 충분히 큰 수로 설정
122 | 
123 |         variables = {"input": input_params}
124 |         result = self.client.execute_graphql_query(QUERIES_BY_URN_QUERY, variables)
125 | 
126 |         if (
127 |             "error" not in result
128 |             and "data" in result
129 |             and "listQueries" in result["data"]
130 |         ):
131 |             # 클라이언트 사이드에서 특정 URN과 연관된 쿼리만 필터링
132 |             all_queries = result["data"]["listQueries"]["queries"]
133 |             filtered_queries = []
134 | 
135 |             for query in all_queries:
136 |                 subjects = query.get("subjects", [])
137 |                 for subject in subjects:
138 |                     if subject.get("dataset", {}).get("urn") == dataset_urn:
139 |                         filtered_queries.append(query)
140 |                         break
141 | 
142 |             # 필터링된 결과로 응답 구조 재구성
143 |             result["data"]["listQueries"]["queries"] = filtered_queries
144 |             result["data"]["listQueries"]["count"] = len(filtered_queries)
145 | 
146 |         return result
147 | 
148 |     def get_glossary_terms_by_urn(self, dataset_urn):
149 |         """
150 |         특정 데이터셋 URN의 glossary terms를 조회하는 함수
151 | 
152 |         Args:
153 |             dataset_urn (str): 데이터셋 URN
154 | 
155 |         Returns:
156 |             dict: glossary terms 정보
157 |         """
158 |         variables = {"urn": dataset_urn}
159 |         return self.client.execute_graphql_query(GLOSSARY_TERMS_BY_URN_QUERY, variables)
160 | 


--------------------------------------------------------------------------------
/interface/app_pages/settings_sections/README.md:
--------------------------------------------------------------------------------
  1 | # settings_sections
  2 | 
  3 | 설정 페이지의 각 섹션을 렌더링하는 모듈들입니다.
  4 | 
  5 | ## 디렉토리 구조
  6 | 
  7 | ```
  8 | settings_sections/
  9 | ├── __init__.py
 10 | ├── data_source_section.py
 11 | ├── db_section.py
 12 | └── llm_section.py
 13 | ```
 14 | 
 15 | ## 파일 목록 및 설명
 16 | 
 17 | ### `__init__.py`
 18 | 
 19 | 패키지 초기화 파일로, 패키지에서 export되는 모듈 목록을 정의합니다.
 20 | 
 21 | **내보내는 모듈:**
 22 | - `data_source_section`
 23 | - `llm_section`
 24 | - `db_section`
 25 | 
 26 | ### `data_source_section.py`
 27 | 
 28 | 데이터 소스 설정을 관리하는 UI 섹션을 제공합니다.
 29 | 
 30 | **주요 기능:**
 31 | - DataHub 또는 VectorDB 중 하나를 선택하여 데이터 소스 모드 설정
 32 | - DataHub 서버 관리:
 33 |   - 등록된 DataHub 목록 조회 및 표시
 34 |   - 새로운 DataHub 추가 (이름, URL, FAISS 저장 경로, 메모)
 35 |   - 기존 DataHub 편집 및 삭제
 36 |   - GMS 서버 헬스 체크 기능
 37 | - VectorDB 관리:
 38 |   - 등록된 VectorDB 목록 조회 및 표시 (FAISS, pgvector 지원)
 39 |   - 새로운 VectorDB 추가 (이름, 타입, 위치, 컬렉션 접두사, 메모)
 40 |   - 기존 VectorDB 편집 및 삭제
 41 |   - 설정 검증 기능
 42 | 
 43 | **주요 함수:**
 44 | - `render_data_source_section(config: Config | None = None) -> None`
 45 |   - 데이터 소스 설정 섹션을 Streamlit UI로 렌더링
 46 |   - `config` 파라미터가 없으면 내부에서 `load_config()`를 호출하여 로드
 47 | 
 48 | **의존성:**
 49 | - `interface.core.config`: Config 관리, 데이터 소스 레지스트리 조작
 50 | - `infra.monitoring.check_server.CheckServer`: GMS 서버 헬스 체크
 51 | 
 52 | **상태 표시:**
 53 | - 현재 선택된 데이터 소스 모드에 따라 상태 배너 표시
 54 | - DataHub: 헬스 체크 결과에 따른 성공/경고/정보 메시지
 55 | - VectorDB: 설정 완전성에 따른 성공/경고 메시지
 56 | 
 57 | ### `db_section.py`
 58 | 
 59 | 데이터베이스 연결 설정을 관리하는 UI 섹션을 제공합니다.
 60 | 
 61 | **주요 기능:**
 62 | - 다양한 DB 타입 지원:
 63 |   - PostgreSQL, MySQL, MariaDB, Oracle, ClickHouse
 64 |   - DuckDB, SQLite
 65 |   - Databricks, Snowflake, Trino
 66 | - DB 프로파일 관리:
 67 |   - 등록된 DB 프로파일 목록 조회 및 표시
 68 |   - 새로운 DB 프로파일 추가
 69 |   - 기존 DB 프로파일 편집 및 삭제
 70 | - DB 타입별 필드 동적 처리:
 71 |   - 기본 필드: Host, Port, User, Database (또는 Path for DuckDB/SQLite)
 72 |   - 추가 필드: Oracle(Service Name), Databricks(HTTP Path, Catalog, Schema), Snowflake(Account, Warehouse, Schema), Trino(HTTP Scheme, Catalog, Schema)
 73 |   - 비밀 필드: Password 또는 Access Token (타입별 상이)
 74 | - 환경 변수 기반 자동 채우기 지원
 75 | - 연결 테스트 기능 (SELECT 1 쿼리 실행)
 76 | - 설정 검증 및 세션 적용 기능
 77 | 
 78 | **주요 함수:**
 79 | - `render_db_section() -> None`
 80 |   - DB 연결 설정 섹션을 Streamlit UI로 렌더링
 81 | 
 82 | **의존성:**
 83 | - `interface.core.config`: DB 연결 레지스트리 조작
 84 | - `utils.databases.DatabaseFactory`: DB 커넥터 생성 및 연결 테스트
 85 | - `utils.databases.factory.load_config_from_env`: 환경 변수에서 설정 로드
 86 | 
 87 | **헬퍼 함수:**
 88 | - `_non_secret_fields(db_type: str) -> list[tuple[str, str]]`: DB 타입별 기본 필드 정의
 89 | - `_extra_non_secret_fields(db_type: str) -> list[tuple[str, str]]`: DB 타입별 추가 필드 정의
 90 | - `_secret_fields(db_type: str) -> list[tuple[str, str]]`: DB 타입별 비밀 필드 정의
 91 | - `_prefill_from_env(db_type: str, key: str) -> str`: 환경 변수에서 기본값 로드
 92 | 
 93 | ### `llm_section.py`
 94 | 
 95 | LLM 및 Embedding 설정을 관리하는 UI 섹션을 제공합니다.
 96 | 
 97 | **주요 기능:**
 98 | - LLM 공급자 지원:
 99 |   - OpenAI, Azure OpenAI, AWS Bedrock, Gemini, Ollama, Hugging Face
100 | - Embedding 공급자 지원 (동일한 공급자 목록)
101 | - 공급자별 필드 동적 처리:
102 |   - OpenAI: Model, API Key
103 |   - Azure: Endpoint, Deployment(Model), API Version, API Key
104 |   - Bedrock: Model, Access Key ID, Secret Access Key, Region
105 |   - Gemini: Model, API Key (embedding만)
106 |   - Ollama: Model, Base URL
107 |   - Hugging Face: Endpoint URL, Repo ID, Model, API Token (또는 Embedding: Model, Repo ID, API Token)
108 | - 프로파일 저장 기능:
109 |   - LLM 프로파일 저장 (비밀키 제외 옵션)
110 |   - Embedding 프로파일 저장 (시크릿 포함)
111 | - 저장된 프로파일 목록 조회
112 | - 환경 변수 및 세션 상태 기반 자동 채우기
113 | 
114 | **주요 함수:**
115 | - `render_llm_section(config: Config | None = None) -> None`
116 |   - LLM 및 Embedding 설정 섹션을 Streamlit UI로 렌더링
117 |   - 2개 컬럼으로 나뉘어 Chat LLM과 Embeddings를 각각 설정
118 |   - `config` 파라미터가 없으면 내부에서 `load_config()`를 호출하거나 None 처리
119 | 
120 | **의존성:**
121 | - `interface.core.config`: LLM/Embedding 설정 및 프로파일 관리
122 | 
123 | **헬퍼 함수:**
124 | - `_llm_fields(provider: str) -> list[tuple[str, str, bool]]`: LLM 공급자별 필드 정의 (label, env_key, is_secret)
125 | - `_embedding_fields(provider: str) -> list[tuple[str, str, bool]]`: Embedding 공급자별 필드 정의
126 | 
127 | ## 사용 방법
128 | 
129 | 이 모듈들은 `interface/app_pages/settings.py`에서 import되어 사용됩니다.
130 | 
131 | ### Import 예시
132 | 
133 | ```python
134 | from interface.app_pages.settings_sections.data_source_section import (
135 |     render_data_source_section,
136 | )
137 | from interface.app_pages.settings_sections.llm_section import render_llm_section
138 | from interface.app_pages.settings_sections.db_section import render_db_section
139 | ```
140 | 
141 | ### 사용 예시
142 | 
143 | `settings.py`에서의 사용:
144 | 
145 | ```python
146 | from interface.core.config import load_config
147 | 
148 | config = load_config()
149 | 
150 | tabs = st.tabs(["데이터 소스", "LLM", "DB"])
151 | 
152 | with tabs[0]:
153 |     render_data_source_section(config)
154 | 
155 | with tabs[1]:
156 |     render_llm_section(config)
157 | 
158 | with tabs[2]:
159 |     render_db_section()
160 | ```
161 | 
162 | ### 함수 시그니처
163 | 
164 | #### `render_data_source_section(config: Config | None = None) -> None`
165 | - **매개변수:**
166 |   - `config` (Config | None): 설정 객체. None이면 내부에서 `load_config()` 호출
167 | - **반환값:** None (Streamlit UI 직접 렌더링)
168 | 
169 | #### `render_db_section() -> None`
170 | - **매개변수:** 없음
171 | - **반환값:** None (Streamlit UI 직접 렌더링)
172 | 
173 | #### `render_llm_section(config: Config | None = None) -> None`
174 | - **매개변수:**
175 |   - `config` (Config | None): 설정 객체. None이면 내부에서 `load_config()` 호출하거나 None 처리
176 | - **반환값:** None (Streamlit UI 직접 렌더링)
177 | 
178 | ## 공통 특징
179 | 
180 | - 모든 섹션은 Streamlit을 사용하여 UI를 렌더링합니다.
181 | - 설정 변경 시 `st.rerun()`을 호출하여 UI를 새로고침합니다.
182 | - 에러 발생 시 `st.error()`를 사용하여 사용자에게 오류 메시지를 표시합니다.
183 | - 성공적인 작업 완료 시 `st.success()`를 사용하여 확인 메시지를 표시합니다.
184 | - 민감한 정보(비밀번호, API 키 등)는 `type="password"`를 사용하여 마스킹 처리합니다.
185 | 
186 | 


--------------------------------------------------------------------------------
/utils/llm/graph_utils/README.md:
--------------------------------------------------------------------------------
  1 | # graph_utils
  2 | 
  3 | 이 모듈은 **LangGraph workflow**를 위한 그래프 유틸리티들을 제공합니다. Lang2SQL 프로젝트에서 자연어 질문을 SQL 쿼리로 변환하는 워크플로우를 LangGraph를 사용하여 구성합니다.
  4 | 
  5 | ## 디렉토리 구조
  6 | 
  7 | ```
  8 | graph_utils/
  9 | ├── __init__.py
 10 | ├── base.py
 11 | ├── basic_graph.py
 12 | ├── enriched_graph.py
 13 | ├── profile_utils.py
 14 | └── README.md
 15 | ```
 16 | 
 17 | ## 파일 설명
 18 | 
 19 | ### `__init__.py`
 20 | 그래프 관련 유틸리티 모듈의 공개 인터페이스를 정의합니다.
 21 | 
 22 | **주요 사용:**
 23 | - **상태 및 노드 식별자:**
 24 |   - `QueryMakerState`: 그래프의 상태 타입 정의
 25 |   - `GET_TABLE_INFO`, `QUERY_MAKER`, `PROFILE_EXTRACTION`, `CONTEXT_ENRICHMENT`: 노드 식별자 상수
 26 | 
 27 | - **노드 함수들:**
 28 |   - `get_table_info_node`: 테이블 정보 검색 노드
 29 |   - `query_maker_node`: SQL 쿼리 생성 노드
 30 |   - `profile_extraction_node`: 질문 프로파일 추출 노드
 31 |   - `context_enrichment_node`: 컨텍스트 보강 노드
 32 | 
 33 | - **그래프 빌더들:**
 34 |   - `basic_builder`: 기본 워크플로우 그래프 빌더
 35 |   - `enriched_builder`: 확장된 워크플로우 그래프 빌더
 36 | 
 37 | ### `base.py`
 38 | LangGraph 워크플로우의 핵심 노드 함수들과 상태 정의를 포함합니다.
 39 | 
 40 | **주요 내용:**
 41 | - **상태 타입 (`QueryMakerState`):** TypedDict를 사용하여 그래프 상태 구조를 정의
 42 |   - `messages`: LLM 메시지 리스트
 43 |   - `user_database_env`: 사용자 데이터베이스 환경
 44 |   - `searched_tables`: 검색된 테이블 정보
 45 |   - `question_profile`: 질문 프로파일 정보
 46 |   - `generated_query`: 생성된 SQL 쿼리
 47 |   - 기타 워크플로우에 필요한 상태 정보
 48 | 
 49 | - **노드 식별자 상수:**
 50 |   - `QUESTION_GATE`, `EVALUATE_DOCUMENT_SUITABILITY`, `GET_TABLE_INFO`, `TOOL`, `TABLE_FILTER`, `QUERY_MAKER`, `PROFILE_EXTRACTION`, `CONTEXT_ENRICHMENT`
 51 | 
 52 | - **노드 함수들:**
 53 |   - `question_gate_node`: 사용자 질문이 SQL로 답변 가능한지 판별하는 게이트 노드
 54 |   - `get_table_info_node`: 벡터 검색을 통해 관련 테이블 정보를 가져오는 노드
 55 |   - `document_suitability_node`: 검색된 테이블들의 문서 적합성 점수를 계산하는 노드
 56 |   - `profile_extraction_node`: 자연어 쿼리로부터 질문 유형(시계열, 집계, 필터 등)을 추출하는 노드
 57 |   - `context_enrichment_node`: 질문과 관련된 메타데이터를 기반으로 질문을 풍부하게 만드는 노드
 58 |   - `query_maker_node`: 최종 SQL 쿼리를 생성하는 노드
 59 | 
 60 | ### `basic_graph.py`
 61 | 기본 워크플로우를 위한 StateGraph 구성을 정의합니다.
 62 | 
 63 | **워크플로우 순서:**
 64 | ```
 65 | QUESTION_GATE → GET_TABLE_INFO → EVALUATE_DOCUMENT_SUITABILITY → QUERY_MAKER → END
 66 | ```
 67 | 
 68 | **주요 내용:**
 69 | - `StateGraph`를 사용하여 기본 워크플로우 그래프 생성
 70 | - `builder` 객체를 export하여 다른 모듈에서 사용 가능
 71 | - 조건부 라우팅(`add_conditional_edges`)을 통해 게이트 노드 이후 흐름 제어
 72 | 
 73 | ### `enriched_graph.py`
 74 | 기본 워크플로우에 프로파일 추출과 컨텍스트 보강 단계를 추가한 확장된 그래프입니다.
 75 | 
 76 | **워크플로우 순서:**
 77 | ```
 78 | QUESTION_GATE → GET_TABLE_INFO → EVALUATE_DOCUMENT_SUITABILITY → 
 79 | PROFILE_EXTRACTION → CONTEXT_ENRICHMENT → QUERY_MAKER → END
 80 | ```
 81 | 
 82 | **주요 내용:**
 83 | - `basic_graph`와 동일한 구조이지만 `PROFILE_EXTRACTION`과 `CONTEXT_ENRICHMENT` 노드가 추가됨
 84 | - 더 정교한 질문 분석과 컨텍스트 보강을 통해 더 나은 SQL 쿼리 생성이 가능
 85 | 
 86 | ### `profile_utils.py`
 87 | 질문 프로파일 객체를 텍스트로 변환하는 유틸리티 함수를 제공합니다.
 88 | 
 89 | **주요 함수:**
 90 | - `profile_to_text(profile_obj) -> str`: 질문 프로파일 객체를 읽기 쉬운 텍스트 형태로 변환
 91 |   - 시계열 분석 필요 여부
 92 |   - 집계 함수 필요 여부
 93 |   - WHERE 조건 필요 여부
 94 |   - GROUP BY 필요 여부
 95 |   - 정렬/순위 필요 여부
 96 |   - 기간 비교 필요 여부
 97 |   - 의도 유형 정보
 98 | 
 99 | ## 사용 방법
100 | 
101 | ### 1. `engine/query_executor.py`에서의 사용
102 | 
103 | 기본 또는 확장된 그래프 빌더를 선택하여 쿼리를 실행합니다:
104 | 
105 | ```python
106 | from utils.llm.graph_utils.basic_graph import builder as basic_builder
107 | from utils.llm.graph_utils.enriched_graph import builder as enriched_builder
108 | 
109 | # 그래프 선택
110 | if use_enriched_graph:
111 |     graph_builder = enriched_builder
112 | else:
113 |     graph_builder = basic_builder
114 | 
115 | # 그래프 컴파일 및 실행
116 | graph = graph_builder.compile()
117 | result = graph.invoke({
118 |     "messages": [HumanMessage(content=query)],
119 |     "user_database_env": database_env,
120 |     # ... 기타 상태 정보
121 | })
122 | ```
123 | 
124 | **사용 위치:** `engine/query_executor.py`의 `execute_query()` 함수
125 | 
126 | ### 2. `interface/core/session_utils.py`에서의 사용
127 | 
128 | Streamlit 세션 상태에서 그래프 빌더를 동적으로 초기화합니다:
129 | 
130 | ```python
131 | def init_graph(use_enriched: bool) -> str:
132 |     builder_module = (
133 |         "utils.llm.graph_utils.enriched_graph"
134 |         if use_enriched
135 |         else "utils.llm.graph_utils.basic_graph"
136 |     )
137 |     builder = __import__(builder_module, fromlist=["builder"]).builder
138 |     st.session_state["graph"] = builder.compile()
139 |     return "확장된" if use_enriched else "기본"
140 | ```
141 | 
142 | **사용 위치:** `interface/core/session_utils.py`의 `init_graph()` 함수
143 | 
144 | ### 3. `interface/app_pages/graph_builder.py`에서의 사용
145 | 
146 | Streamlit 인터페이스에서 커스텀 그래프를 구성할 때 개별 노드 함수들을 사용합니다:
147 | 
148 | ```python
149 | from utils.llm.graph_utils.base import (
150 |     CONTEXT_ENRICHMENT,
151 |     GET_TABLE_INFO,
152 |     PROFILE_EXTRACTION,
153 |     QUERY_MAKER,
154 |     QueryMakerState,
155 |     context_enrichment_node,
156 |     get_table_info_node,
157 |     profile_extraction_node,
158 |     query_maker_node,
159 | )
160 | 
161 | # 커스텀 시퀀스에 따라 노드 등록
162 | builder = StateGraph(QueryMakerState)
163 | for node_id in sequence:
164 |     if node_id == GET_TABLE_INFO:
165 |         builder.add_node(GET_TABLE_INFO, get_table_info_node)
166 |     elif node_id == PROFILE_EXTRACTION:
167 |         builder.add_node(PROFILE_EXTRACTION, profile_extraction_node)
168 |     # ... 기타 노드들
169 | ```
170 | 
171 | **사용 위치:** `interface/app_pages/graph_builder.py`의 `build_state_graph()` 함수
172 | 
173 | ## 워크플로우 개요
174 | 
175 | 이 모듈은 **LangGraph**를 사용하여 자연어 질문을 SQL 쿼리로 변환하는 워크플로우를 구현합니다:
176 | 
177 | 1. **QUESTION_GATE**: 질문이 SQL로 답변 가능한지 판별
178 | 2. **GET_TABLE_INFO**: 벡터 검색을 통해 관련 테이블 정보 검색
179 | 3. **EVALUATE_DOCUMENT_SUITABILITY**: 검색된 테이블들의 적합성 평가
180 | 4. **PROFILE_EXTRACTION** (확장 그래프만): 질문의 특성 추출 (시계열, 집계 등)
181 | 5. **CONTEXT_ENRICHMENT** (확장 그래프만): 질문을 컨텍스트 정보로 보강
182 | 6. **QUERY_MAKER**: 최종 SQL 쿼리 생성
183 | 
184 | 각 노드는 `QueryMakerState`를 입력으로 받아 상태를 업데이트하고 반환합니다.
185 | 
186 | 


--------------------------------------------------------------------------------
/utils/data/datahub_source.py:
--------------------------------------------------------------------------------
  1 | """
  2 | DataHub 메타데이터 페처 - 리팩토링된 버전
  3 | 
  4 | 기존 DatahubMetadataFetcher의 모든 기능을 유지하면서
  5 | 내부적으로는 분리된 서비스 모듈들을 사용합니다.
  6 | 
  7 | 기존 코드와의 완벽한 호환성을 보장합니다.
  8 | """
  9 | 
 10 | from utils.data.datahub_services.base_client import DataHubBaseClient
 11 | from utils.data.datahub_services.glossary_service import GlossaryService
 12 | from utils.data.datahub_services.metadata_service import MetadataService
 13 | from utils.data.datahub_services.query_service import QueryService
 14 | 
 15 | 
 16 | class DatahubMetadataFetcher:
 17 |     """
 18 |     DataHub 메타데이터 페처 - 기존 인터페이스 유지
 19 | 
 20 |     내부적으로는 분리된 서비스들을 사용하지만
 21 |     외부 인터페이스는 기존과 동일하게 유지됩니다.
 22 |     """
 23 | 
 24 |     def __init__(self, gms_server="http://localhost:8080", extra_headers={}):
 25 |         """
 26 |         DataHub 메타데이터 페처 초기화
 27 | 
 28 |         Args:
 29 |             gms_server (str): DataHub GMS 서버 URL
 30 |             extra_headers (dict): 추가 HTTP 헤더
 31 |         """
 32 |         # 기본 클라이언트 초기화
 33 |         self.client = DataHubBaseClient(gms_server, extra_headers)
 34 | 
 35 |         # 서비스들 초기화
 36 |         self.metadata_service = MetadataService(self.client)
 37 |         self.query_service = QueryService(self.client)
 38 |         self.glossary_service = GlossaryService(self.client)
 39 | 
 40 |         # 기존 속성들 호환성을 위해 유지
 41 |         self.gms_server = gms_server
 42 |         self.emitter = self.client.emitter
 43 |         self.datahub_graph = self.client.datahub_graph
 44 | 
 45 |     # === 기존 인터페이스 유지 - 메타데이터 관련 ===
 46 | 
 47 |     def get_urns(self):
 48 |         """필터를 적용하여 데이터셋의 URN 가져오기"""
 49 |         return self.client.get_urns()
 50 | 
 51 |     def get_table_name(self, urn):
 52 |         """URN에 대한 테이블 이름 가져오기"""
 53 |         return self.metadata_service.get_table_name(urn)
 54 | 
 55 |     def get_table_description(self, urn):
 56 |         """URN에 대한 테이블 설명 가져오기"""
 57 |         return self.metadata_service.get_table_description(urn)
 58 | 
 59 |     def get_column_names_and_descriptions(self, urn):
 60 |         """URN에 대한 컬럼 이름 및 설명 가져오기"""
 61 |         return self.metadata_service.get_column_names_and_descriptions(urn)
 62 | 
 63 |     def get_table_lineage(
 64 |         self, urn, counts=100, direction="DOWNSTREAM", degree_values=None
 65 |     ):
 66 |         """URN에 대한 DOWNSTREAM/UPSTREAM lineage entity를 counts 만큼 가져오는 함수"""
 67 |         return self.metadata_service.get_table_lineage(
 68 |             urn, counts, direction, degree_values
 69 |         )
 70 | 
 71 |     def get_column_lineage(self, urn):
 72 |         """URN에 대한 UPSTREAM lineage의 column source를 가져오는 함수"""
 73 |         return self.metadata_service.get_column_lineage(urn)
 74 | 
 75 |     def min_degree_lineage(self, lineage_result):
 76 |         """lineage 중 최소 degree만 가져오는 함수"""
 77 |         return self.metadata_service.min_degree_lineage(lineage_result)
 78 | 
 79 |     def build_table_metadata(self, urn, max_degree=2, sort_by_degree=True):
 80 |         """테이블 단위로 테이블 이름, 설명, 컬럼, 테이블 별 리니지(downstream/upstream), 컬럼 별 리니지(upstream)이 포함된 메타데이터 생성 함수"""
 81 |         return self.metadata_service.build_table_metadata(
 82 |             urn, max_degree, sort_by_degree
 83 |         )
 84 | 
 85 |     def get_urn_info(self, urn):
 86 |         """특정 URN에 대한 모든 관련 정보를 가져오는 함수"""
 87 |         return self.metadata_service.get_urn_info(urn)
 88 | 
 89 |     def _print_urn_details(self, metadata):
 90 |         """URN 메타데이터를 보기 좋게 출력하는 내부 함수"""
 91 |         return self.metadata_service._print_urn_details(metadata)
 92 | 
 93 |     # === 기존 인터페이스 유지 - 용어집 관련 ===
 94 | 
 95 |     def get_root_glossary_nodes(self):
 96 |         """DataHub에서 루트 용어집 노드를 가져오는 함수"""
 97 |         return self.glossary_service.get_root_glossary_nodes()
 98 | 
 99 |     def get_glossary_node_by_urn(self, urn):
100 |         """DataHub에서 특정 URN의 용어집 노드 및 그 자식 항목을 가져오는 함수"""
101 |         return self.glossary_service.get_glossary_node_by_urn(urn)
102 | 
103 |     def get_node_basic_info(self, node, index):
104 |         """용어집 노드의 기본 정보를 딕셔너리로 반환하는 함수"""
105 |         return self.glossary_service.get_node_basic_info(node, index)
106 | 
107 |     def get_child_entity_info(self, entity, index):
108 |         """자식 엔티티(용어 또는 노드)의 정보를 딕셔너리로 반환하는 함수"""
109 |         return self.glossary_service.get_child_entity_info(entity, index)
110 | 
111 |     def process_node_details(self, node):
112 |         """노드의 상세 정보를 처리하고 딕셔너리로 반환하는 함수"""
113 |         return self.glossary_service.process_node_details(node)
114 | 
115 |     def process_glossary_nodes(self, result):
116 |         """용어집 노드 결과를 처리하고 딕셔너리로 반환하는 함수"""
117 |         return self.glossary_service.process_glossary_nodes(result)
118 | 
119 |     def get_glossary_data(self):
120 |         """DataHub에서 전체 용어집 데이터를 가져와 처리하는 함수"""
121 |         return self.glossary_service.get_glossary_data()
122 | 
123 |     def get_queries(self, start=0, count=10, query="*", filters=None):
124 |         """DataHub에서 쿼리 목록을 가져오는 함수"""
125 |         return self.query_service.get_queries(start, count, query, filters)
126 | 
127 |     def process_queries(self, result):
128 |         """쿼리 목록 결과를 처리하고 간소화된 형태로 반환하는 함수"""
129 |         return self.query_service.process_queries(result)
130 | 
131 |     def get_query_data(self, start=0, count=10, query="*", filters=None):
132 |         """DataHub에서 쿼리 목록을 가져와 처리하는 함수"""
133 |         return self.query_service.get_query_data(start, count, query, filters)
134 | 
135 |     def get_queries_by_urn(self, dataset_urn):
136 |         """특정 데이터셋 URN과 연관된 쿼리들을 조회하는 함수"""
137 |         return self.query_service.get_queries_by_urn(dataset_urn)
138 | 
139 |     def get_glossary_terms_by_urn(self, dataset_urn):
140 |         """특정 데이터셋 URN의 glossary terms를 조회하는 함수"""
141 |         return self.glossary_service.get_glossary_terms_by_urn(dataset_urn)
142 | 
143 |     def _is_valid_gms_server(self, gms_server):
144 |         """GMS 서버 주소의 유효성을 검사하는 함수 (하위 호환성)"""
145 |         return self.client._is_valid_gms_server(gms_server)
146 | 


--------------------------------------------------------------------------------