├── .env.example ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── PUBLISHING.md ├── README.md ├── assets └── teddynote-parser.png ├── docker-compose.yml ├── example └── parse_pdf.ipynb ├── pyproject.toml ├── requirements.txt ├── scripts └── publish.sh └── teddynote_parser_client ├── __init__.py ├── __main__.py ├── cli.py ├── client.py └── tests ├── __init__.py └── test_client.py /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=sk-proj-o0gulL2J2aJ... 2 | UPSTAGE_API_KEY=up_VStYIPg7... 3 | PORT=9990 4 | DATA_VOLUME=./data 5 | RESULT_VOLUME=./result 6 | UPLOADS_VOLUME=./uploads 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | data/ 29 | .env 30 | 31 | # PyInstaller 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # IPython 78 | profile_default/ 79 | 
ipython_config.py 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | .dmypy.json 112 | dmypy.json 113 | 114 | # Pyre type checker 115 | .pyre/ 116 | 117 | # Editor settings 118 | .vscode/ 119 | .idea/ 120 | 121 | # Project specific 122 | parser_results/ 123 | example_results/ 124 | client_test_results/ 125 | *_results/ 126 | *.zip 127 | 128 | # UV specific 129 | uv.lock -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 테디노트 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include pyproject.toml 4 | include MANIFEST.in 5 | recursive-exclude teddynote_parser_client/tests * -------------------------------------------------------------------------------- /PUBLISHING.md: -------------------------------------------------------------------------------- 1 | # PyPI 패키지 게시 가이드 2 | 3 | 이 문서는 `teddynote-parser-client` 패키지를 PyPI에 게시하는 방법을 설명합니다. 4 | 5 | ## 사전 준비 6 | 7 | 1. [PyPI](https://pypi.org/) 계정이 필요합니다. 8 | 2. [TestPyPI](https://test.pypi.org/) 계정도 있으면 좋습니다(테스트 업로드용). 9 | 3. `build`와 `twine` 패키지가 설치되어 있어야 합니다: 10 | 11 | ```bash 12 | pip install --upgrade pip build twine 13 | ``` 14 | 15 | 4. PyPI 인증 정보를 설정합니다. 다음 파일을 만들어 주세요: `~/.pypirc` 16 | 17 | ``` 18 | [distutils] 19 | index-servers = 20 | pypi 21 | testpypi 22 | 23 | [pypi] 24 | username = your_username 25 | password = your_password 26 | 27 | [testpypi] 28 | repository = https://test.pypi.org/legacy/ 29 | username = your_username 30 | password = your_password 31 | ``` 32 | 33 | 대신 `twine`을 실행할 때 `--username`과 `--password` 옵션을 사용하거나 업로드 시 자격 증명을 입력할 수도 있습니다. 34 | 35 | ## 패키지 빌드 및 게시 36 | 37 | ### 자동 스크립트 사용 38 | 39 | 제공된 스크립트를 사용하여 패키지 빌드 및 배포를 자동화할 수 있습니다: 40 | 41 | ```bash 42 | ./scripts/publish.sh 43 | ``` 44 | 45 | ### 수동 게시 46 | 47 | 1. 기존 빌드 정리: 48 | 49 | ```bash 50 | rm -rf build/ dist/ *.egg-info/ 51 | ``` 52 | 53 | 2. 패키지 빌드: 54 | 55 | ```bash 56 | python -m build 57 | ``` 58 | 59 | 3. 패키지 확인: 60 | 61 | ```bash 62 | twine check dist/* 63 | ``` 64 | 65 | 4. 
TestPyPI에 업로드 (선택 사항): 66 | 67 | ```bash 68 | twine upload --repository testpypi dist/* 69 | ``` 70 | 71 | 5. 실제 PyPI에 업로드: 72 | 73 | ```bash 74 | twine upload dist/* 75 | ``` 76 | 77 | ## 버전 업데이트 78 | 79 | 새 버전을 배포하기 전에 다음 파일에서 버전 번호를 업데이트하세요: 80 | 81 | - `teddynote_parser_client/__init__.py`의 `__version__` 변수 82 | - `pyproject.toml`의 `version` 필드 83 | 84 | ## 확인 85 | 86 | 패키지가 성공적으로 게시되었는지 확인하려면: 87 | 88 | 1. TestPyPI에서 설치: 89 | 90 | ```bash 91 | pip install --index-url https://test.pypi.org/simple/ teddynote-parser-client 92 | ``` 93 | 94 | 2. 실제 PyPI에서 설치: 95 | 96 | ```bash 97 | pip install teddynote-parser-client 98 | ``` 99 | 100 | 패키지를 설치한 후 다음을 실행하여 작동하는지 확인하세요: 101 | 102 | ```python 103 | from teddynote_parser_client import TeddyNoteParserClient 104 | print(TeddyNoteParserClient.__doc__) 105 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TeddyNote Parser Client 2 | 3 | TeddyNote Parser API를 사용하기 위한 Python 클라이언트 라이브러리입니다. 4 | 5 | > Parser 구조 예시 6 | ![](./assets/teddynote-parser.png) 7 | 8 | ## 바로가기 9 | 10 | - [예제 코드: PDF 파싱 실습 노트북](example/parse_pdf.ipynb) 11 | - [API 서버 실행 가이드](#teddynote-parser-api-서버-실행하기) 12 | - [클라이언트 설치 방법](#설치) 13 | - [클라이언트 사용 예시](#사용-방법) 14 | - [API 참조 문서](#api-참조) 15 | 16 | ## TeddyNote Parser API 서버 실행하기 17 | 18 | 먼저 TeddyNote Parser API 서버를 Docker Compose를 사용하여 실행합니다. 19 | 20 | ### 1. 환경 변수 설정 21 | 22 | 제공된 `.env.example` 파일의 이름을 `.env`로 변경하고 필요한 값들을 설정합니다. 23 | 24 | ``` 25 | # .env.example 파일의 이름을 .env로 변경 26 | # Windows: .env.example 파일을 마우스 오른쪽 버튼으로 클릭하고 '이름 바꾸기' 선택 27 | # macOS: .env.example 파일을 선택하고 Enter 키를 눌러 이름 변경 28 | ``` 29 | 30 | `.env` 파일을 열고 다음 변수들을 설정합니다. 
31 | 32 | ``` 33 | PORT=9990 # API 서버 포트 34 | UPSTAGE_API_KEY=your-key-here # UPSTAGE API 키 35 | OPENAI_API_KEY=your-key-here # OpenAI API 키 36 | 37 | # 볼륨 경로 설정 (선택 사항, 절대 경로 권장) 38 | # macOS 예시 (Documents 폴더) 39 | DATA_VOLUME=/Users/username/Documents/teddynote/data 40 | RESULT_VOLUME=/Users/username/Documents/teddynote/result 41 | UPLOADS_VOLUME=/Users/username/Documents/teddynote/uploads 42 | 43 | # Windows 예시 (Documents 폴더): 44 | # DATA_VOLUME=C:/Users/username/Documents/teddynote/data 45 | # RESULT_VOLUME=C:/Users/username/Documents/teddynote/result 46 | # UPLOADS_VOLUME=C:/Users/username/Documents/teddynote/uploads 47 | 48 | # 또는 상대 경로 사용 (기본값): 49 | # DATA_VOLUME=./data 50 | # RESULT_VOLUME=./result 51 | # UPLOADS_VOLUME=./uploads 52 | ``` 53 | 54 | > 참고: 볼륨 경로에 절대 경로를 사용하면 Docker 볼륨 매핑 문제를 방지할 수 있습니다. 특히 Windows에서는 경로 구분자로 역슬래시(`\`) 대신 슬래시(`/`)를 사용해야 합니다. 사용하기 전에 Documents 폴더 아래에 teddynote/data, teddynote/result, teddynote/uploads 폴더를 미리 생성해 두세요. 55 | 56 | ### 2. Docker Compose로 서버 실행 57 | 58 | ```bash 59 | # 서버 시작 60 | docker-compose up -d 61 | ``` 62 | 63 | ### 3. 서버 작동 확인 64 | 65 | > 참고: PORT 번호는 `.env` 파일에서 지정한 포트번호를 사용합니다. 66 | 67 | 브라우저에서 `http://localhost:9990/health`에 접속하거나 다음 명령어로 확인할 수 있습니다: 68 | 69 | ```bash 70 | curl http://localhost:9990/health 71 | ``` 72 | 73 | ## 설치 74 | 75 | ```bash 76 | pip install teddynote-parser-client 77 | ``` 78 | 79 | ## 사용 방법 80 | 81 | > 💡 **Jupyter Notebook 예제**: 실제 사용 예시는 [example/parse_pdf.ipynb](example/parse_pdf.ipynb) 노트북에서 확인할 수 있습니다. 이 노트북은 PDF 파일 파싱 과정을 단계별로 보여줍니다. 82 | 83 | ### 1. 환경 변수 설정 84 | 85 | API 키를 환경 변수로 설정할 수 있습니다. 86 | 87 | ```bash 88 | export UPSTAGE_API_KEY="your-upstage-api-key" 89 | export OPENAI_API_KEY="your-openai-api-key" 90 | ``` 91 | 92 | 또는 `.env` 파일을 만들고 다음과 같이 설정할 수 있습니다. 
93 | 94 | ``` 95 | UPSTAGE_API_KEY=your-upstage-api-key 96 | OPENAI_API_KEY=your-openai-api-key 97 | ``` 98 | 99 | 그런 다음 Python 코드에서: 100 | 101 | ```python 102 | import os 103 | from dotenv import load_dotenv 104 | 105 | load_dotenv() # .env 파일에서 환경 변수 로드 106 | ``` 107 | 108 | ### 2. 클라이언트 사용 예시 109 | 110 | ```python 111 | from teddynote_parser_client import TeddyNoteParserClient 112 | import logging 113 | 114 | # 로깅 설정 115 | logging.basicConfig(level=logging.INFO) 116 | 117 | # 클라이언트 초기화 118 | client = TeddyNoteParserClient( 119 | api_url="http://your-api-url:9997", 120 | upstage_api_key="your-upstage-api-key", # 또는 환경 변수에서 자동 로드 121 | openai_api_key="your-openai-api-key", # 또는 환경 변수에서 자동 로드 122 | language="Korean", 123 | include_image=True, 124 | batch_size=30, 125 | ) 126 | 127 | # 1. API 서버 건강 상태 확인 128 | health_status = client.health_check() 129 | print(f"API 서버 상태: {health_status}") 130 | 131 | # 2. PDF 파일 파싱 요청 132 | parse_result = client.parse_pdf("path/to/your/document.pdf") 133 | job_id = parse_result["job_id"] 134 | print(f"파싱 작업 시작됨. 작업 ID: {job_id}") 135 | 136 | # 3. 작업 완료 대기 137 | job_status = client.wait_for_job_completion(job_id) 138 | print(f"작업 완료. 상태: {job_status['status']}") 139 | 140 | # 4. 결과 다운로드 141 | zip_path, extract_path = client.download_result( 142 | job_id, save_dir="results", extract=True 143 | ) 144 | print(f"결과 다운로드 완료: {zip_path}") 145 | print(f"압축 해제 디렉토리: {extract_path}") 146 | ``` 147 | 148 | ### 3. 
명령줄 인터페이스 사용 예시 149 | 150 | 이 라이브러리는 명령줄 인터페이스도 제공합니다: 151 | 152 | ```bash 153 | # 도움말 보기 154 | teddynote-parser --help 155 | 156 | # API 서버 건강 상태 확인 157 | teddynote-parser health 158 | 159 | # PDF 파일 파싱 요청 160 | teddynote-parser parse path/to/your/document.pdf --wait --download --extract 161 | 162 | # 작업 상태 확인 163 | teddynote-parser status your-job-id 164 | 165 | # 결과 다운로드 166 | teddynote-parser download your-job-id --extract 167 | 168 | # 모든 작업 목록 조회 169 | teddynote-parser jobs 170 | ``` 171 | 172 | ## API 참조 173 | 174 | ### TeddyNoteParserClient 클래스 175 | 176 | ```python 177 | class TeddyNoteParserClient: 178 | def __init__( 179 | self, 180 | api_url: str = "http://localhost:9990", 181 | upstage_api_key: Optional[str] = None, 182 | openai_api_key: Optional[str] = None, 183 | language: str = "Korean", 184 | include_image: bool = True, 185 | batch_size: int = 30, 186 | test_page: Optional[int] = None, 187 | timeout: int = 60, 188 | logger: Optional[logging.Logger] = None, 189 | ): 190 | """ 191 | TeddyNote Parser API 클라이언트 초기화 192 | 193 | Args: 194 | api_url: API 서버 URL (기본값: http://localhost:9990) 195 | upstage_api_key: UPSTAGE API 키 (환경 변수에서 가져올 수 있음) 196 | openai_api_key: OpenAI API 키 (환경 변수에서 가져올 수 있음) 197 | language: 문서 언어 (기본값: Korean) 198 | include_image: 파싱 결과에 이미지 포함 여부 (기본값: True) 199 | batch_size: 처리할 PDF 페이지의 배치 크기 (기본값: 30) 200 | test_page: 처리할 최대 페이지 수 (처음부터 지정한 페이지까지만 처리, 기본값: None - 모든 페이지 처리) 201 | timeout: API 요청 제한시간 (초 단위, 기본값: 60초) 202 | logger: 로깅에 사용할 로거 인스턴스 (기본값: None) 203 | """ 204 | ``` 205 | 206 | ### 주요 메소드 207 | 208 | - `health_check()`: API 서버 건강 상태 확인 209 | - `parse_pdf(pdf_path, ...)`: PDF 파일 파싱 요청 210 | - `get_job_status(job_id)`: 작업 상태 확인 211 | - `wait_for_job_completion(job_id, ...)`: 작업 완료 대기 212 | - `download_result(job_id, ...)`: 작업 결과 다운로드 213 | - `list_all_jobs()`: 모든 작업 목록 조회 214 | 215 | ## 라이선스 216 | 217 | MIT License 218 | 219 | Copyright (c) 2023 테디노트 220 | 221 | Permission is hereby granted, free of charge, to any 
person obtaining a copy 222 | of this software and associated documentation files (the "Software"), to deal 223 | in the Software without restriction, including without limitation the rights 224 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 225 | copies of the Software, and to permit persons to whom the Software is 226 | furnished to do so, subject to the following conditions: 227 | 228 | The above copyright notice and this permission notice shall be included in all 229 | copies or substantial portions of the Software. 230 | 231 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 232 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 233 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 234 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 235 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 236 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 237 | SOFTWARE. 238 | -------------------------------------------------------------------------------- /assets/teddynote-parser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teddylee777/teddynote-parser-api-client/eada6abde205426c8c3523ef3f6683a916e9bad1/assets/teddynote-parser.png -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | teddynote-parser-api: 5 | build: 6 | context: . 
7 | dockerfile: Dockerfile 8 | image: teddylee777/teddynote-parser-api:v0.2.0 9 | container_name: teddynote-parser-api 10 | restart: unless-stopped 11 | ports: 12 | - "${PORT}:${PORT}" 13 | volumes: 14 | - ${DATA_VOLUME:-./data}:/app/data 15 | - ${RESULT_VOLUME:-./result}:/app/result 16 | - ${UPLOADS_VOLUME:-./uploads}:/app/uploads 17 | - ./.env:/app/.env 18 | environment: 19 | - UPSTAGE_API_KEY=${UPSTAGE_API_KEY} 20 | - OPENAI_API_KEY=${OPENAI_API_KEY} 21 | - TZ=Asia/Seoul 22 | - PORT=${PORT} 23 | - RUN_MODE=api 24 | - DATA_VOLUME=${DATA_VOLUME} 25 | - RESULT_VOLUME=${RESULT_VOLUME} 26 | - UPLOADS_VOLUME=${UPLOADS_VOLUME} 27 | healthcheck: 28 | test: ["CMD", "curl", "-f", "http://localhost:${PORT}/health"] 29 | interval: 30s 30 | timeout: 10s 31 | retries: 3 32 | start_period: 5s 33 | deploy: 34 | resources: 35 | limits: 36 | cpus: '2.0' 37 | memory: 4G 38 | reservations: 39 | cpus: '0.5' 40 | memory: 1G 41 | command: ["api"] -------------------------------------------------------------------------------- /example/parse_pdf.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TeddyNote Parser API 클라이언트 테스트\n", 8 | "\n", 9 | "이 노트북은 `teddynote_parser_client/client.py`에 구현된 `TeddyNoteParserClient` 클래스를 사용하여 TeddyNote Parser API와 상호작용하는 과정을 보여줍니다.\n", 10 | "\n", 11 | "주요 단계:\n", 12 | "1. 클라이언트 초기화 및 API 연결 확인\n", 13 | "2. PDF 파일 업로드 및 파싱 작업 요청\n", 14 | "3. 작업 상태 확인\n", 15 | "4. 
완료된 파싱 결과 다운로드" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "%%capture\n", 25 | "!pip install -U teddynote-parser-api pandas langchain" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import time\n", 35 | "\n", 36 | "import pandas as pd\n", 37 | "from IPython.display import display" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## 환경 설정" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "import os\n", 54 | "from pathlib import Path\n", 55 | "\n", 56 | "# API 서버 설정\n", 57 | "API_URL = \"http://localhost:9990\" # API 서버 주소\n", 58 | "\n", 59 | "# PDF 파일 경로\n", 60 | "PDF_FILE_PATH = \"../data/2210.03629v3.pdf\" # PDF 파일 경로\n", 61 | "\n", 62 | "# 결과 저장 디렉토리\n", 63 | "OUTPUT_DIR = \"parsing_outputs\"\n", 64 | "RESULTS_DIR = Path(OUTPUT_DIR)\n", 65 | "RESULTS_DIR.mkdir(exist_ok=True) # 결과 저장 디렉토리 생성\n", 66 | "\n", 67 | "# API 키 설정 (환경 변수에서 로드하거나 직접 지정)\n", 68 | "UPSTAGE_API_KEY = os.environ.get(\"UPSTAGE_API_KEY\", \"your_upstage_api_key\")\n", 69 | "OPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\", \"your_openai_api_key\")" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## 1. 
클라이언트 초기화 및 설정" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "import logging\n", 86 | "from teddynote_parser_client.client import TeddyNoteParserClient\n", 87 | "\n", 88 | "# 로깅 설정\n", 89 | "logging.basicConfig(\n", 90 | " level=logging.INFO, format=\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\"\n", 91 | ")\n", 92 | "logger = logging.getLogger(\"teddynote_parser_client\")\n", 93 | "\n", 94 | "# TeddyNoteParserClient 인스턴스 생성\n", 95 | "client = TeddyNoteParserClient(\n", 96 | " api_url=API_URL, # API 서버 URL\n", 97 | " upstage_api_key=UPSTAGE_API_KEY, # UPSTAGE LLM API 키\n", 98 | " openai_api_key=OPENAI_API_KEY, # OpenAI API 키 (대체 LLM으로 사용)\n", 99 | " batch_size=50, # 한 번에 처리할 PDF 페이지 수\n", 100 | " test_page=None, # 테스트용 페이지 제한 (None: 전체 페이지 처리)\n", 101 | " language=\"Korean\", # 문서 언어 설정\n", 102 | " include_image=True, # 결과에 이미지 포함 여부\n", 103 | " logger=logger, # 로깅에 사용할 로거\n", 104 | ")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "서버가 정상 동작 중인지 확인합니다." 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "name": "stderr", 121 | "output_type": "stream", 122 | "text": [ 123 | "2025-03-10 04:32:08,573 - teddynote_parser_client - INFO - API 서버가 정상적으로 응답했습니다.\n" 124 | ] 125 | }, 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "✅ API 서버가 정상적으로 실행 중입니다. 상태: {'status': 'ok', 'timestamp': '2025-03-10T04:32:08.571103'}\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "# API 서버 상태 확인\n", 136 | "try:\n", 137 | " health_status = client.health_check()\n", 138 | " print(f\"✅ API 서버가 정상적으로 실행 중입니다. 
상태: {health_status}\")\n", 139 | " api_available = True\n", 140 | "except Exception as e:\n", 141 | " print(f\"❌ API 서버에 접속할 수 없습니다: {e}\")\n", 142 | " print(\"API 서버가 실행 중인지 확인해주세요.\")\n", 143 | " api_available = False" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "## 2. PDF 파일 업로드 및 파싱 작업 요청" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 5, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stderr", 160 | "output_type": "stream", 161 | "text": [ 162 | "2025-03-10 04:32:11,037 - teddynote_parser_client - INFO - 파일 '2210.03629v3.pdf'에 대한 파싱 작업 요청 중...\n", 163 | "2025-03-10 04:32:11,037 - teddynote_parser_client - INFO - 파싱 옵션: 언어=English, 이미지 포함=True, 배치 크기=50, 처리 페이지 수=5\n", 164 | "2025-03-10 04:32:11,056 - teddynote_parser_client - INFO - 파싱 작업이 시작되었습니다. 작업 ID: e6e8e9b5-3e65-4298-ad17-cad6c03829f9\n" 165 | ] 166 | }, 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "📄 파일 '../data/2210.03629v3.pdf'에 대한 파싱 작업을 요청합니다...\n", 172 | "✅ 파싱 작업이 시작되었습니다!\n", 173 | "📝 작업 ID: e6e8e9b5-3e65-4298-ad17-cad6c03829f9\n", 174 | "📝 상태: pending\n", 175 | "📝 메시지: PDF 파싱 작업이 시작되었습니다.\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "# PDF 파일 파싱 요청\n", 181 | "if api_available:\n", 182 | " try:\n", 183 | " print(f\"📄 파일 '{PDF_FILE_PATH}'에 대한 파싱 작업을 요청합니다...\")\n", 184 | " # pdf_path: 파싱할 PDF 파일 경로\n", 185 | " # batch_size: 한 번에 처리할 페이지 수(기본값: 30, 최대값: 100)\n", 186 | " # language: Entity 파싱 언어\n", 187 | " # test_page: 테스트용으로 처음 ~ 지정한 페이지까지만 처리(기본값: None - 모든 페이지 처리)\n", 188 | " # include_image: 결과에 이미지 포함 여부(기본값: True)\n", 189 | " parse_result = client.parse_pdf(\n", 190 | " pdf_path=PDF_FILE_PATH,\n", 191 | " batch_size=50,\n", 192 | " language=\"English\",\n", 193 | " test_page=5,\n", 194 | " include_image=True,\n", 195 | " )\n", 196 | "\n", 197 | " job_id = parse_result[\"job_id\"]\n", 198 | " print(f\"✅ 파싱 작업이 시작되었습니다!\")\n", 199 | " print(f\"📝 
작업 ID: {job_id}\")\n", 200 | " print(f\"📝 상태: {parse_result['status']}\")\n", 201 | " print(f\"📝 메시지: {parse_result['message']}\")\n", 202 | " except Exception as e:\n", 203 | " print(f\"❌ 파싱 작업 요청 실패: {e}\")\n", 204 | " job_id = None" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## 3. 작업 상태 확인" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 6, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stderr", 221 | "output_type": "stream", 222 | "text": [ 223 | "2025-03-10 04:32:13,443 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'의 완료 대기 중...\n", 224 | "2025-03-10 04:32:13,444 - teddynote_parser_client - INFO - 상태 확인 간격: 2초, 최대 시도 횟수: 30회\n", 225 | "2025-03-10 04:32:13,450 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'의 현재 상태: processing\n" 226 | ] 227 | }, 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "작업 ID: e6e8e9b5-3e65-4298-ad17-cad6c03829f9에 대한 상태를 확인하고 완료될 때까지 대기합니다...\n" 233 | ] 234 | }, 235 | { 236 | "name": "stderr", 237 | "output_type": "stream", 238 | "text": [ 239 | "2025-03-10 04:32:15,461 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'의 현재 상태: processing\n", 240 | "2025-03-10 04:32:17,470 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'의 현재 상태: processing\n", 241 | "2025-03-10 04:32:19,481 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'의 현재 상태: processing\n", 242 | "2025-03-10 04:32:21,487 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'의 현재 상태: completed\n", 243 | "2025-03-10 04:32:21,488 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'가 completed 상태로 완료되었습니다.\n" 244 | ] 245 | }, 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "\n", 251 | "작업 최종 정보:\n", 252 | "📝 
파일명: 2210.03629v3.pdf\n", 253 | "📝 생성 시간: 2025-03-10 04:32:11\n", 254 | "📝 완료 시간: 2025-03-10 04:32:21\n", 255 | "📝 처리 시간: 10.36초\n", 256 | "📝 ZIP 파일: result/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_20250310_043221.zip\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "# 작업 상태 확인 및 완료 대기\n", 262 | "if \"job_id\" in locals() and job_id:\n", 263 | " print(f\"작업 ID: {job_id}에 대한 상태를 확인하고 완료될 때까지 대기합니다...\")\n", 264 | "\n", 265 | " try:\n", 266 | " # 비동기 작업 완료 대기 (최대 60초, 2초 간격으로 확인)\n", 267 | " final_status = client.wait_for_job_completion(\n", 268 | " job_id, check_interval=2, max_attempts=30\n", 269 | " )\n", 270 | "\n", 271 | " if final_status[\"status\"] == \"completed\":\n", 272 | " print(\"\\n작업 최종 정보:\")\n", 273 | " print(f\"📝 파일명: {final_status['filename']}\")\n", 274 | " print(\n", 275 | " f\"📝 생성 시간: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(final_status['created_at']))}\"\n", 276 | " )\n", 277 | " print(\n", 278 | " f\"📝 완료 시간: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(final_status['completed_at']))}\"\n", 279 | " )\n", 280 | " print(\n", 281 | " f\"📝 처리 시간: {final_status['completed_at'] - final_status['created_at']:.2f}초\"\n", 282 | " )\n", 283 | " print(f\"📝 ZIP 파일: {final_status['zip_filename']}\")\n", 284 | " elif final_status[\"status\"] == \"failed\":\n", 285 | " print(f\"❌ 작업 실패: {final_status.get('error', '알 수 없는 오류')}\")\n", 286 | " except TimeoutError as e:\n", 287 | " print(f\"⚠️ 작업 대기 시간 초과: {e}\")\n", 288 | " except Exception as e:\n", 289 | " print(f\"❌ 작업 상태 확인 중 오류 발생: {e}\")" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "## 4. 
파싱 결과 다운로드" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 7, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stderr", 306 | "output_type": "stream", 307 | "text": [ 308 | "2025-03-10 04:36:55,719 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'의 현재 상태: completed\n", 309 | "2025-03-10 04:36:55,720 - teddynote_parser_client - INFO - 작업 ID 'e6e8e9b5-3e65-4298-ad17-cad6c03829f9'의 결과 다운로드 중...\n", 310 | "2025-03-10 04:36:55,730 - teddynote_parser_client - INFO - 결과가 성공적으로 다운로드되었습니다: parsing_outputs/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_20250310043655.zip\n", 311 | "2025-03-10 04:36:55,735 - teddynote_parser_client - INFO - ZIP 파일 'parsing_outputs/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_20250310043655.zip'의 압축을 'parsing_outputs/e6e8e9b5-3e65-4298-ad17-cad6c03829f9'에 해제했습니다.\n", 312 | "2025-03-10 04:36:55,736 - teddynote_parser_client - INFO - ZIP 파일의 압축이 성공적으로 해제되었습니다: parsing_outputs/e6e8e9b5-3e65-4298-ad17-cad6c03829f9\n" 313 | ] 314 | }, 315 | { 316 | "name": "stdout", 317 | "output_type": "stream", 318 | "text": [ 319 | "✅ 파싱 결과가 성공적으로 다운로드되었습니다: parsing_outputs/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_20250310043655.zip\n", 320 | "✅ ZIP 파일이 성공적으로 압축 해제되었습니다: parsing_outputs/e6e8e9b5-3e65-4298-ad17-cad6c03829f9\n", 321 | "\n", 322 | "파싱 결과 디렉토리: parsing_outputs/e6e8e9b5-3e65-4298-ad17-cad6c03829f9\n" 323 | ] 324 | } 325 | ], 326 | "source": [ 327 | "# 파싱 결과 다운로드 및 압축 해제\n", 328 | "if (\n", 329 | " \"job_id\" in locals()\n", 330 | " and job_id\n", 331 | " and \"final_status\" in locals()\n", 332 | " and final_status[\"status\"] == \"completed\"\n", 333 | "):\n", 334 | " try:\n", 335 | " # 결과 다운로드 및 압축 해제\n", 336 | " zip_path, extract_path = client.download_result(\n", 337 | " job_id=job_id, save_dir=RESULTS_DIR, extract=True, overwrite=True\n", 338 | " )\n", 339 | "\n", 340 | " print(f\"✅ 파싱 결과가 성공적으로 다운로드되었습니다: {zip_path}\")\n", 341 | " print(f\"✅ ZIP 파일이 성공적으로 압축 해제되었습니다: {extract_path}\")\n", 342 
| " print(f\"\\n파싱 결과 디렉토리: {extract_path}\")\n", 343 | " except Exception as e:\n", 344 | " print(f\"❌ 결과 다운로드 중 오류 발생: {e}\")\n", 345 | " extract_path = None" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "## 5. 작업 목록 조회" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 8, 358 | "metadata": {}, 359 | "outputs": [ 360 | { 361 | "name": "stderr", 362 | "output_type": "stream", 363 | "text": [ 364 | "2025-03-10 04:36:57,096 - teddynote_parser_client - INFO - 모든 작업 목록 조회 중...\n", 365 | "2025-03-10 04:36:57,100 - teddynote_parser_client - INFO - 총 1 개의 작업이 조회되었습니다.\n" 366 | ] 367 | }, 368 | { 369 | "name": "stdout", 370 | "output_type": "stream", 371 | "text": [ 372 | "📋 총 1개의 작업이 있습니다.\n", 373 | "\n" 374 | ] 375 | }, 376 | { 377 | "data": { 378 | "text/html": [ 379 | "
\n", 380 | "\n", 393 | "\n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | "
Job IDStatusFilenameCreated AtCompleted At
0e6e8e9b5-3e65-4298-ad17-cad6c03829f9completed2210.03629v3.pdf2025-03-10 04:32:112025-03-10 04:32:21
\n", 415 | "
" 416 | ], 417 | "text/plain": [ 418 | " Job ID Status Filename \\\n", 419 | "0 e6e8e9b5-3e65-4298-ad17-cad6c03829f9 completed 2210.03629v3.pdf \n", 420 | "\n", 421 | " Created At Completed At \n", 422 | "0 2025-03-10 04:32:11 2025-03-10 04:32:21 " 423 | ] 424 | }, 425 | "metadata": {}, 426 | "output_type": "display_data" 427 | } 428 | ], 429 | "source": [ 430 | "# 작업 목록 조회\n", 431 | "if api_available:\n", 432 | " try:\n", 433 | " jobs = client.list_all_jobs()[\"jobs\"]\n", 434 | "\n", 435 | " print(f\"📋 총 {len(jobs)}개의 작업이 있습니다.\\n\")\n", 436 | "\n", 437 | " # 작업 정보를 DataFrame으로 변환\n", 438 | " if jobs:\n", 439 | " job_data = []\n", 440 | " for job in jobs:\n", 441 | " created_at = time.strftime(\n", 442 | " \"%Y-%m-%d %H:%M:%S\", time.localtime(job[\"created_at\"])\n", 443 | " )\n", 444 | " completed_at = (\n", 445 | " time.strftime(\n", 446 | " \"%Y-%m-%d %H:%M:%S\", time.localtime(job[\"completed_at\"])\n", 447 | " )\n", 448 | " if job[\"completed_at\"]\n", 449 | " else \"N/A\"\n", 450 | " )\n", 451 | "\n", 452 | " job_data.append(\n", 453 | " {\n", 454 | " \"Job ID\": job[\"job_id\"],\n", 455 | " \"Status\": job[\"status\"],\n", 456 | " \"Filename\": job[\"filename\"],\n", 457 | " \"Created At\": created_at,\n", 458 | " \"Completed At\": completed_at,\n", 459 | " }\n", 460 | " )\n", 461 | "\n", 462 | " df = pd.DataFrame(job_data)\n", 463 | " display(df)\n", 464 | " except Exception as e:\n", 465 | " print(f\"❌ 작업 목록 조회 중 오류 발생: {e}\")" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": 9, 471 | "metadata": {}, 472 | "outputs": [], 473 | "source": [ 474 | "import pickle\n", 475 | "\n", 476 | "\n", 477 | "def load_documents_from_pkl(filepath):\n", 478 | " \"\"\"\n", 479 | " Pickle 파일에서 Langchain Document 리스트를 불러오는 함수\n", 480 | "\n", 481 | " Args:\n", 482 | " filepath: 원본 파일 경로 (예: path/to/filename.pdf)\n", 483 | " Returns:\n", 484 | " Langchain Document 객체 리스트\n", 485 | " \"\"\"\n", 486 | " # 확장자 제거하고 절대 경로로 변환\n", 487 | " abs_path = 
os.path.abspath(filepath)\n", 488 | " base_path = os.path.splitext(abs_path)[0]\n", 489 | " pkl_path = f\"{base_path}.pkl\"\n", 490 | "\n", 491 | " with open(pkl_path, \"rb\") as f:\n", 492 | " documents = pickle.load(f)\n", 493 | " return documents" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 10, 499 | "metadata": {}, 500 | "outputs": [ 501 | { 502 | "name": "stdout", 503 | "output_type": "stream", 504 | "text": [ 505 | "📄 parsing_outputs/e6e8e9b5-3e65-4298-ad17-cad6c03829f9/e6e8e9b5-3e65-4298-ad17-cad6c03829f9/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pkl 파일 로드 중...\n", 506 | "✅ 총 25개의 문서가 로드되었습니다.\n" 507 | ] 508 | } 509 | ], 510 | "source": [ 511 | "import glob\n", 512 | "from pathlib import Path\n", 513 | "\n", 514 | "# extract_path 디렉토리에서 모든 .pkl 파일 찾기\n", 515 | "pkl_files = glob.glob(str(Path(extract_path) / \"*\" / \"*.pkl\"))\n", 516 | "\n", 517 | "if not pkl_files:\n", 518 | " print(\"❌ extract_path에서 .pkl 파일을 찾을 수 없습니다.\")\n", 519 | "else:\n", 520 | " # 모든 .pkl 파일에서 문서 로드\n", 521 | " all_documents = []\n", 522 | " for pkl_file in pkl_files:\n", 523 | " print(f\"📄 {pkl_file} 파일 로드 중...\") # 한국어 코멘트\n", 524 | " documents = load_documents_from_pkl(pkl_file)\n", 525 | " all_documents.extend(documents)\n", 526 | "\n", 527 | " print(f\"✅ 총 {len(all_documents)}개의 문서가 로드되었습니다.\")" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 11, 533 | "metadata": {}, 534 | "outputs": [ 535 | { 536 | "data": { 537 | "text/plain": [ 538 | "[Document(metadata={'page': 0, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='# REAC T: SYNERGIZING REASONING AND ACTING IN\\nLANGUAGE MODELS\\nShunyu Yao∗*,1, Jeffrey Zhao2, Dian Yu2, Nan Du2, Izhak Shafran2, Karthik Narasimhan1, Yuan Cao2\\n1Department of Computer Science, Princeton University\\n2Google Research, Brain 
team\\n1{shunyuy,karthikn}@princeton.edu\\n2{jeffreyzhao,dianyu,dunan,izhak,yuancao}@google.com\\nABSTRACT\\nWhile large language models (LLMs) have demonstrated impressive performance\\nacross tasks in language understanding and interactive decision making, their\\nabilities for reasoning (e.g. chain-of-thought prompting) and acting (e.g. action\\nplan generation) have primarily been studied as separate topics. In this paper, we\\nexplore the use of LLMs to generate both reasoning traces and task-specific actions\\nin an interleaved manner, allowing for greater synergy between the two: reasoning\\ntraces help the model induce, track, and update action plans as well as handle\\nexceptions, while actions allow it to interface with and gather additional information'),\n", 539 | " Document(metadata={'page': 0, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='from external sources such as knowledge bases or environments. We apply our\\napproach, named ReAct, to a diverse set of language and decision making tasks\\nand demonstrate its effectiveness over state-of-the-art baselines in addition to\\nimproved human interpretability and trustworthiness. 
Concretely, on question\\nanswering (HotpotQA) and fact verification (Fever), ReAct overcomes prevalent\\nissues of hallucination and error propagation in chain-of-thought reasoning by\\ninteracting with a simple Wikipedia API, and generating human-like task-solving\\ntrajectories that are more interpretable than baselines without reasoning traces.\\nFurthermore, on two interactive decision making benchmarks (ALFWorld and\\nWebShop), ReAct outperforms imitation and reinforcement learning methods by\\nan absolute success rate of 34% and 10% respectively, while being prompted with\\nonly one or two in-context examples.\\n# 1 INTRODUCTION'),\n", 540 | " Document(metadata={'page': 0, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='A unique feature of human intelligence is the ability to seamlessly combine task-oriented actions with\\nverbal reasoning (or inner speech, Alderson-Day & Fernyhough, 2015), which has been theorized to\\nplay an important role in human cognition for enabling self-regulation or strategization (Vygotsky,\\n1987; Luria, 1965; Fernyhough, 2010) and maintaining a working memory (Baddeley, 1992). Con-\\nsider the example of cooking up a dish in the kitchen. Between any two specific actions, we may\\nreason in language in order to track progress (“now that everything is cut, I should heat up the pot of\\nwater”), to handle exceptions or adjust the plan according to the situation (“I don’t have salt, so let\\nme use soy sauce and pepper instead”), and to realize when external information is needed (“how do\\nI prepare dough? Let me search on the Internet”). We may also act (open a cookbook to read the\\nrecipe, open the fridge, check ingredients) to support the reasoning and to answer questions (“What'),\n", 541 | " Document(metadata={'page': 0, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='dish can I make right now?”). 
This tight synergy between “acting” and “reasoning” allows humans\\nto learn new tasks quickly and perform robust decision making or reasoning, even under previously\\nunseen circumstances or facing information uncertainties.\\nRecent results have hinted at the possibility of combining verbal reasoning with interactive decision\\nmaking in autonomous systems. On one hand, properly prompted large language models (LLMs)\\nhave demonstrated emergent capabilities to carry out several steps of reasoning traces to derive'),\n", 542 | " Document(metadata={'page': 1, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='Figure 1: (1) Comparison of 4 prompting methods, (a) Standard, (b) Chain-of-thought (CoT,\\nReason Only), (c) Act-only, and (d) ReAct (Reason+Act), solving a HotpotQA (Yang et al., 2018)\\nquestion. (2) Comparison of (a) Act-only and (b) ReAct prompting to solve an AlfWorld (Shridhar\\net al., 2020b) game. In both domains, we omit in-context examples in the prompt, and only show task\\nsolving trajectories generated by the model (Act, Thought) and the environment (Obs).\\nanswers from questions in arithmetic, commonsense, and symbolic reasoning tasks (Wei et al.,\\n2022). However, this “chain-of-thought” reasoning is a static black box, in that the model uses\\nits own internal representations to generate thoughts and is not grounded in the external world,\\nwhich limits its ability to reason reactively or update its knowledge. This can lead to issues like fact\\nhallucination and error propagation over the reasoning process (Figure 1 (1b)). 
On the other hand,'),\n", 543 | " Document(metadata={'page': 1, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='recent work has explored the use of pre-trained language models for planning and acting in interactive\\nenvironments (Ahn et al., 2022; Nakano et al., 2021; Yao et al., 2020; Huang et al., 2022a), with\\na focus on predicting actions via language priors. These approaches usually convert multi-modal\\nobservations into text, use a language model to generate domain-specific actions or plans, and then\\nuse a controller to choose or execute them. However, they do not employ language models to reason\\nabstractly about high-level goals or maintain a working memory to support acting, barring Huang\\net al. (2022b) who perform a limited form of verbal reasoning to reiterate spatial facts about the\\ncurrent state. Beyond such simple embodied tasks to interact with a few blocks, there have not been\\nstudies on how reasoning and acting can be combined in a synergistic manner for general task solving,\\nand if such a combination can bring systematic benefits compared to reasoning or acting alone.'),\n", 544 | " Document(metadata={'page': 1, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='In this work, we present ReAct, a general paradigm to combine reasoning and acting with language\\nmodels for solving diverse language reasoning and decision making tasks (Figure 1). ReAct\\nprompts LLMs to generate both verbal reasoning traces and actions pertaining to a task in an\\ninterleaved manner, which allows the model to perform dynamic reasoning to create, maintain, and\\nadjust high-level plans for acting (reason to act), while also interact with the external environments\\n(e.g. 
Wikipedia) to incorporate additional information into reasoning (act to reason).'),\n", 545 | " Document(metadata={'image': '![](file:////app/uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3_Page_1_Index_13.png)', 'entity': '\\n\\nComparative Analysis of Prompting Methods in AI Reasoning\\n\\n
\\nThe image illustrates a comparison of four prompting methods used in AI reasoning tasks: Standard, Chain-of-Thought (CoT), Act-only, and ReAct (Reason + Act). It highlights how these methods approach problem-solving in different contexts, such as HotpotQA and AlfWorld, showcasing the effectiveness of the ReAct method in integrating reasoning and action.\\n
\\n\\nHotpotQA, AlfWorld, Apple Remote, Front Row software, AI prompting methods\\n\\n\\n- How might the integration of reasoning and acting improve AI performance in complex tasks?\\n- What challenges could arise from using the ReAct method in real-world applications?\\n\\n', 'page': 1, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='![](file:////app/uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3_Page_1_Index_13.png)\\n\\n\\n\\nComparative Analysis of Prompting Methods in AI Reasoning\\n\\n
\\nThe image illustrates a comparison of four prompting methods used in AI reasoning tasks: Standard, Chain-of-Thought (CoT), Act-only, and ReAct (Reason + Act). It highlights how these methods approach problem-solving in different contexts, such as HotpotQA and AlfWorld, showcasing the effectiveness of the ReAct method in integrating reasoning and action.\\n
\\n\\nHotpotQA, AlfWorld, Apple Remote, Front Row software, AI prompting methods\\n\\n\\n- How might the integration of reasoning and acting improve AI performance in complex tasks?\\n- What challenges could arise from using the ReAct method in real-world applications?\\n\\n'),\n", 546 | " Document(metadata={'page': 2, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='We conduct empirical evaluations of ReAct and state-of-the-art baselines on four diverse benchmarks:\\nquestion answering (HotPotQA, Yang et al., 2018), fact verification (Fever, Thorne et al., 2018),\\ntext-based game (ALFWorld, Shridhar et al., 2020b), and webpage navigation (WebShop, Yao\\net al., 2022). For HotPotQA and Fever, with access to a Wikipedia API that the model can interact\\nwith, ReAct outperforms vanilla action generation models while being competitive with chain-of-\\nthought reasoning (CoT) (Wei et al., 2022). The best approach overall is a combination of ReAct\\nand CoT that allows for the use of both internal knowledge and externally obtained information\\nduring reasoning. On ALFWorld and WebShop, two or even one-shot ReAct prompting is able\\nto outperform imitation or reinforcement learning methods trained with 103 ∼ 105 task instances,\\nwith an absolute improvement of 34% and 10% in success rates respectively. We also demonstrate'),\n", 547 | " Document(metadata={'page': 2, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='the importance of sparse, versatile reasoning in decision making by showing consistent advantages\\nover controlled baselines with actions only. 
Besides general applicability and performance boost,\\nthe combination of reasoning and acting also contributes to model interpretability, trustworthiness,\\nand diagnosability across all domains, as humans can readily distinguish information from model’s\\ninternal knowledge versus external environments, as well as inspect reasoning traces to understand\\nthe decision basis of model actions.\\nTo summarize, our key contributions are the following: (1) we introduce ReAct, a novel prompt-\\nbased paradigm to synergize reasoning and acting in language models for general task solving; (2) we\\nperform extensive experiments across diverse benchmarks to showcase the advantage of ReAct in a\\nfew-shot learning setup over prior approaches that perform either reasoning or action generation in'),\n", 548 | " Document(metadata={'page': 2, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='isolation; (3) we present systematic ablations and analysis to understand the importance of acting in\\nreasoning tasks, and reasoning in interactive tasks; (4) we analyze the limitations of ReAct under the\\nprompting setup (i.e. limited support of reasoning and acting behaviors), and perform initial finetuning\\nexperiments showing the potential of ReAct to improve with additional training data. Scaling up\\nReAct to train and operate on more tasks and combining it with complementary paradigms like\\nreinforcement learning could further unlock the potential of large language models.\\n2 REAC T: SYNERGIZING RE ASONING + AC T ING\\nConsider a general setup of an agent interacting with an environment for task solving. 
At time\\nstep t, an agent receives an observation ot ∈ O from the environment and takes an action at ∈ A\\nfollowing some policy π(at|ct), where ct = (o1, a1, · · · , ot−1, at−1, ot) is the context to the agent.'),\n", 549 | " Document(metadata={'page': 2, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='Learning a policy is challenging when the mapping ct (cid:55)→ at is highly implicit and requires extensive\\ncomputation. For example, the agent shown in Figure 1(1c) is unable to generate the correct final\\naction (Act 4) to finish the QA task as it requires complex reasoning over the trajectory context\\n(Question, Act 1-3, Obs 1-3). Similarly, the agent shown in Figure 1(2a) fails to comprehend from the\\ncontext that sinkbasin 1 does not contain peppershaker 1, thus keep producing hallucinating actions.\\nˆ = A ∪ L, where L is the\\nThe idea of ReAct is simple: we augment the agent’s action space to A\\nspace of language. An action ˆat ∈ L in the language space, which we will refer to as a thought or a\\nreasoning trace, does not affect the external environment, thus leading to no observation feedback.\\nInstead, a thought ˆat aims to compose useful information by reasoning over the current context ct,'),\n", 550 | " Document(metadata={'page': 2, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='and update the context ct+1 = (ct, ˆat) to support future reasoning or acting. As shown in Figure 1,\\nthere could be various types of useful thoughts, e.g. 
decomposing task goals and create action plans\\n(2b, Act 1; 1d, Thought 1), injecting commonsense knowledge relevant to task solving (2b, Act 1),\\nextracting important parts from observations (1d, Thought2, 4), track progress and transit action plans\\n(2b, Act 8), handle exceptions and adjust action plans (1d, Thought 3), and so on.\\nHowever, as the language space L is unlimited, learning in this augmented action space is difficult\\nand requires strong language priors. In this paper, we mainly focus on the setup where a frozen\\nlarge language model, PaLM-540B (Chowdhery et al., 2022)1, is prompted with few-shot in-context\\nexamples to generate both domain-specific actions and free-form language thoughts for task solving\\n(Figure 1 (1d), (2b)). Each in-context example is a human trajectory of actions, thoughts, and'),\n", 551 | " Document(metadata={'page': 2, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='environment observations to solve a task instance (see Appendix C). For the tasks where reasoning is\\nof primary importance (Figure 1(1)), we alternate the generation of thoughts and actions so that the\\ntask-solving trajectory consists of multiple thought-action-observation steps. In contrast, for decision\\nmaking tasks that potentially involve a large number of actions (Figure 1(2)), thoughts only need to'),\n", 552 | " Document(metadata={'page': 3, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='appear sparsely in the most relevant positions of a trajectory, so we let the language model decide the\\nasynchronous occurrence of thoughts and actions for itself.\\nSince decision making and reasoning capabilities are integrated into a large language model, ReAct\\nenjoys several unique features: A) Intuitive and easy to design: Designing ReAct prompts is\\nstraightforward as human annotators just type down their thoughts in language on top of their actions\\ntaken. 
No ad-hoc format choice, thought design, or example selection is used in this paper. We detail\\nprompt design for each task in Sections 3 and 4. B) General and flexible: Due to the flexible thought\\nspace and thought-action occurrence format, ReAct works for diverse tasks with distinct action\\nspaces and reasoning needs, including but not limited to QA, fact verification, text game, and web\\nnavigation. C) Performant and robust: ReAct shows strong generalization to new task instances'),\n", 553 | " Document(metadata={'page': 3, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='while learning solely from one to six in-context examples, consistently outperforming baselines with\\nonly reasoning or acting across different domains. We also show in Section 3 additional benefits\\nwhen finetuning is enabled, and in Section 4 how ReAct performance is robust to prompt selections.\\nD) Human aligned and controllable: ReAct promises an interpretable sequential decision making\\nand reasoning process where humans can easily inspect reasoning and factual correctness. Moreover,\\nhumans can also control or correct the agent behavior on the go by thought editing, as shown in\\nFigure 5 in Section 4.\\n# 3 KNOWLEDGE-INTENSIVE REASONING TASKS\\nWe begin with knowledge-intensive reasoning tasks like multi-hop question answering and fact\\nverification. 
As shown in Figure 1(1d), by interacting with a Wikipedia API, ReAct is able to\\nretrieve information to support reasoning, while also use reasoning to target what to retrieve next,\\ndemonstrating a synergy of reasoning and acting.\\n3.1 SETUP'),\n", 554 | " Document(metadata={'page': 3, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='Domains We consider two datasets challenging knowledge retrieval and reasoning: (1) Hot-\\nPotQA (Yang et al., 2018), a multi-hop question answering benchmark that requires reasoning\\nover two or more Wikipedia passages, and (2) FEVER (Thorne et al., 2018), a fact verification\\nbenchmark where each claim is annotated SUPPORTS, REFUTES, or NOT ENOUGH INFO, based\\non if there exists a Wikipedia passage to verify the claim. In this work, we operate in a question-only\\nsetup for both tasks, where models only receive the question/claim as input without access to support\\nparagraphs, and have to rely on their internal knowledge or retrieve knowledge via interacting with\\nan external environment to support reasoning.\\nAction Space We design a simple Wikipedia web API with three types of actions to support\\ninteractive information retrieval: (1) search[entity], which returns the first 5 sentences from\\nthe corresponding entity wiki page if it exists, or else suggests top-5 similar entities from the'),\n", 555 | " Document(metadata={'page': 3, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='Wikipedia search engine, (2) lookup[string], which would return the next sentence in the page\\ncontaining string, simulating Ctrl+F functionality on the browser. (3) finish[answer], which\\nwould finish the current task with answer. We note that this action space mostly can only retrieve a\\nsmall part of a passage based on exact passage name, which is significantly weaker than state-of-the-\\nart lexical or neural retrievers. 
The purpose is to simulate how humans would interact with Wikipedia,\\nand force models to retrieve via explicit reasoning in language.\\n3.2 METHODS\\nReAct Prompting For HotpotQA and Fever, we randomly select 6 and 3 cases2 from the training\\nset and manually compose ReAct-format trajectories to use as few-shot exemplars in the prompts.\\nSimilar to Figure 1(d), each trajectory consists of multiple thought-action-observation steps (i.e. dense\\nthought), where free-form thoughts are used for various purposes. Specifically, we use a combination'),\n", 556 | " Document(metadata={'page': 3, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='of thoughts that decompose questions (“I need to search x, find y, then find z”), extract information\\nfrom Wikipedia observations (“x was started in 1844”, “The paragraph does not tell x”), perform\\ncommonsense (“x is not y, so z must instead be...”) or arithmetic reasoning (“1844 < 1989”), guide'),\n", 557 | " Document(metadata={'page': 4, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='Table 1: PaLM-540B prompting results on\\nHotpotQA and Fever.\\nFigure 2: PaLM-540B prompting results with respect to\\nnumber of CoT-SC samples used.\\nsearch reformulation (“maybe I can search/look up x instead”), and synthesize the final answer (“...so\\nthe answer is x”). See Appendix C for more details.\\nBaselines We systematically ablate ReAct trajectories to build prompts for multiple baselines (with\\nformats as Figure 1(1a-1c)): (a) Standard prompting (Standard), which removes all thoughts,\\nactions, observations in ReAct trajectories. (b) Chain-of-thought prompting (CoT) (Wei et al.,\\n2022), which removes actions and observations and serve as a reasoning-only baseline. 
We also\\nbuild a self-consistency baseline (CoT-SC) (Wang et al., 2022a;b) by sampling 21 CoT trajectories\\nwith decoding temperature 0.7 during inference and adopting the majority answer, which is found to\\nconsistently boost performance over CoT. (c) Acting-only prompt (Act), which removes thoughts'),\n", 558 | " Document(metadata={'page': 4, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='in ReAct trajectories, loosely resembling how WebGPT (Nakano et al., 2021) interacts with the\\nInternet to answer questions, though it operates on a different task and action space, and uses imitation\\nand reinforcement learning instead of prompting.\\nCombining Internal and External Knowledge As will be detail in Section 3.3, we observe that\\nthe problem solving process demonstrated by ReAct is more factual and grounded, whereas CoT\\nis more accurate in formulating reasoning structure but can easily suffer from hallucinated facts\\nor thoughts. We therefore propose to incorporate ReAct and CoT-SC, and let the model decide\\nwhen to switch to the other method based on the following heuristics: A) ReAct → CoT-SC: when\\nReAct fails to return an answer within given steps, back off to CoT-SC. We set 7 and 5 steps for\\nHotpotQA and FEVER respectively as we find more steps will not improve ReAct performance3.\\nB) CoT-SC → ReAct: when the majority answer among n CoT-SC samples occurs less than n/2'),\n", 559 | " Document(metadata={'page': 4, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='times (i.e. internal knowledge might not support the task confidently), back off to ReAct.\\nFinetuning Due to the challenge of manually annotating reasoning traces and actions at scale,\\nwe consider a bootstraping approach similar to Zelikman et al. 
(2022), using 3,000 trajectories\\nwith correct answers generated by ReAct (also for other baselines) to finetune smaller language\\nmodels (PaLM-8/62B) to decode trajectories (all thoughts, actions, observations) conditioned on\\ninput questions/claims. More details are in Appendix B.1.\\n3.3 RESULTS AND OBSERVATIONS\\nReAct outperforms Act consistently Table 1 shows HotpotQA and Fever results using PaLM-\\n540B as the base model with different prompting methods. We note that ReAct is better than Act\\non both tasks, demonstrating the value of reasoning to guide acting, especially for synthesizing the\\nfinal answer, as shown in Figure 1 (1c-d). Fine-tuning results 3 also confirm the benefit of reasoning\\ntraces for more informed acting.'),\n", 560 | " Document(metadata={'image': '![](file:////app/uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3_Page_4_Index_41.png)', 'entity': '\\n\\nPerformance Comparison of Prompting Methods in AI Models\\n\\n
\\nThe image presents two graphs illustrating the performance of various prompting methods (CoT-SC, ReAct, and CoT) on the HotpotQA and Fever datasets. The left graph shows the accuracy of HotpotQA as the number of CoT-SC trials increases, while the right graph displays the accuracy for the Fever dataset. The lines indicate the performance trends of each method, highlighting the effectiveness of ReAct compared to others.\\n
\\n\\nHotpotQA, Fever, CoT-SC, ReAct, CoT, accuracy metrics\\n\\n\\n- What factors contribute to the superior performance of the ReAct method over others?\\n- How might the results differ if the number of trials were increased beyond 20?\\n\\n', 'page': 4, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='![](file:////app/uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3_Page_4_Index_41.png)\\n\\n\\n\\nPerformance Comparison of Prompting Methods in AI Models\\n\\n
\\nThe image presents two graphs illustrating the performance of various prompting methods (CoT-SC, ReAct, and CoT) on the HotpotQA and Fever datasets. The left graph shows the accuracy of HotpotQA as the number of CoT-SC trials increases, while the right graph displays the accuracy for the Fever dataset. The lines indicate the performance trends of each method, highlighting the effectiveness of ReAct compared to others.\\n
\\n\\nHotpotQA, Fever, CoT-SC, ReAct, CoT, accuracy metrics\\n\\n\\n- What factors contribute to the superior performance of the ReAct method over others?\\n- How might the results differ if the number of trials were increased beyond 20?\\n\\n'),\n", 561 | " Document(metadata={'table': '| Prompt Methoda | HotpotQA (EM) | Fever (Acc) |\\n| --- | --- | --- |\\n| Standard | 28.7 | 57.1 |\\n| CoT (Wei et al., 2022) | 29.4 | 56.3 |\\n| CoT-SC (Wang et al., 2022a) | 33.4 | 60.4 |\\n| Act | 25.7 | 58.9 |\\n| ReAct | 27.4 | 60.9 |\\n| CoT-SC → ReAct | 34.2 | 64.6 |\\n| ReAct→ CoT-SC | 35.1 | 62.0 |\\n| Supervised SoTAb | 67.5 | 89.5 |\\n\\n', 'entity': '', 'page': 4, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='| Prompt Methoda | HotpotQA (EM) | Fever (Acc) |\\n| --- | --- | --- |\\n| Standard | 28.7 | 57.1 |\\n| CoT (Wei et al., 2022) | 29.4 | 56.3 |\\n| CoT-SC (Wang et al., 2022a) | 33.4 | 60.4 |\\n| Act | 25.7 | 58.9 |\\n| ReAct | 27.4 | 60.9 |\\n| CoT-SC → ReAct | 34.2 | 64.6 |\\n| ReAct→ CoT-SC | 35.1 | 62.0 |\\n| Supervised SoTAb | 67.5 | 89.5 |\\n\\n\\n\\n'),\n", 562 | " Document(metadata={'table': '| Prompt Methoda | HotpotQA (EM) | Fever (Acc) |\\n| --- | --- | --- |\\n| Standard | 28.7 | 57.1 |\\n| CoT (Wei et al., 2022) | 29.4 | 56.3 |\\n| CoT-SC (Wang et al., 2022a) | 33.4 | 60.4 |\\n| Act | 25.7 | 58.9 |\\n| ReAct | 27.4 | 60.9 |\\n| CoT-SC → ReAct | 34.2 | 64.6 |\\n| ReAct→ CoT-SC | 35.1 | 62.0 |\\n| Supervised SoTAb | 67.5 | 89.5 |\\n\\n', 'entity': '', 'page': 4, 'source': 'uploads/e6e8e9b5-3e65-4298-ad17-cad6c03829f9_2210.03629v3.pdf'}, page_content='')]" 563 | ] 564 | }, 565 | "execution_count": 11, 566 | "metadata": {}, 567 | "output_type": "execute_result" 568 | } 569 | ], 570 | "source": [ 571 | "all_documents" 572 | ] 573 | } 574 | ], 575 | "metadata": { 576 | "kernelspec": { 577 | "display_name": ".venv", 578 | "language": "python", 579 | "name": "python3" 580 | }, 581 | "language_info": { 582 | 
"codemirror_mode": { 583 | "name": "ipython", 584 | "version": 3 585 | }, 586 | "file_extension": ".py", 587 | "mimetype": "text/x-python", 588 | "name": "python", 589 | "nbconvert_exporter": "python", 590 | "pygments_lexer": "ipython3", 591 | "version": "3.11.11" 592 | } 593 | }, 594 | "nbformat": 4, 595 | "nbformat_minor": 4 596 | } 597 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "teddynote-parser-client" 7 | version = "0.0.1" 8 | description = "TeddyNote Parser API 클라이언트 라이브러리" 9 | readme = "README.md" 10 | authors = [ 11 | {name = "teddylee777", email = "teddylee777@gmail.com"}, 12 | ] 13 | license = {text = "MIT"} 14 | classifiers = [ 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.7", 17 | "Programming Language :: Python :: 3.8", 18 | "Programming Language :: Python :: 3.9", 19 | "Programming Language :: Python :: 3.10", 20 | "License :: OSI Approved :: MIT License", 21 | "Operating System :: OS Independent", 22 | "Topic :: Software Development :: Libraries :: Python Modules", 23 | ] 24 | keywords = ["pdf", "parser", "api", "client", "teddynote"] 25 | dependencies = [ 26 | "jupyter>=1.1.1", 27 | "langchain>=0.0.27", 28 | "notebook>=6.5.7", 29 | "pandas>=1.1.5", 30 | "python-dotenv>=0.21.1", 31 | "requests>=2.25.0", 32 | ] 33 | requires-python = ">=3.7" 34 | 35 | [project.optional-dependencies] 36 | dev = [ 37 | "pytest>=7.0.0", 38 | "black>=22.1.0", 39 | "isort>=5.10.1", 40 | "mypy>=0.942", 41 | "ruff>=0.0.186", 42 | "build>=0.8.0", 43 | "twine>=4.0.0", 44 | ] 45 | 46 | [project.urls] 47 | "Homepage" = "https://github.com/teddylee777/teddynote-parser-api-client" 48 | "Bug Tracker" = "https://github.com/teddylee777/teddynote-parser-api-client/issues" 49 | 
50 | [project.scripts] 51 | teddynote-parser = "teddynote_parser_client.cli:main" 52 | 53 | [tool.setuptools] 54 | packages = ["teddynote_parser_client"] 55 | 56 | [tool.isort] 57 | profile = "black" 58 | line_length = 88 59 | 60 | [tool.black] 61 | line-length = 88 62 | target-version = ["py37", "py38", "py39", "py310"] 63 | 64 | [tool.ruff] 65 | line-length = 88 66 | select = ["E", "F", "W", "I", "N", "UP", "ANN", "BLE", "C4", "SIM", "TID"] 67 | ignore = ["ANN101", "ANN102"] 68 | target-version = "py37" 69 | exclude = [ 70 | ".git", 71 | ".venv", 72 | "venv", 73 | "build", 74 | "dist", 75 | ] 76 | 77 | [tool.mypy] 78 | python_version = "3.7" 79 | warn_return_any = true 80 | warn_unused_configs = true 81 | disallow_untyped_defs = true 82 | disallow_incomplete_defs = true 83 | check_untyped_defs = true 84 | disallow_untyped_decorators = true 85 | no_implicit_optional = true 86 | strict_optional = true 87 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | jupyter>=1.1.1 3 | langchain>=0.0.27 4 | notebook>=6.5.7 5 | pandas>=1.1.5 6 | python-dotenv>=0.21.1 7 | requests>=2.25.0 8 | 9 | # Development dependencies 10 | pytest>=7.0.0 11 | black>=22.1.0 12 | isort>=5.10.1 13 | mypy>=0.942 14 | ruff>=0.0.186 15 | build>=0.8.0 16 | twine>=4.0.0 17 | -------------------------------------------------------------------------------- /scripts/publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 스크립트가 있는 디렉토리의 상위 디렉토리로 이동 (프로젝트 루트) 4 | cd "$(dirname "$0")/.." 5 | 6 | # 필요한 패키지 설치 7 | echo "필요한 패키지 설치 중..." 8 | pip install --upgrade pip build twine 9 | 10 | # 기존 빌드 제거 11 | echo "기존 빌드 제거 중..." 12 | rm -rf build/ dist/ *.egg-info/ 13 | 14 | # 패키지 빌드 15 | echo "패키지 빌드 중..." 16 | python -m build 17 | 18 | # 패키지 내용 확인 19 | echo "빌드된 패키지 내용 확인 중..." 
20 | twine check dist/* 21 | 22 | # 테스트 PyPI 업로드 (선택 사항) 23 | read -p "테스트 PyPI에 먼저 업로드하시겠습니까? (y/n): " test_upload 24 | if [ "$test_upload" = "y" ]; then 25 | echo "테스트 PyPI에 업로드 중..." 26 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 27 | echo "테스트 PyPI에 업로드 완료!" 28 | echo "테스트 PyPI에서 패키지를 설치하려면: pip install --index-url https://test.pypi.org/simple/ teddynote-parser-client" 29 | fi 30 | 31 | # 실제 PyPI 업로드 32 | read -p "실제 PyPI에 업로드하시겠습니까? (y/n): " live_upload 33 | if [ "$live_upload" = "y" ]; then 34 | echo "PyPI에 업로드 중..." 35 | twine upload dist/* 36 | echo "PyPI에 업로드 완료!" 37 | echo "PyPI에서 패키지를 설치하려면: pip install teddynote-parser-client" 38 | fi 39 | 40 | echo "완료!" -------------------------------------------------------------------------------- /teddynote_parser_client/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | TeddyNote Parser Client Package 3 | 4 | 이 패키지는 TeddyNote Parser API와 상호작용하기 위한 클라이언트 라이브러리를 제공합니다. 5 | """ 6 | 7 | from teddynote_parser_client.client import TeddyNoteParserClient 8 | 9 | __version__ = "0.0.1" 10 | __author__ = "teddylee777" 11 | -------------------------------------------------------------------------------- /teddynote_parser_client/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | TeddyNote Parser Client 패키지 실행 모듈 3 | 4 | 이 모듈은 teddynote_parser_client 패키지를 직접 실행할 수 있게 합니다. 5 | 예: python -m teddynote_parser_client 6 | """ 7 | 8 | from teddynote_parser_client.cli import main 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /teddynote_parser_client/cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | TeddyNote Parser 클라이언트 명령줄 인터페이스 (CLI) 3 | 4 | 이 모듈은 TeddyNote Parser API 클라이언트를 명령줄에서 사용할 수 있는 인터페이스를 제공합니다. 
def setup_logger() -> logging.Logger:
    """Return the ``teddynote_parser`` logger configured for console output.

    The logger level is set to INFO and a single ``StreamHandler`` with a
    timestamped format is attached. The function is idempotent: because
    ``logging.getLogger`` hands back the same object for the same name, the
    handler is only attached when the logger has none yet, so calling this
    function repeatedly does not duplicate every log line.

    Returns:
        logging.Logger: The configured logger instance.
    """
    logger = logging.getLogger("teddynote_parser")
    logger.setLevel(logging.INFO)

    # Attach the console handler only once per process.
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
        )
        logger.addHandler(handler)

    return logger
상태: {job_status['status']}") 68 | 69 | # 결과 다운로드 (요청 시) 70 | if args.download: 71 | save_dir = args.save_dir or "parser_results" 72 | zip_path, extract_path = client.download_result( 73 | job_id, 74 | save_dir=save_dir, 75 | extract=args.extract, 76 | overwrite=args.overwrite, 77 | ) 78 | print(f"결과 다운로드 완료: {zip_path}") 79 | if extract_path: 80 | print(f"압축 해제 디렉토리: {extract_path}") 81 | except Exception as e: 82 | print(f"오류 발생: {e}") 83 | sys.exit(1) 84 | 85 | 86 | def status_command(client: TeddyNoteParserClient, args: argparse.Namespace) -> None: 87 | """작업 상태 확인 명령 처리""" 88 | try: 89 | job_status = client.get_job_status(args.job_id) 90 | print(f"작업 ID '{args.job_id}'의 상태: {job_status}") 91 | except Exception as e: 92 | print(f"오류 발생: {e}") 93 | sys.exit(1) 94 | 95 | 96 | def download_command(client: TeddyNoteParserClient, args: argparse.Namespace) -> None: 97 | """결과 다운로드 명령 처리""" 98 | try: 99 | save_dir = args.save_dir or "parser_results" 100 | zip_path, extract_path = client.download_result( 101 | args.job_id, 102 | save_dir=save_dir, 103 | extract=args.extract, 104 | overwrite=args.overwrite, 105 | ) 106 | print(f"결과 다운로드 완료: {zip_path}") 107 | if extract_path: 108 | print(f"압축 해제 디렉토리: {extract_path}") 109 | except Exception as e: 110 | print(f"오류 발생: {e}") 111 | sys.exit(1) 112 | 113 | 114 | def list_jobs_command(client: TeddyNoteParserClient, args: argparse.Namespace) -> None: 115 | """작업 목록 조회 명령 처리""" 116 | try: 117 | jobs = client.list_all_jobs() 118 | print(f"총 {len(jobs)}개의 작업이 있습니다:") 119 | for job in jobs: 120 | print( 121 | f"작업 ID: {job.get('job_id', 'N/A')}, 상태: {job.get('status', 'N/A')}" 122 | ) 123 | except Exception as e: 124 | print(f"오류 발생: {e}") 125 | sys.exit(1) 126 | 127 | 128 | def main() -> None: 129 | """메인 함수""" 130 | parser = argparse.ArgumentParser( 131 | description="TeddyNote Parser API 클라이언트 명령줄 도구" 132 | ) 133 | 134 | # 공통 인자 135 | parser.add_argument( 136 | "--api-url", 137 | default=os.environ.get("TEDDYNOTE_API_URL", 
"http://localhost:9997"), 138 | help="API 서버 URL (기본값: 환경 변수 TEDDYNOTE_API_URL 또는 http://localhost:9997)", 139 | ) 140 | parser.add_argument( 141 | "--upstage-api-key", 142 | default=os.environ.get("UPSTAGE_API_KEY"), 143 | help="UPSTAGE API 키 (기본값: 환경 변수 UPSTAGE_API_KEY)", 144 | ) 145 | parser.add_argument( 146 | "--openai-api-key", 147 | default=os.environ.get("OPENAI_API_KEY"), 148 | help="OpenAI API 키 (기본값: 환경 변수 OPENAI_API_KEY)", 149 | ) 150 | parser.add_argument("--debug", action="store_true", help="디버그 모드 활성화") 151 | 152 | # 하위 명령어 설정 153 | subparsers = parser.add_subparsers(dest="command", help="사용할 명령") 154 | 155 | # 건강 상태 확인 명령 156 | health_parser = subparsers.add_parser("health", help="API 서버 건강 상태 확인") 157 | 158 | # PDF 파싱 명령 159 | parse_parser = subparsers.add_parser("parse", help="PDF 파일 파싱") 160 | parse_parser.add_argument("pdf_path", help="파싱할 PDF 파일 경로") 161 | parse_parser.add_argument( 162 | "--language", default="Korean", help="문서 언어 (기본값: Korean)" 163 | ) 164 | parse_parser.add_argument( 165 | "--include-image", 166 | action="store_true", 167 | default=True, 168 | help="파싱 결과에 이미지 포함 (기본값: True)", 169 | ) 170 | parse_parser.add_argument( 171 | "--batch-size", 172 | type=int, 173 | default=30, 174 | help="처리할 PDF 페이지의 배치 크기 (기본값: 30)", 175 | ) 176 | parse_parser.add_argument("--test-page", type=int, help="처리할 최대 페이지 수") 177 | parse_parser.add_argument("--wait", action="store_true", help="작업 완료까지 대기") 178 | parse_parser.add_argument( 179 | "--check-interval", type=int, default=2, help="상태 확인 간격(초) (기본값: 2초)" 180 | ) 181 | parse_parser.add_argument( 182 | "--max-attempts", type=int, default=60, help="최대 시도 횟수 (기본값: 60회)" 183 | ) 184 | parse_parser.add_argument( 185 | "--download", action="store_true", help="완료 후 결과 다운로드" 186 | ) 187 | parse_parser.add_argument( 188 | "--save-dir", help="결과를 저장할 디렉토리 (기본값: parser_results)" 189 | ) 190 | parse_parser.add_argument( 191 | "--extract", action="store_true", help="ZIP 파일 압축 해제" 192 | ) 193 | 
parse_parser.add_argument( 194 | "--overwrite", action="store_true", help="기존 파일 덮어쓰기" 195 | ) 196 | 197 | # 작업 상태 확인 명령 198 | status_parser = subparsers.add_parser("status", help="작업 상태 확인") 199 | status_parser.add_argument("job_id", help="확인할 작업 ID") 200 | 201 | # 결과 다운로드 명령 202 | download_parser = subparsers.add_parser("download", help="작업 결과 다운로드") 203 | download_parser.add_argument("job_id", help="다운로드할 작업 ID") 204 | download_parser.add_argument( 205 | "--save-dir", help="결과를 저장할 디렉토리 (기본값: parser_results)" 206 | ) 207 | download_parser.add_argument( 208 | "--extract", action="store_true", help="ZIP 파일 압축 해제" 209 | ) 210 | download_parser.add_argument( 211 | "--overwrite", action="store_true", help="기존 파일 덮어쓰기" 212 | ) 213 | 214 | # 작업 목록 조회 명령 215 | jobs_parser = subparsers.add_parser("jobs", help="모든 작업 목록 조회") 216 | 217 | # 인자 파싱 218 | args = parser.parse_args() 219 | 220 | # 명령어가 제공되지 않은 경우 도움말 표시 221 | if not args.command: 222 | parser.print_help() 223 | sys.exit(1) 224 | 225 | # 로깅 설정 226 | logger = setup_logger() 227 | if args.debug: 228 | logger.setLevel(logging.DEBUG) 229 | 230 | # 클라이언트 초기화 231 | client = TeddyNoteParserClient( 232 | api_url=args.api_url, 233 | upstage_api_key=args.upstage_api_key, 234 | openai_api_key=args.openai_api_key, 235 | logger=logger, 236 | ) 237 | 238 | # 명령어 처리 239 | commands = { 240 | "health": health_check_command, 241 | "parse": parse_pdf_command, 242 | "status": status_command, 243 | "download": download_command, 244 | "jobs": list_jobs_command, 245 | } 246 | 247 | commands[args.command](client, args) 248 | 249 | 250 | if __name__ == "__main__": 251 | main() 252 | -------------------------------------------------------------------------------- /teddynote_parser_client/client.py: -------------------------------------------------------------------------------- 1 | """ 2 | TeddyNote Parser API 클라이언트 모듈 3 | 4 | 이 모듈은 TeddyNote Parser API와 상호작용하기 위한 클라이언트 클래스를 정의합니다. 
class TeddyNoteParserClient:
    """Client for the TeddyNote Parser HTTP API.

    The client wraps the server endpoints used in this module:

    - ``/health``   -- health check
    - ``/parse``    -- upload a PDF and start a parsing job
    - ``/status``   -- query the state of a job
    - ``/download`` -- download the ZIP result of a completed job
    - ``/jobs``     -- list all jobs
    """

    # Size of the chunks written to disk while streaming a result download.
    _DOWNLOAD_CHUNK_SIZE = 64 * 1024

    def __init__(
        self,
        api_url: str = "http://localhost:9997",
        upstage_api_key: Optional[str] = None,
        openai_api_key: Optional[str] = None,
        language: str = "Korean",
        include_image: bool = True,
        batch_size: int = 30,
        test_page: Optional[int] = None,
        timeout: int = 60,
        logger: Optional[logging.Logger] = None,
    ) -> None:
        """Initialise the client.

        Args:
            api_url: Base URL of the API server; trailing slashes are removed.
            upstage_api_key: UPSTAGE API key. Falls back to the
                ``UPSTAGE_API_KEY`` environment variable.
            openai_api_key: OpenAI API key. Falls back to the
                ``OPENAI_API_KEY`` environment variable.
            language: Default document language sent with parse requests.
            include_image: Default for including images in the parse result.
            batch_size: Default number of PDF pages per batch.
            test_page: Default maximum number of pages to process; ``None``
                means the field is not sent with the request.
            timeout: Timeout in seconds passed to every HTTP request.
            logger: Logger to use; defaults to this module's logger.
        """
        self.api_url = api_url.rstrip("/")

        # Endpoint URLs derived from the base URL.
        self.health_endpoint = f"{self.api_url}/health"
        self.parse_endpoint = f"{self.api_url}/parse"
        self.status_endpoint = f"{self.api_url}/status"
        self.download_endpoint = f"{self.api_url}/download"
        self.jobs_endpoint = f"{self.api_url}/jobs"

        # Explicit arguments win over the environment.
        self.upstage_api_key = upstage_api_key or os.environ.get("UPSTAGE_API_KEY")
        self.openai_api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")

        # Per-instance defaults for parse_pdf().
        self.language = language
        self.include_image = include_image
        self.batch_size = batch_size
        self.test_page = test_page

        self.timeout = timeout
        self.logger = logger or logging.getLogger(__name__)

        # Missing keys are only a warning here; parse_pdf() enforces them.
        if not self.upstage_api_key:
            self.logger.warning("UPSTAGE API 키가 설정되지 않았습니다.")

        if not self.openai_api_key:
            self.logger.warning("OpenAI API 키가 설정되지 않았습니다.")

    def health_check(self) -> Dict[str, Any]:
        """Query the server health endpoint.

        Returns:
            Dict[str, Any]: The decoded JSON body of the response.

        Raises:
            requests.RequestException: If the request fails or the server
                answers with an error status.
        """
        try:
            response = requests.get(self.health_endpoint, timeout=self.timeout)
            response.raise_for_status()
            self.logger.info("API 서버가 정상적으로 응답했습니다.")
            return response.json()
        except requests.RequestException as e:
            self.logger.error(f"API 서버 건강 상태 확인 실패: {e}")
            raise

    def parse_pdf(
        self,
        pdf_path: Union[str, Path],
        language: Optional[str] = None,
        include_image: Optional[bool] = None,
        batch_size: Optional[int] = None,
        test_page: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Upload a PDF file and start a parsing job.

        Every optional argument falls back to the value given at
        construction time when it is ``None``.

        Args:
            pdf_path: Path of the PDF file to upload.
            language: Document language.
            include_image: Whether the parse result should include images.
            batch_size: Number of PDF pages per batch.
            test_page: Maximum number of pages to process; omitted from the
                request when the resolved value is ``None``.

        Returns:
            Dict[str, Any]: The decoded JSON response; it is expected to
            contain ``job_id``.

        Raises:
            FileNotFoundError: If ``pdf_path`` is not an existing regular file.
            ValueError: If either API key is missing.
            requests.RequestException: If the request fails or the server
                answers with an error status.
        """
        pdf_path = Path(pdf_path)

        # is_file() also rejects directories, which exists() would let through
        # only to fail later inside open().
        if not pdf_path.is_file():
            error_msg = f"파일을 찾을 수 없습니다: {pdf_path}"
            self.logger.error(error_msg)
            raise FileNotFoundError(error_msg)

        if not self.upstage_api_key or not self.openai_api_key:
            error_msg = "UPSTAGE API 키와 OpenAI API 키가 필요합니다."
            self.logger.error(error_msg)
            raise ValueError(error_msg)

        # Resolve call-level overrides against the instance defaults.
        lang = language if language is not None else self.language
        img = include_image if include_image is not None else self.include_image
        batch = batch_size if batch_size is not None else self.batch_size
        test = test_page if test_page is not None else self.test_page

        self.logger.debug(
            f"파싱 요청 파라미터 초기값: language={language}, include_image={include_image}, batch_size={batch_size}, test_page={test_page}"
        )
        self.logger.debug(
            f"파싱 요청 파라미터 결정값: language={lang}, include_image={img}, batch_size={batch}, test_page={test}"
        )

        headers = {
            "X-UPSTAGE-API-KEY": self.upstage_api_key,
            "X-OPENAI-API-KEY": self.openai_api_key,
        }

        # Form fields are sent as strings; the boolean is lower-cased.
        data = {
            "language": str(lang),
            "include_image": str(img).lower(),
            "batch_size": str(batch),
        }

        if test is not None:
            data["test_page"] = str(test)
            self.logger.debug(f"test_page={test} 파라미터가 API 요청에 추가되었습니다.")
        else:
            self.logger.debug(
                "test_page 파라미터가 None이므로 API 요청에 포함되지 않습니다."
            )

        self.logger.debug(f"API 요청에 사용될 데이터: {data}")
        self.logger.debug(f"API 요청에 사용될 파일: {pdf_path.name}")

        try:
            self.logger.info(f"파일 '{pdf_path.name}'에 대한 파싱 작업 요청 중...")
            self.logger.info(
                f"파싱 옵션: 언어={lang}, 이미지 포함={img}, 배치 크기={batch}, 처리 페이지 수={test}"
            )

            # The context manager closes the upload handle on every path,
            # including errors raised while the request is being sent.
            with pdf_path.open("rb") as pdf_file:
                response = requests.post(
                    self.parse_endpoint,
                    headers=headers,
                    files={"file": (pdf_path.name, pdf_file, "application/pdf")},
                    data=data,
                    timeout=self.timeout,
                )

            if response.status_code != 200:
                # Log the server's own error text before raising.
                self.logger.error(
                    f"API 요청 실패. 상태 코드: {response.status_code}, 응답: {response.text}"
                )
                response.raise_for_status()

            result = response.json()
            self.logger.info(f"파싱 작업이 시작되었습니다. 작업 ID: {result['job_id']}")
            return result
        except requests.RequestException as e:
            self.logger.error(f"파싱 작업 요청 실패: {e}")
            raise

    def get_job_status(self, job_id: str) -> Dict[str, Any]:
        """Fetch the status of a job.

        Args:
            job_id: ID of the job to query.

        Returns:
            Dict[str, Any]: The decoded JSON status document.

        Raises:
            requests.RequestException: If the request fails or the server
                answers with an error status.
        """
        try:
            response = requests.get(
                f"{self.status_endpoint}/{job_id}", timeout=self.timeout
            )
            response.raise_for_status()

            job_status = response.json()
            status = job_status.get("status", "unknown")
            self.logger.info(f"작업 ID '{job_id}'의 현재 상태: {status}")

            return job_status
        except requests.RequestException as e:
            self.logger.error(f"작업 상태 확인 실패: {e}")
            raise

    def wait_for_job_completion(
        self, job_id: str, check_interval: int = 2, max_attempts: int = 60
    ) -> Dict[str, Any]:
        """Poll a job until it reaches the ``completed`` or ``failed`` state.

        Args:
            job_id: ID of the job to wait for.
            check_interval: Seconds to sleep between two status checks.
            max_attempts: Maximum number of status checks.

        Returns:
            Dict[str, Any]: The status document of the finished job. Note
            that a ``failed`` job is returned, not raised.

        Raises:
            TimeoutError: If the job is still unfinished after
                ``max_attempts`` checks.
            requests.RequestException: If a status request fails.
        """
        self.logger.info(f"작업 ID '{job_id}'의 완료 대기 중...")
        self.logger.info(
            f"상태 확인 간격: {check_interval}초, 최대 시도 횟수: {max_attempts}회"
        )

        for attempt in range(1, max_attempts + 1):
            job_status = self.get_job_status(job_id)
            status = job_status.get("status", "unknown")

            if status in ("completed", "failed"):
                self.logger.info(
                    f"작업 ID '{job_id}'가 {status} 상태로 완료되었습니다."
                )
                return job_status

            # No further check follows the last attempt, so sleeping again
            # would only delay the TimeoutError.
            if attempt == max_attempts:
                break

            self.logger.debug(
                f"[{attempt}/{max_attempts}] 작업 ID '{job_id}'의 현재 상태: {status}. {check_interval}초 후 다시 확인합니다."
            )
            time.sleep(check_interval)

        error_msg = f"작업 ID '{job_id}'의 완료 대기 시간이 초과되었습니다. 최대 {max_attempts * check_interval}초 경과."
        self.logger.error(error_msg)
        raise TimeoutError(error_msg)

    def download_result(
        self,
        job_id: str,
        save_dir: Union[str, Path] = "parser_results",
        extract: bool = False,
        overwrite: bool = False,
    ) -> Tuple[Optional[Path], Optional[Path]]:
        """Download the ZIP result of a completed job and optionally extract it.

        The archive is stored as ``<save_dir>/<job_id>_<timestamp>.zip`` and,
        when ``extract`` is true, unpacked into ``<save_dir>/<job_id>``.

        Args:
            job_id: ID of the job whose result is downloaded.
            save_dir: Directory for the result; created if missing.
            extract: Whether to extract the downloaded ZIP file.
            overwrite: Whether an existing ZIP file of the same name may be
                replaced.

        Returns:
            Tuple[Optional[Path], Optional[Path]]: ``(zip_path, extract_path)``;
            ``extract_path`` is ``None`` when extraction was not requested.

        Raises:
            ValueError: If the job is not in the ``completed`` state.
            FileExistsError: If the ZIP file exists and ``overwrite`` is false.
            requests.RequestException: If a request fails.
            zipfile.BadZipFile: If extraction is requested and the archive is
                corrupt.
        """
        job_status = self.get_job_status(job_id)
        status = job_status.get("status", "unknown")

        if status != "completed":
            error_msg = f"작업 ID '{job_id}'가 완료되지 않았습니다. 현재 상태: {status}"
            self.logger.error(error_msg)
            raise ValueError(error_msg)

        save_dir = Path(save_dir)
        save_dir.mkdir(parents=True, exist_ok=True)

        # The second-resolution timestamp makes the name unique per download,
        # so the existence check below only fires for same-second repeats.
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        zip_filename = f"{job_id}_{timestamp}.zip"
        zip_path = save_dir / zip_filename

        if zip_path.exists() and not overwrite:
            error_msg = f"파일이 이미 존재합니다: {zip_path}. overwrite=True로 설정하여 덮어쓰기를 허용하세요."
            self.logger.error(error_msg)
            raise FileExistsError(error_msg)

        download_url = f"{self.download_endpoint}/{job_id}"

        try:
            self.logger.info(f"작업 ID '{job_id}'의 결과 다운로드 중...")
            # stream=True avoids holding the whole archive in memory; the body
            # is written to disk chunk by chunk instead.
            response = requests.get(download_url, timeout=self.timeout, stream=True)
            try:
                response.raise_for_status()
                with open(zip_path, "wb") as f:
                    for chunk in response.iter_content(
                        chunk_size=self._DOWNLOAD_CHUNK_SIZE
                    ):
                        if chunk:
                            f.write(chunk)
            finally:
                # A streamed response keeps its connection until closed.
                response.close()
            self.logger.info(f"결과가 성공적으로 다운로드되었습니다: {zip_path}")

            extract_path = None
            if extract:
                extract_dir = save_dir / job_id
                extract_path = self._extract_zip(zip_path, extract_dir)
                self.logger.info(
                    f"ZIP 파일의 압축이 성공적으로 해제되었습니다: {extract_path}"
                )

            return zip_path, extract_path

        except requests.RequestException as e:
            self.logger.error(f"결과 다운로드 실패: {e}")
            raise

    def _extract_zip(self, zip_path: Path, extract_path: Path) -> Path:
        """Extract a ZIP archive into a directory.

        Args:
            zip_path: Path of the ZIP file to extract.
            extract_path: Target directory; created if missing.

        Returns:
            Path: ``extract_path``.

        Raises:
            zipfile.BadZipFile: If the archive is corrupt.
            OSError: On file-system errors.
        """
        try:
            extract_path.mkdir(parents=True, exist_ok=True)

            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(extract_path)

            self.logger.info(
                f"ZIP 파일 '{zip_path}'의 압축을 '{extract_path}'에 해제했습니다."
            )
            return extract_path

        except (zipfile.BadZipFile, OSError) as e:
            self.logger.error(f"ZIP 파일 압축 해제 실패: {e}")
            raise

    def list_all_jobs(self) -> List[Dict[str, Any]]:
        """List all jobs known to the server.

        Returns:
            List[Dict[str, Any]]: The decoded JSON body of the ``/jobs``
            response.

        Raises:
            requests.RequestException: If the request fails or the server
                answers with an error status.
        """
        try:
            self.logger.info("모든 작업 목록 조회 중...")
            response = requests.get(self.jobs_endpoint, timeout=self.timeout)
            response.raise_for_status()

            jobs = response.json()
            self.logger.info(f"총 {len(jobs)} 개의 작업이 조회되었습니다.")
            return jobs

        except requests.RequestException as e:
            self.logger.error(f"작업 목록 조회 실패: {e}")
            raise
def test_health_check(self, mock_get): 27 | """health_check 메소드 테스트""" 28 | # Mock 응답 설정 29 | mock_response = MagicMock() 30 | mock_response.status_code = 200 31 | mock_response.json.return_value = { 32 | "status": "ok", 33 | "timestamp": "2023-01-01T00:00:00Z", 34 | } 35 | mock_get.return_value = mock_response 36 | 37 | # 메소드 호출 38 | result = self.client.health_check() 39 | 40 | # 결과 검증 41 | self.assertEqual(result["status"], "ok") 42 | mock_get.assert_called_once_with( 43 | "http://test-api-url:9997/health", timeout=self.client.timeout 44 | ) 45 | 46 | @patch("teddynote_parser_client.client.requests.post") 47 | def test_parse_pdf(self, mock_post): 48 | """parse_pdf 메소드 테스트""" 49 | # Mock 파일 생성 50 | test_file = Path("test_file.pdf") 51 | with open(test_file, "w") as f: 52 | f.write("test content") 53 | 54 | try: 55 | # Mock 응답 설정 56 | mock_response = MagicMock() 57 | mock_response.status_code = 200 58 | mock_response.json.return_value = { 59 | "job_id": "test-job-id", 60 | "status": "processing", 61 | } 62 | mock_post.return_value = mock_response 63 | 64 | # 메소드 호출 65 | result = self.client.parse_pdf(test_file) 66 | 67 | # 결과 검증 68 | self.assertEqual(result["job_id"], "test-job-id") 69 | self.assertEqual(result["status"], "processing") 70 | 71 | # 요청이 올바른 인자로 호출되었는지 검증 72 | mock_post.assert_called_once() 73 | args, kwargs = mock_post.call_args 74 | self.assertEqual(args[0], "http://test-api-url:9997/parse") 75 | self.assertIn("headers", kwargs) 76 | self.assertIn("files", kwargs) 77 | self.assertIn("data", kwargs) 78 | self.assertIn("timeout", kwargs) 79 | 80 | finally: 81 | # 테스트 파일 삭제 82 | if test_file.exists(): 83 | test_file.unlink() 84 | 85 | @patch("teddynote_parser_client.client.requests.get") 86 | def test_get_job_status(self, mock_get): 87 | """get_job_status 메소드 테스트""" 88 | # Mock 응답 설정 89 | mock_response = MagicMock() 90 | mock_response.status_code = 200 91 | mock_response.json.return_value = { 92 | "job_id": "test-job-id", 93 | "status": "completed", 
94 | } 95 | mock_get.return_value = mock_response 96 | 97 | # 메소드 호출 98 | result = self.client.get_job_status("test-job-id") 99 | 100 | # 결과 검증 101 | self.assertEqual(result["job_id"], "test-job-id") 102 | self.assertEqual(result["status"], "completed") 103 | mock_get.assert_called_once_with( 104 | "http://test-api-url:9997/status/test-job-id", timeout=self.client.timeout 105 | ) 106 | 107 | 108 | if __name__ == "__main__": 109 | unittest.main() 110 | --------------------------------------------------------------------------------