├── app ├── __init__.py ├── web │ ├── __init__.py │ ├── status │ │ ├── __init__.py │ │ ├── lecture_deletion_status_callback.py │ │ ├── ingestion_status_callback.py │ │ └── faq_ingestion_status_callback.py │ └── routers │ │ ├── __init__.py │ │ ├── health.py │ │ └── ingestion_status.py ├── domain │ ├── chat │ │ ├── __init__.py │ │ ├── course_chat │ │ │ ├── __init__.py │ │ │ ├── course_chat_status_update_dto.py │ │ │ └── course_chat_pipeline_execution_dto.py │ │ ├── exercise_chat │ │ │ ├── __init__.py │ │ │ ├── exercise_chat_status_update_dto.py │ │ │ └── exercise_chat_pipeline_execution_dto.py │ │ ├── lecture_chat │ │ │ ├── __init__.py │ │ │ └── lecture_chat_pipeline_execution_dto.py │ │ ├── chat_pipeline_execution_dto.py │ │ ├── interaction_suggestion_dto.py │ │ └── chat_pipeline_execution_base_data_dto.py │ ├── data │ │ ├── __init__.py │ │ ├── metrics │ │ │ ├── map_entry_dto.py │ │ │ ├── lecture_unit_information_dto.py │ │ │ ├── competency_progress_dto.py │ │ │ ├── exercise_student_metrics_dto.py │ │ │ ├── lecture_unit_student_metrics_dto.py │ │ │ ├── competency_jol_dto.py │ │ │ ├── competency_information_dto.py │ │ │ ├── competency_student_metrics_dto.py │ │ │ └── student_metrics_dto.py │ │ ├── course_dto.py │ │ ├── text_message_content_dto.py │ │ ├── json_message_content_dto.py │ │ ├── user_dto.py │ │ ├── image_message_content_dto.py │ │ ├── build_log_entry.py │ │ ├── feedback_dto.py │ │ ├── simple_submission_dto.py │ │ ├── tool_message_content_dto.py │ │ ├── faq_dto.py │ │ ├── result_dto.py │ │ ├── tool_call_dto.py │ │ ├── message_content_dto.py │ │ ├── text_exercise_dto.py │ │ ├── lecture_dto.py │ │ ├── lecture_unit_dto.py │ │ ├── programming_submission_dto.py │ │ ├── exam_dto.py │ │ ├── programming_exercise_dto.py │ │ ├── extended_course_dto.py │ │ ├── competency_dto.py │ │ └── exercise_with_submissions_dto.py │ ├── ingestion │ │ ├── __init__.py │ │ ├── ingestion_status_update_dto.py │ │ ├── deletionPipelineExecutionDto.py │ │ └── 
ingestion_pipeline_execution_dto.py │ ├── status │ │ ├── __init__.py │ │ ├── rewriting_status_update_dto.py │ │ ├── inconsistency_check_status_update_dto.py │ │ ├── text_exercise_chat_status_update_dto.py │ │ ├── stage_state_dto.py │ │ ├── competency_extraction_status_update_dto.py │ │ ├── stage_dto.py │ │ ├── status_update_dto.py │ │ └── lecture_chat_status_update_dto.py │ ├── feature_dto.py │ ├── error_response_dto.py │ ├── model_dto.py │ ├── rewriting_pipeline_execution_dto.py │ ├── event │ │ └── pyris_event_dto.py │ ├── inconsistency_check_pipeline_execution_dto.py │ ├── pipeline_execution_settings_dto.py │ ├── pipeline_execution_dto.py │ ├── text_exercise_chat_pipeline_execution_dto.py │ ├── competency_extraction_pipeline_execution_dto.py │ └── __init__.py ├── ingestion │ ├── __init__.py │ └── abstract_ingestion.py ├── pipeline │ ├── chat │ │ ├── __init__.py │ │ └── output_models │ │ │ ├── __init__.py │ │ │ └── output_models │ │ │ ├── __init__.py │ │ │ ├── selected_file_model.py │ │ │ └── selected_paragraphs.py │ ├── __init__.py │ ├── shared │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── summary_pipeline.py │ │ └── reranker_pipeline.py │ ├── prompts │ │ ├── summary_prompt.txt │ │ ├── chat_gpt_wrapper_prompts.py │ │ ├── reranker_prompt.txt │ │ ├── faq_retrieval_prompts.py │ │ ├── content_image_interpretation_merge_prompt.txt │ │ ├── choose_response_prompt.txt │ │ ├── faq_rewriting.py │ │ ├── competency_extraction.py │ │ ├── code_feedback_prompt.txt │ │ ├── faq_citation_prompt.txt │ │ ├── citation_prompt.txt │ │ ├── inconsistency_check_prompts.py │ │ ├── rewriting_prompts.py │ │ ├── lecture_retrieval_prompts.py │ │ └── text_exercise_chat_prompts.py │ ├── pipeline.py │ ├── rewriting_pipeline.py │ ├── chat_gpt_wrapper_pipeline.py │ ├── competency_extraction_pipeline.py │ └── inconsistency_check_pipeline.py ├── common │ ├── __init__.py │ ├── singleton.py │ ├── token_usage_dto.py │ ├── PipelineEnum.py │ ├── custom_exceptions.py │ └── pyris_message.py ├── 
vector_database │ ├── __init__.py │ ├── database.py │ └── faq_schema.py ├── llm │ ├── __init__.py │ ├── capability │ │ ├── __init__.py │ │ ├── requirement_list.py │ │ ├── capability_checker.py │ │ └── capability_list.py │ ├── langchain │ │ ├── __init__.py │ │ ├── iris_langchain_embedding_model.py │ │ ├── iris_langchain_completion_model.py │ │ └── iris_langchain_chat_model.py │ ├── request_handler │ │ ├── __init__.py │ │ ├── request_handler_interface.py │ │ ├── basic_request_handler.py │ │ └── capability_request_handler.py │ ├── completion_arguments.py │ ├── external │ │ ├── __init__.py │ │ ├── openai_completion.py │ │ ├── openai_dalle.py │ │ ├── openai_embeddings.py │ │ ├── model.py │ │ └── ollama.py │ └── llm_manager.py ├── dependencies.py ├── sentry.py ├── config.py ├── retrieval │ ├── faq_retrieval_utils.py │ └── faq_retrieval.py └── main.py ├── .github ├── CODEOWNERS ├── workflows │ ├── pullrequest-labeler.yml │ ├── lint.yml │ ├── build.yml │ └── deploy.yml ├── dependabot.yml └── labeler.yml ├── docker ├── nginx │ ├── 70-pyris-setup.sh │ ├── timeouts.conf │ ├── dhparam.pem │ ├── pyris-server.conf │ ├── pyris-nginx.conf │ ├── certs │ │ ├── pyris-nginx+4.pem │ │ └── pyris-nginx+4-key.pem │ └── nginx_502.html ├── weaviate │ └── default.env ├── weaviate.yml ├── pyris.yml ├── pyris-dev.yml ├── pyris-production-internal.yml ├── nginx.yml └── pyris-production.yml ├── application.example.yml ├── .flake8 ├── .pre-commit-config.yaml ├── requirements.txt ├── Dockerfile ├── .whitesource ├── log_conf.yml ├── LICENSE ├── llm_config.example.yml └── .gitignore /app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/web/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/chat/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/ingestion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/web/status/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @bassner -------------------------------------------------------------------------------- /app/domain/ingestion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/status/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/pipeline/chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/chat/course_chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/chat/exercise_chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/app/domain/chat/lecture_chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/pipeline/chat/output_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/pipeline/chat/output_models/output_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/common/__init__.py: -------------------------------------------------------------------------------- 1 | from app.common.singleton import Singleton 2 | -------------------------------------------------------------------------------- /app/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from app.pipeline.pipeline import Pipeline 2 | -------------------------------------------------------------------------------- /app/pipeline/shared/__init__.py: -------------------------------------------------------------------------------- 1 | from ...pipeline.shared.summary_pipeline import SummaryPipeline 2 | -------------------------------------------------------------------------------- /app/pipeline/prompts/summary_prompt.txt: -------------------------------------------------------------------------------- 1 | Write a concise summary of the following: 2 | "{text}" 3 | CONCISE SUMMARY: -------------------------------------------------------------------------------- /app/vector_database/__init__.py: -------------------------------------------------------------------------------- 1 | import app.vector_database.database 2 | import app.vector_database.lecture_schema 3 | -------------------------------------------------------------------------------- /docker/nginx/70-pyris-setup.sh: 
-------------------------------------------------------------------------------- 1 | # disable default.conf 2 | mv /etc/nginx/conf.d/default.conf /etc/nginx/conf.d/default.conf.disabled || true 3 | -------------------------------------------------------------------------------- /docker/nginx/timeouts.conf: -------------------------------------------------------------------------------- 1 | proxy_send_timeout 900s; 2 | proxy_read_timeout 900s; 3 | fastcgi_send_timeout 900s; 4 | fastcgi_read_timeout 900s; 5 | -------------------------------------------------------------------------------- /app/domain/feature_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class FeatureDTO(BaseModel): 5 | id: str 6 | name: str 7 | description: str 8 | -------------------------------------------------------------------------------- /app/domain/error_response_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | 3 | 4 | class IrisErrorResponseDTO(BaseModel): 5 | error_message: str = Field(alias="errorMessage") 6 | -------------------------------------------------------------------------------- /application.example.yml: -------------------------------------------------------------------------------- 1 | api_keys: 2 | - token: "secret" 3 | 4 | weaviate: 5 | host: "localhost" 6 | port: "8001" 7 | grpc_port: "50051" 8 | 9 | env_vars: 10 | SOME: 'value' -------------------------------------------------------------------------------- /app/domain/status/rewriting_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from app.domain.status.status_update_dto import StatusUpdateDTO 2 | 3 | 4 | class RewritingStatusUpdateDTO(StatusUpdateDTO): 5 | result: str = "" 6 | -------------------------------------------------------------------------------- /app/web/routers/__init__.py: 
-------------------------------------------------------------------------------- 1 | from ..routers.health import router as health_router 2 | from ..routers.pipelines import router as pipelines_router 3 | from ..routers.webhooks import router as webhooks_router 4 | -------------------------------------------------------------------------------- /app/domain/data/metrics/map_entry_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from pydantic import BaseModel 3 | 4 | 5 | class MapEntryDTO(BaseModel): 6 | key: Optional[int] = None 7 | value: Optional[int] = None 8 | -------------------------------------------------------------------------------- /app/domain/status/inconsistency_check_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from app.domain.status.status_update_dto import StatusUpdateDTO 2 | 3 | 4 | class InconsistencyCheckStatusUpdateDTO(StatusUpdateDTO): 5 | result: str = "" 6 | -------------------------------------------------------------------------------- /app/domain/model_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class PyrisModelDTO(BaseModel): 7 | id: str 8 | name: str 9 | description: Optional[str] = None 10 | -------------------------------------------------------------------------------- /app/llm/__init__.py: -------------------------------------------------------------------------------- 1 | from app.llm.completion_arguments import * 2 | from app.llm.external import * 3 | from app.llm.capability import * 4 | from app.llm.request_handler import * 5 | from app.llm.capability import RequirementList 6 | -------------------------------------------------------------------------------- /app/pipeline/prompts/chat_gpt_wrapper_prompts.py: 
-------------------------------------------------------------------------------- 1 | chat_gpt_initial_system_prompt = """ 2 | You are a helpful, smart, kind, and efficient AI assistant. 3 | You always fulfill the user's requests to the best of your ability. 4 | """ 5 | -------------------------------------------------------------------------------- /app/llm/capability/__init__.py: -------------------------------------------------------------------------------- 1 | from ..capability.capability_list import CapabilityList 2 | from ..capability.requirement_list import RequirementList 3 | from ..capability.capability_checker import capabilities_fulfill_requirements 4 | -------------------------------------------------------------------------------- /app/domain/data/course_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class CourseDTO(BaseModel): 7 | id: int 8 | name: Optional[str] 9 | description: Optional[str] = Field(None) 10 | -------------------------------------------------------------------------------- /app/domain/status/text_exercise_chat_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from app.domain.status.status_update_dto import StatusUpdateDTO 4 | 5 | 6 | class TextExerciseChatStatusUpdateDTO(StatusUpdateDTO): 7 | result: Optional[str] 8 | -------------------------------------------------------------------------------- /app/domain/data/text_message_content_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ConfigDict, Field 2 | 3 | 4 | class TextMessageContentDTO(BaseModel): 5 | model_config = ConfigDict(populate_by_name=True) 6 | 7 | text_content: str = Field(alias="textContent") 8 | 
-------------------------------------------------------------------------------- /app/domain/status/stage_state_dto.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class StageStateEnum(str, Enum): 5 | NOT_STARTED = "NOT_STARTED" 6 | IN_PROGRESS = "IN_PROGRESS" 7 | DONE = "DONE" 8 | SKIPPED = "SKIPPED" 9 | ERROR = "ERROR" 10 | -------------------------------------------------------------------------------- /app/domain/chat/lecture_chat/lecture_chat_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from app.domain import ChatPipelineExecutionDTO 2 | from app.domain.data.course_dto import CourseDTO 3 | 4 | 5 | class LectureChatPipelineExecutionDTO(ChatPipelineExecutionDTO): 6 | course: CourseDTO 7 | -------------------------------------------------------------------------------- /app/domain/ingestion/ingestion_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from ...domain.status.status_update_dto import StatusUpdateDTO 4 | 5 | 6 | class IngestionStatusUpdateDTO(StatusUpdateDTO): 7 | result: Optional[str] = None 8 | id: Optional[int] = None 9 | -------------------------------------------------------------------------------- /app/common/singleton.py: -------------------------------------------------------------------------------- 1 | class Singleton(type): 2 | _instances = {} 3 | 4 | def __call__(cls, *args, **kwargs): 5 | if cls not in cls._instances: 6 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 7 | return cls._instances[cls] 8 | -------------------------------------------------------------------------------- /app/domain/rewriting_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import Field, BaseModel 2 | from . 
import PipelineExecutionDTO 3 | 4 | 5 | class RewritingPipelineExecutionDTO(BaseModel): 6 | execution: PipelineExecutionDTO 7 | to_be_rewritten: str = Field(alias="toBeRewritten") 8 | -------------------------------------------------------------------------------- /.github/workflows/pullrequest-labeler.yml: -------------------------------------------------------------------------------- 1 | name: Pull Request Labeler 2 | on: pull_request_target 3 | 4 | jobs: 5 | label: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/labeler@v5 9 | with: 10 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 11 | -------------------------------------------------------------------------------- /app/domain/status/competency_extraction_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from app.domain.data.competency_dto import Competency 2 | from app.domain.status.status_update_dto import StatusUpdateDTO 3 | 4 | 5 | class CompetencyExtractionStatusUpdateDTO(StatusUpdateDTO): 6 | result: list[Competency] = [] 7 | -------------------------------------------------------------------------------- /app/domain/chat/course_chat/course_chat_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | from app.domain.status.status_update_dto import StatusUpdateDTO 4 | 5 | 6 | class CourseChatStatusUpdateDTO(StatusUpdateDTO): 7 | result: Optional[str] = None 8 | suggestions: List[str] = [] 9 | -------------------------------------------------------------------------------- /app/domain/data/json_message_content_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ConfigDict, Field, Json 2 | from typing import Any 3 | 4 | 5 | class JsonMessageContentDTO(BaseModel): 6 | model_config = ConfigDict(populate_by_name=True) 7 | 8 | json_content: Json[Any] = Field(alias="jsonContent") 9 | 
-------------------------------------------------------------------------------- /app/domain/chat/exercise_chat/exercise_chat_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | from app.domain.status.status_update_dto import StatusUpdateDTO 4 | 5 | 6 | class ExerciseChatStatusUpdateDTO(StatusUpdateDTO): 7 | result: Optional[str] = None 8 | suggestions: List[str] = [] 9 | -------------------------------------------------------------------------------- /app/llm/langchain/__init__.py: -------------------------------------------------------------------------------- 1 | from ...llm.langchain.iris_langchain_completion_model import ( 2 | IrisLangchainCompletionModel, 3 | ) 4 | from ...llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel 5 | from ...llm.langchain.iris_langchain_embedding_model import IrisLangchainEmbeddingModel 6 | -------------------------------------------------------------------------------- /app/domain/data/user_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class UserDTO(BaseModel): 7 | id: int 8 | first_name: Optional[str] = Field(alias="firstName", default=None) 9 | last_name: Optional[str] = Field(alias="lastName", default=None) 10 | -------------------------------------------------------------------------------- /app/domain/data/image_message_content_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field, ConfigDict 2 | from typing import Optional 3 | 4 | 5 | class ImageMessageContentDTO(BaseModel): 6 | base64: str = Field(..., alias="pdfFile") 7 | prompt: Optional[str] = None 8 | model_config = ConfigDict(populate_by_name=True) 9 | -------------------------------------------------------------------------------- /.flake8: 
-------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | exclude = 4 | .git, 5 | __pycache__, 6 | .idea 7 | per-file-ignores = 8 | # imported but unused 9 | __init__.py: F401, F403 10 | open_ai_chat_wrapper.py: F811 11 | open_ai_completion_wrapper.py: F811 12 | open_ai_embedding_wrapper.py: F811 13 | 14 | -------------------------------------------------------------------------------- /app/llm/request_handler/__init__.py: -------------------------------------------------------------------------------- 1 | from ..request_handler.request_handler_interface import RequestHandler 2 | from ..request_handler.basic_request_handler import BasicRequestHandler 3 | 4 | from ..request_handler.capability_request_handler import ( 5 | CapabilityRequestHandler, 6 | CapabilityRequestHandlerSelectionMode, 7 | ) 8 | -------------------------------------------------------------------------------- /docker/weaviate/default.env: -------------------------------------------------------------------------------- 1 | QUERY_DEFAULTS_LIMIT=25 2 | AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true 3 | PERSISTENCE_DATA_PATH=/var/lib/weaviate 4 | DEFAULT_VECTORIZER_MODULE=none 5 | ENABLE_MODULES= 6 | CLUSTER_HOSTNAME=pyris 7 | LIMIT_RESOURCES=true 8 | DISK_USE_WARNING_PERCENTAGE=80 9 | vectorCacheMaxObjects=1000000 10 | 11 | -------------------------------------------------------------------------------- /app/domain/status/stage_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | from app.domain.status.stage_state_dto import StageStateEnum 6 | 7 | 8 | class StageDTO(BaseModel): 9 | name: Optional[str] = None 10 | weight: int 11 | state: StageStateEnum 12 | message: Optional[str] = None 13 | -------------------------------------------------------------------------------- /app/domain/status/status_update_dto.py: 
-------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | from app.common.token_usage_dto import TokenUsageDTO 6 | from ...domain.status.stage_dto import StageDTO 7 | 8 | 9 | class StatusUpdateDTO(BaseModel): 10 | stages: List[StageDTO] 11 | tokens: List[TokenUsageDTO] = [] 12 | -------------------------------------------------------------------------------- /app/domain/event/pyris_event_dto.py: -------------------------------------------------------------------------------- 1 | from typing import TypeVar, Generic, Optional 2 | 3 | from pydantic import Field, BaseModel 4 | 5 | T = TypeVar("T") 6 | 7 | 8 | class PyrisEventDTO(BaseModel, Generic[T]): 9 | event_type: Optional[str] = Field(default=None, alias="eventType") 10 | event: Optional[T] = Field(default=None, alias="event") 11 | -------------------------------------------------------------------------------- /app/domain/inconsistency_check_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from . 
import PipelineExecutionDTO 4 | from .data.programming_exercise_dto import ProgrammingExerciseDTO 5 | 6 | 7 | class InconsistencyCheckPipelineExecutionDTO(BaseModel): 8 | execution: PipelineExecutionDTO 9 | exercise: ProgrammingExerciseDTO 10 | -------------------------------------------------------------------------------- /app/domain/data/build_log_entry.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Optional 3 | 4 | from pydantic import BaseModel 5 | 6 | 7 | class BuildLogEntryDTO(BaseModel): 8 | timestamp: Optional[datetime] = None 9 | message: Optional[str] = None 10 | 11 | def __str__(self): 12 | return f"{self.timestamp}: {self.message}" 13 | -------------------------------------------------------------------------------- /app/pipeline/chat/output_models/output_models/selected_file_model.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic.v1 import BaseModel as V1BaseModel, Field as V1Field 4 | 5 | 6 | class SelectedFiles(V1BaseModel): 7 | selected_files: List[str] = V1Field( 8 | description="List of selected files from the repository. Minimum 0 files, maximum 5 files." 
9 | ) 10 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | groups: 8 | python-deps: 9 | applies-to: version-updates 10 | patterns: 11 | - "*" 12 | - package-ecosystem: "github-actions" 13 | directory: "/" 14 | schedule: 15 | interval: "weekly" 16 | -------------------------------------------------------------------------------- /app/domain/data/feedback_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class FeedbackDTO(BaseModel): 7 | text: Optional[str] = None 8 | test_case_name: Optional[str] = Field(alias="testCaseName", default=None) 9 | credits: float 10 | 11 | def __str__(self): 12 | return f"{self.test_case_name}: {self.text} ({self.credits} credits)" 13 | -------------------------------------------------------------------------------- /app/domain/data/simple_submission_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from datetime import datetime 6 | 7 | 8 | class SimpleSubmissionDTO(BaseModel): 9 | timestamp: Optional[datetime] = Field(alias="timestamp", default=None) 10 | score: Optional[float] = Field(alias="score", default=0) 11 | 12 | class Config: 13 | require_by_default = False 14 | -------------------------------------------------------------------------------- /app/domain/data/tool_message_content_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, ConfigDict, Field 4 | 5 | 6 | class ToolMessageContentDTO(BaseModel): 7 | 8 | model_config 
= ConfigDict(populate_by_name=True) 9 | name: Optional[str] = Field(alias="toolName", default="") 10 | tool_content: str = Field(alias="toolContent") 11 | tool_call_id: str = Field(alias="toolCallId") 12 | -------------------------------------------------------------------------------- /app/domain/pipeline_execution_settings_dto.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class PipelineExecutionSettingsDTO(BaseModel): 7 | authentication_token: str = Field(alias="authenticationToken") 8 | allowed_model_identifiers: List[str] = Field( 9 | default=[], alias="allowedModelIdentifiers" 10 | ) 11 | artemis_base_url: str = Field(alias="artemisBaseUrl") 12 | -------------------------------------------------------------------------------- /app/pipeline/chat/output_models/output_models/selected_paragraphs.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import Field, BaseModel 4 | 5 | 6 | class SelectedParagraphs(BaseModel): 7 | selected_paragraphs: List[int] = Field( 8 | default=[], 9 | description="List of paragraphs sorted from most relevant to least relevant to the student question, " 10 | "each with a relevance score.", 11 | ) 12 | -------------------------------------------------------------------------------- /app/domain/chat/chat_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from pydantic import Field 4 | 5 | from app.domain import PipelineExecutionDTO 6 | from app.common.pyris_message import PyrisMessage 7 | from app.domain.data.user_dto import UserDTO 8 | 9 | 10 | class ChatPipelineExecutionDTO(PipelineExecutionDTO): 11 | chat_history: List[PyrisMessage] = Field(alias="chatHistory", default=[]) 12 | user: Optional[UserDTO] 13 | 
-------------------------------------------------------------------------------- /app/web/routers/health.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, status, Response, Depends 2 | 3 | from app.dependencies import TokenValidator 4 | 5 | router = APIRouter(prefix="/api/v1/health", tags=["health"]) 6 | 7 | 8 | @router.get( 9 | "/", 10 | dependencies=[Depends(TokenValidator())], 11 | ) 12 | def health_check(): 13 | return Response( 14 | status_code=status.HTTP_200_OK, content=b"[]", media_type="application/json" 15 | ) 16 | -------------------------------------------------------------------------------- /app/domain/data/faq_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | 3 | 4 | class FaqDTO(BaseModel): 5 | faq_id: int = Field(alias="faqId") 6 | course_id: int = Field(alias="courseId") 7 | question_title: str = Field(alias="questionTitle") 8 | question_answer: str = Field(alias="questionAnswer") 9 | course_name: str = Field(default="", alias="courseName") 10 | course_description: str = Field(default="", alias="courseDescription") 11 | -------------------------------------------------------------------------------- /app/domain/data/metrics/lecture_unit_information_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from pydantic import BaseModel, Field 3 | from datetime import datetime 4 | 5 | 6 | class LectureUnitInformationDTO(BaseModel): 7 | id: Optional[int] = None 8 | name: Optional[str] = None 9 | release_date: Optional[datetime] = Field(None, alias="releaseDate") 10 | type: Optional[str] = None 11 | 12 | class Config: 13 | populate_by_name = True 14 | -------------------------------------------------------------------------------- /app/ingestion/abstract_ingestion.py: 
class AbstractIngestion(ABC):
    """Base class for components that ingest repositories into a database.

    Concrete ingestors implement :meth:`chunk_data` to split the files
    found under a path into small chunk dictionaries suitable for indexing.
    """

    @abstractmethod
    def chunk_data(self, path: str) -> List[Dict[str, str]]:
        """Split the code files under ``path`` into chunk dictionaries."""
class ResultDTO(BaseModel):
    """Outcome of a single submission build/test run."""

    # When the result was completed; None while still pending.
    completion_date: Optional[datetime] = Field(alias="completionDate", default=None)
    # Whether the run passed overall.
    successful: bool = Field(alias="successful", default=False)
    # Individual feedback entries; default_factory avoids declaring one
    # shared mutable list as the field default.
    feedbacks: List[FeedbackDTO] = Field(alias="feedbacks", default_factory=list)
class FunctionDTO(BaseModel):
    """Name and JSON-encoded arguments of a function invoked by the LLM."""

    name: str = Field(..., alias="name")
    # Raw JSON string that pydantic parses into a Python object.
    arguments: Json[Any] = Field(..., alias="arguments")


class ToolCallDTO(BaseModel):
    """A single tool (function) call requested by the LLM."""

    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(alias="id")
    # Only "function"-type tool calls are supported.
    type: Literal["function"] = "function"
    function: FunctionDTO = Field(alias="function")
class ExerciseStudentMetricsDTO(BaseModel):
    """Per-exercise metrics for one student, keyed by id (presumably the exercise id)."""

    average_score: Dict[int, float] = Field(default_factory=dict, alias="averageScore")
    score: Dict[int, float] = Field(default_factory=dict)
    average_latest_submission: Dict[int, float] = Field(
        default_factory=dict, alias="averageLatestSubmission"
    )
    latest_submission: Dict[int, float] = Field(
        default_factory=dict, alias="latestSubmission"
    )
    # Ids of completed exercises. The previous default `Field({})` was an
    # empty *dict*, not a set; default_factory=set gives a correctly typed
    # default (and all dict fields above now avoid shared mutable defaults).
    completed: Set[int] = Field(default_factory=set)
class LectureUnitStudentMetricsDTO(BaseModel):
    """Per-lecture-unit metrics for one student."""

    # Lecture unit details, keyed by id (presumably the lecture unit id).
    lecture_unit_information: Dict[int, LectureUnitInformationDTO] = Field(
        {}, alias="lectureUnitInformation"
    )
    # Ids of completed lecture units; None when not reported.
    completed: Optional[Set[int]] = None

    class Config:
        # Accept both field names and their camelCase aliases on input.
        populate_by_name = True
9 | """ 10 | 11 | result: str 12 | """The result message or status of the lecture chat pipeline operation.""" 13 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | "component:LLM": 2 | - changed-files: 3 | - any-glob-to-any-file: app/llm/** 4 | "component:Pipeline": 5 | - changed-files: 6 | - any-glob-to-any-file: app/pipeline/** 7 | "component:FastAPI": 8 | - changed-files: 9 | - any-glob-to-any-file: app/web/** 10 | "component:Domain": 11 | - changed-files: 12 | - any-glob-to-any-file: app/domain/** 13 | "component:Docker": 14 | - changed-files: 15 | - any-glob-to-any-file: docker/** 16 | "component:CI/CD": 17 | - changed-files: 18 | - any-glob-to-any-file: .github/** 19 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile to build a container image for a Python 3.12 FastAPI application 2 | FROM python:3.12.3-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /app 6 | 7 | # Copy the dependencies file to the working directory 8 | COPY requirements.txt . 
# Response formats a completion request may ask for. NOTE: the functional
# Enum API takes the class name first, then the member names; the previous
# call Enum("TEXT", "JSON") created an enum class *named* "TEXT" whose only
# member was JSON, so CompletionArgumentsResponseFormat.TEXT did not exist.
CompletionArgumentsResponseFormat = Enum(
    "CompletionArgumentsResponseFormat", ["TEXT", "JSON"]
)


class CompletionArguments:
    """Arguments for the completion request"""

    def __init__(
        self,
        max_tokens: int = None,
        temperature: float = None,
        stop: list[str] = None,
        response_format: CompletionArgumentsResponseFormat = "TEXT",
    ):
        # Maximum number of tokens to generate; None lets the model decide.
        self.max_tokens = max_tokens
        # Sampling temperature; None uses the provider default.
        self.temperature = temperature
        # Stop sequences that end generation early.
        self.stop = stop
        # Kept as the plain string "TEXT" by default for backward
        # compatibility with callers that compare against "TEXT"/"JSON".
        self.response_format = response_format
class CompetencyJolDTO(BaseModel):
    """A student's judgement-of-learning (JOL) entry for one competency."""

    competency_id: Optional[int] = Field(default=None, alias="competencyId")
    # Self-assessed judgement value entered by the student.
    jol_value: Optional[int] = Field(default=None, alias="jolValue")
    # When the judgement was made.
    judgement_time: Optional[datetime] = Field(default=None, alias="judgementTime")
    # Competency progress/confidence snapshot accompanying the judgement.
    competency_progress: Optional[float] = Field(
        default=None, alias="competencyProgress"
    )
    competency_confidence: Optional[float] = Field(
        default=None, alias="competencyConfidence"
    )

    class Config:
        # Accept both field names and their camelCase aliases on input.
        populate_by_name = True
class ChatPipelineExecutionBaseDataDTO(BaseModel):
    """Common payload shared by chat pipeline execution requests."""

    # Conversation so far; default_factory avoids declaring one shared
    # mutable list as the field default.
    chat_history: List[PyrisMessage] = Field(alias="chatHistory", default_factory=list)
    # The requesting user, if known.
    user: Optional[UserDTO]
    # Execution settings (auth token, allowed models, Artemis base URL).
    settings: Optional[PipelineExecutionSettingsDTO]
    # Stages Artemis already created for this run, if any.
    initial_stages: Optional[List[StageDTO]] = Field(
        default=None, alias="initialStages"
    )
class LectureUnitDTO(BaseModel):
    """Identifies one lecture unit plus its lecture and course context."""

    # Base64-encoded PDF of the unit; empty when no file was attached.
    pdf_file_base64: str = Field(alias="pdfFile", default="")
    lecture_unit_id: int = Field(alias="lectureUnitId")
    lecture_unit_name: str = Field(alias="lectureUnitName", default="")
    lecture_unit_link: str = Field(alias="lectureUnitLink", default="")
    # Enclosing lecture.
    lecture_id: int = Field(alias="lectureId")
    lecture_name: str = Field(alias="lectureName", default="")
    # Enclosing course.
    course_id: int = Field(alias="courseId")
    course_name: str = Field(alias="courseName", default="")
    course_description: str = Field(alias="courseDescription", default="")
10 | 11 | def __init__(self, request_handler: RequestHandler, **kwargs: Any) -> None: 12 | super().__init__(request_handler=request_handler, **kwargs) 13 | 14 | def embed_documents(self, texts: List[str]) -> List[List[float]]: 15 | return [self.embed_query(text) for text in texts] 16 | 17 | def embed_query(self, text: str) -> List[float]: 18 | return self.request_handler.embed(text) 19 | -------------------------------------------------------------------------------- /app/llm/external/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from ...llm.external.model import LanguageModel 4 | from ...llm.external.openai_completion import ( 5 | DirectOpenAICompletionModel, 6 | AzureOpenAICompletionModel, 7 | ) 8 | from ...llm.external.openai_chat import DirectOpenAIChatModel, AzureOpenAIChatModel 9 | from ...llm.external.openai_embeddings import ( 10 | DirectOpenAIEmbeddingModel, 11 | AzureOpenAIEmbeddingModel, 12 | ) 13 | from ...llm.external.ollama import OllamaModel 14 | 15 | AnyLLM = Union[ 16 | DirectOpenAICompletionModel, 17 | AzureOpenAICompletionModel, 18 | DirectOpenAIChatModel, 19 | AzureOpenAIChatModel, 20 | DirectOpenAIEmbeddingModel, 21 | AzureOpenAIEmbeddingModel, 22 | OllamaModel, 23 | ] 24 | -------------------------------------------------------------------------------- /app/domain/data/programming_submission_dto.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from datetime import datetime 6 | from ...domain.data.build_log_entry import BuildLogEntryDTO 7 | from ...domain.data.result_dto import ResultDTO 8 | 9 | 10 | class ProgrammingSubmissionDTO(BaseModel): 11 | id: int 12 | date: Optional[datetime] = None 13 | repository: Dict[str, str] = Field(alias="repository", default={}) 14 | is_practice: bool = Field(alias="isPractice") 15 | 
class ExerciseChatPipelineExecutionDTO(ChatPipelineExecutionDTO):
    """Execution payload for the exercise chat pipeline."""

    # Latest programming submission, when one exists.
    submission: Optional[ProgrammingSubmissionDTO] = None
    exercise: ProgrammingExerciseDTO
    course: CourseDTO
    # Optional event payload that accompanied the request.
    event_payload: Optional[PyrisEventDTO[Any]] = Field(
        default=None, alias="eventPayload"
    )
5 | Without any comment, return the result in the following JSON format, it is important to avoid giving 6 | unnecessary information, only the number of the paragraph if it's necessary for answering the student's question 7 | otherwise leave the array empty. 8 | {{"selected_paragraphs": [, , ...]}} 9 | 10 | {paragraphs} 11 | Question: {question} 12 | -------------------------------------------------------------------------------- /docker/pyris.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------------------------------------------------- 2 | # Pyris base service 3 | # ---------------------------------------------------------------------------------------------------------------------- 4 | 5 | services: 6 | pyris-app: 7 | container_name: pyris-app 8 | build: 9 | context: .. 10 | dockerfile: Dockerfile 11 | pull: true 12 | environment: 13 | APPLICATION_YML_PATH: "/config/application.yml" 14 | LLM_CONFIG_PATH: "/config/llm_config.yml" 15 | expose: 16 | - "8000" 17 | networks: 18 | - pyris 19 | command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] 20 | 21 | networks: 22 | pyris: 23 | driver: "bridge" 24 | name: pyris -------------------------------------------------------------------------------- /app/dependencies.py: -------------------------------------------------------------------------------- 1 | from fastapi import Depends 2 | from fastapi.requests import Request 3 | 4 | from app.common.custom_exceptions import ( 5 | RequiresAuthenticationException, 6 | PermissionDeniedException, 7 | ) 8 | from app.config import APIKeyConfig, settings 9 | 10 | 11 | def _get_api_key(request: Request) -> str: 12 | authorization_header = request.headers.get("Authorization") 13 | 14 | if not authorization_header: 15 | raise RequiresAuthenticationException 16 | 17 | return authorization_header 18 | 19 | 20 | class TokenValidator: 21 | async def 
def generate_structured_tool_from_function(tool_function: Callable) -> StructuredTool:
    """
    Generates a structured tool from a function
    :param tool_function: The tool function
    :return: The structured tool
    """
    return StructuredTool.from_function(tool_function)


def generate_structured_tools_from_functions(
    tools: List[Callable],
) -> List[StructuredTool]:
    """
    Generates a list of structured tools from a list of functions
    :param tools: The list of tool functions
    :return: The list of structured tools, in the same order
    """
    return list(map(generate_structured_tool_from_function, tools))
07bLIKLEw4QRwMmrLhzu2sZnFipAppXjsQ8tRa/QO4eoaEM97FKq6qONVwAA2if6 11 | l3amSySYVDvMYpaOwQYawKTole1Kon06h8JlIr+A5W3vmraMfQZZY72HAkxuOYH0 12 | wchOEYKU+jlmutbEdz747Ngleb5kp55CtL/PlEawEpqXWWXYBqo8mmMCAQI= 13 | -----END DH PARAMETERS----- 14 | -------------------------------------------------------------------------------- /app/domain/competency_extraction_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import Field, BaseModel 4 | 5 | from . import PipelineExecutionDTO 6 | from .data.competency_dto import CompetencyTaxonomy, Competency 7 | 8 | 9 | class CompetencyExtractionPipelineExecutionDTO(BaseModel): 10 | execution: PipelineExecutionDTO 11 | course_description: str = Field(alias="courseDescription") 12 | current_competencies: list[Competency] = Field( 13 | alias="currentCompetencies", default=[] 14 | ) 15 | taxonomy_options: List[CompetencyTaxonomy] = Field( 16 | alias="taxonomyOptions", default=[] 17 | ) 18 | max_n: int = Field( 19 | alias="maxN", 20 | description="Maximum number of competencies to extract from the course description", 21 | default=10, 22 | ) 23 | -------------------------------------------------------------------------------- /app/domain/data/metrics/competency_student_metrics_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Set 2 | from pydantic import BaseModel, Field 3 | from app.domain.data.metrics.competency_information_dto import CompetencyInformationDTO 4 | from app.domain.data.metrics.competency_jol_dto import CompetencyJolDTO 5 | 6 | 7 | class CompetencyStudentMetricsDTO(BaseModel): 8 | competency_information: Dict[int, CompetencyInformationDTO] = Field( 9 | {}, alias="competencyInformation" 10 | ) 11 | exercises: Dict[int, Set[int]] = Field({}) 12 | lecture_units: Dict[int, Set[int]] = Field({}, alias="lectureUnits") 13 | progress: Dict[int, float] = Field({}) 
class ExamDTO(BaseModel):
    """Exam metadata as sent by Artemis."""

    id: int = Field(alias="id")
    title: Optional[str] = Field(default=None, alias="title")
    # True when the exam is text-based.
    is_text_exam: bool = Field(default=False, alias="isTextExam")
    # Exam window.
    start_date: Optional[datetime] = Field(default=None, alias="startDate")
    end_date: Optional[datetime] = Field(default=None, alias="endDate")
    # Presumably when results become visible to students — confirm with caller.
    publish_results_date: Optional[datetime] = Field(
        default=None, alias="publishResultsDate"
    )
    # Student review window after results are published.
    exam_student_review_start: Optional[datetime] = Field(
        default=None, alias="examStudentReviewStart"
    )
    exam_student_review_end: Optional[datetime] = Field(
        default=None, alias="examStudentReviewEnd"
    )
| return ( 16 | f"{self.model_info}: {self.num_input_tokens} input cost: {self.cost_per_input_token}," 17 | f" {self.num_output_tokens} output cost: {self.cost_per_output_token}, pipeline: {self.pipeline} " 18 | ) 19 | -------------------------------------------------------------------------------- /docker/pyris-dev.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------------------------------------------------- 2 | # Setup for a Pyris development server. 3 | # ---------------------------------------------------------------------------------------------------------------------- 4 | 5 | services: 6 | pyris-app: 7 | extends: 8 | file: ./pyris.yml 9 | service: pyris-app 10 | pull_policy: never 11 | restart: "no" 12 | volumes: 13 | - ../application.local.yml:/config/application.yml:ro 14 | - ../llm_config.local.yml:/config/llm_config.yml:ro 15 | networks: 16 | - pyris 17 | ports: 18 | - 8000:8000 19 | 20 | weaviate: 21 | extends: 22 | file: ./weaviate.yml 23 | service: weaviate 24 | networks: 25 | - pyris 26 | ports: 27 | - 8001:8001 28 | - 50051:50051 29 | 30 | networks: 31 | pyris: 32 | driver: "bridge" 33 | name: pyris 34 | 35 | -------------------------------------------------------------------------------- /app/domain/__init__.py: -------------------------------------------------------------------------------- 1 | from .error_response_dto import IrisErrorResponseDTO 2 | from .pipeline_execution_dto import PipelineExecutionDTO 3 | from .pipeline_execution_settings_dto import PipelineExecutionSettingsDTO 4 | from .chat.chat_pipeline_execution_dto import ChatPipelineExecutionDTO 5 | from .chat.chat_pipeline_execution_base_data_dto import ChatPipelineExecutionBaseDataDTO 6 | from .competency_extraction_pipeline_execution_dto import ( 7 | CompetencyExtractionPipelineExecutionDTO, 8 | ) 9 | from .inconsistency_check_pipeline_execution_dto import ( 10 | 
class StudentMetricsDTO(BaseModel):
    """Container bundling all per-student metric DTOs sent to/from Artemis.

    Every sub-metric is optional, so a partially populated payload validates.
    """

    exercise_metrics: Optional[ExerciseStudentMetricsDTO] = Field(
        None, alias="exerciseMetrics"
    )
    # NOTE(review): unlike its siblings this field name mirrors the DTO class
    # ("..._dto" suffix) instead of a plain "lecture_unit_metrics"; the alias
    # is part of the wire format, so renaming either side would be breaking —
    # confirm with the Artemis sender before cleaning this up.
    lecture_unit_student_metrics_dto: Optional[LectureUnitStudentMetricsDTO] = Field(
        None, alias="lectureUnitStudentMetricsDTO"
    )
    competency_metrics: Optional[CompetencyStudentMetricsDTO] = Field(
        None, alias="competencyMetrics"
    )

    class Config:
        # Accept population both by snake_case field name and camelCase alias.
        populate_by_name = True
class IngestionPipelineExecutionDto(PipelineExecutionDTO):
    """Execution payload for ingesting one lecture unit's slides."""

    # The lecture unit to ingest; required (pydantic `...`).
    lecture_unit: LectureUnitDTO = Field(..., alias="pyrisLectureUnit")
    # NOTE(review): declared Optional but without a default, so pydantic v2
    # treats this as a *required* field that may be null — confirm whether
    # `= None` was intended here (same pattern in the FAQ DTO below).
    settings: Optional[PipelineExecutionSettingsDTO]
    # Stages already completed by the caller before this pipeline starts.
    initial_stages: Optional[List[StageDTO]] = Field(
        default=None, alias="initialStages"
    )


class FaqIngestionPipelineExecutionDto(PipelineExecutionDTO):
    """Execution payload for ingesting a single FAQ entry."""

    # The FAQ to ingest; required (pydantic `...`).
    faq: FaqDTO = Field(..., alias="pyrisFaqWebhookDTO")
    # NOTE(review): Optional without default — required-but-nullable in
    # pydantic v2; see note on IngestionPipelineExecutionDto.settings.
    settings: Optional[PipelineExecutionSettingsDTO]
    # Stages already completed by the caller before this pipeline starts.
    initial_stages: Optional[List[StageDTO]] = Field(
        default=None, alias="initialStages"
    )
| disable_existing_loggers: False 3 | formatters: 4 | default: 5 | "use_colors": null, 6 | "()": uvicorn.logging.DefaultFormatter 7 | format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 8 | access: 9 | "use_colors": null, 10 | "()": uvicorn.logging.AccessFormatter 11 | format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 12 | handlers: 13 | default: 14 | formatter: default 15 | class: logging.StreamHandler 16 | stream: ext://sys.stderr 17 | access: 18 | formatter: access 19 | class: logging.StreamHandler 20 | stream: ext://sys.stdout 21 | loggers: 22 | uvicorn: 23 | level: INFO 24 | handlers: 25 | - default 26 | propagate: no 27 | uvicorn.error: 28 | level: INFO 29 | handlers: 30 | - default 31 | propagate: no 32 | uvicorn.access: 33 | level: INFO 34 | handlers: 35 | - access 36 | propagate: no 37 | root: 38 | level: DEBUG 39 | handlers: 40 | - default 41 | propagate: no -------------------------------------------------------------------------------- /docker/nginx/pyris-server.conf: -------------------------------------------------------------------------------- 1 | resolver 127.0.0.11; 2 | resolver_timeout 5s; 3 | client_max_body_size 10m; 4 | client_body_buffer_size 1m; 5 | 6 | location / { 7 | proxy_pass http://pyris; 8 | proxy_http_version 1.1; 9 | proxy_set_header Upgrade $http_upgrade; 10 | proxy_set_header Connection 'upgrade'; 11 | proxy_set_header Host $host; 12 | # proxy_set_header Early-Data $ssl_early_data; 13 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 14 | proxy_set_header X-Forwarded-Proto $scheme; 15 | proxy_cache_bypass $http_upgrade; 16 | proxy_send_timeout 900s; 17 | proxy_read_timeout 900s; 18 | proxy_max_temp_file_size 0; 19 | proxy_buffering on; 20 | proxy_buffer_size 16k; 21 | proxy_buffers 8 16k; 22 | proxy_busy_buffers_size 32k; 23 | fastcgi_send_timeout 900s; 24 | fastcgi_read_timeout 900s; 25 | client_max_body_size 128M; 26 | } 27 | 28 | error_page 502 /502.html; 29 | location /502.html { 30 | root 
class PipelineEnum(str, Enum):
    """String identifiers for every Iris pipeline.

    Used (among other places) to tag TokenUsageDTO records with the pipeline
    that produced them.  Mixes in ``str`` so members serialize as their plain
    string value.  Member names and values are wire format — do not rename.
    """

    # Chat pipelines
    IRIS_CODE_FEEDBACK = "IRIS_CODE_FEEDBACK"
    IRIS_CHAT_COURSE_MESSAGE = "IRIS_CHAT_COURSE_MESSAGE"
    IRIS_CHAT_EXERCISE_MESSAGE = "IRIS_CHAT_EXERCISE_MESSAGE"
    IRIS_CHAT_EXERCISE_AGENT_MESSAGE = "IRIS_CHAT_EXERCISE_AGENT_MESSAGE"
    IRIS_INTERACTION_SUGGESTION = "IRIS_INTERACTION_SUGGESTION"
    IRIS_CHAT_LECTURE_MESSAGE = "IRIS_CHAT_LECTURE_MESSAGE"
    # Generation / support pipelines
    IRIS_COMPETENCY_GENERATION = "IRIS_COMPETENCY_GENERATION"
    IRIS_CITATION_PIPELINE = "IRIS_CITATION_PIPELINE"
    IRIS_RERANKER_PIPELINE = "IRIS_RERANKER_PIPELINE"
    IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE"
    # Retrieval and ingestion pipelines
    IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE"
    IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION"
    IRIS_FAQ_INGESTION = "IRIS_FAQ_INGESTION"
    IRIS_FAQ_RETRIEVAL_PIPELINE = "IRIS_FAQ_RETRIEVAL_PIPELINE"
    IRIS_INCONSISTENCY_CHECK = "IRIS_INCONSISTENCY_CHECK"
    IRIS_REWRITING_PIPELINE = "IRIS_REWRITING_PIPELINE"
    # Default when a token-usage record has not been attributed yet.
    NOT_SET = "NOT_SET"
class LecturesDeletionStatusCallback(StatusCallback):
    """
    Callback class for updating the status of a lecture-slides deletion
    pipeline run.  (Docstring previously said "Tutor Chat pipeline", which was
    a copy-paste error.)
    """

    def __init__(
        self, run_id: str, base_url: str, initial_stages: List[StageDTO] = None
    ):
        """Build the webhook URL and the stage list, then init the base callback.

        :param run_id: Identifier of this pipeline run (used in the webhook URL).
        :param base_url: Base URL of the Artemis instance to report status to.
        :param initial_stages: Stages already completed by the caller; this
            callback's own stage is appended after them.
        """
        url = f"{base_url}/api/public/pyris/webhooks/ingestion/runs/{run_id}/status"

        # Our stage starts right after any caller-provided stages.
        current_stage_index = len(initial_stages) if initial_stages else 0
        # Fix: copy the caller's list.  The previous `stages = initial_stages
        # or []` followed by `stages += [...]` extended the caller-provided
        # list in place, leaking this callback's stage back to the caller.
        stages = list(initial_stages) if initial_stages else []
        stages.append(
            StageDTO(
                weight=100, state=StageStateEnum.NOT_STARTED, name="Slides removal"
            )
        )
        status = IngestionStatusUpdateDTO(stages=stages)
        stage = stages[current_stage_index]
        super().__init__(url, run_id, status, stage, current_stage_index)
class Pipeline(metaclass=ABCMeta):
    """Abstract base class for all pipelines.

    Subclasses must override ``__call__`` (enforced at class-creation time by
    ``__init_subclass__``) and may record token usage via ``_append_tokens``.
    """

    implementation_id: str
    # Token-usage records accumulated during a run, one per LLM call.
    tokens: List[TokenUsageDTO]

    def __init__(self, implementation_id=None, **kwargs):
        self.implementation_id = implementation_id
        # Fix: initialize the token log here.  Previously `tokens` was only a
        # class-level annotation, so _append_tokens() raised AttributeError
        # for any subclass that never assigned self.tokens itself.
        self.tokens = []

    def __str__(self):
        return f"{self.__class__.__name__}"

    def __repr__(self):
        return f"{self.__class__.__name__}"

    def __call__(self, **kwargs):
        """
        Extracts the required parameters from the kwargs and runs the pipeline.
        """
        raise NotImplementedError("Subclasses must implement the __call__ method.")

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Fail fast at class definition if a subclass merely inherits the
        # NotImplementedError stub instead of providing its own __call__.
        if "__call__" not in cls.__dict__:
            raise NotImplementedError(
                "Subclasses of Pipeline interface must implement the __call__ method."
            )

    def _append_tokens(self, tokens: TokenUsageDTO, pipeline: PipelineEnum) -> None:
        """Tag a usage record with its owning pipeline and store it."""
        tokens.pipeline = pipeline
        self.tokens.append(tokens)
2 | You are tasked with helping to prepare educational materials for university students. 3 | You were provided with the raw text content of a slide and, in some cases, 4 | a description of the slide generated by another AI assistant. 5 | The assistant can fail to generate a description for some slides. 6 | Your task is to merge the description and the text content of the slide. 7 | If a description is available, you should add it after the raw text content of the slide. 8 | If an error message is given at the description, please ignore it and return only the raw text content. 9 | 10 | 11 | ############################################################################################################ 12 | Here is the raw text content of the Slide provided: 13 | {page_content} 14 | ############################################################################################################ 15 | 16 | ############################################################################################################ 17 | Here is the description of the slide provided, if it's an error message ignore it: 18 | {image_interpretation} 19 | ############################################################################################################ 20 | -------------------------------------------------------------------------------- /docker/pyris-production-internal.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------------------------------------------------- 2 | # Setup for a Pyris server suitable for internal network requests (without nginx). 3 | # ---------------------------------------------------------------------------------------------------------------------- 4 | # It is designed to take in environment variables for configuration, similar to the production setup. 
5 | # ---------------------------------------------------------------------------------------------------------------------- 6 | 7 | services: 8 | pyris-app: 9 | extends: 10 | file: ./pyris.yml 11 | service: pyris-app 12 | image: ghcr.io/ls1intum/pyris:${PYRIS_DOCKER_TAG:-latest} 13 | pull_policy: always 14 | restart: unless-stopped 15 | volumes: 16 | - ${PYRIS_APPLICATION_YML_FILE}:/config/application.yml:ro 17 | - ${PYRIS_LLM_CONFIG_YML_FILE}:/config/llm_config.yml:ro 18 | ports: 19 | - "${PYRIS_PORT:-8000}:8000" 20 | networks: 21 | - pyris 22 | 23 | weaviate: 24 | extends: 25 | file: ./weaviate.yml 26 | service: weaviate 27 | ports: 28 | - "${WEAVIATE_PORT:-8001}:8001" 29 | - "${WEAVIATE_GRPC_PORT:-50051}:50051" 30 | networks: 31 | - pyris 32 | 33 | networks: 34 | pyris: 35 | driver: "bridge" 36 | name: pyris -------------------------------------------------------------------------------- /app/pipeline/prompts/choose_response_prompt.txt: -------------------------------------------------------------------------------- 1 | Two paragraphs are shown below. Each paragraph has a number next to it. A question is also provided. 2 | Respond with the numbers of the paragraph that respond precisely and cover the full scope of the question. 3 | To understand the full scope of the question, take into consideration the Chat History as it the necessary context for the question. 4 | The relevance score is a number from 1 to 10 based on how relevant the paragraphs are to answer the question. 5 | Without any comment, return the result in the following JSON format, it is important to avoid giving 6 | unnecessary information, only the number of the paragraph that is most relevant and better suited for the question. 7 | {{"selected_paragraphs": []}} 8 | If the question is asking for code, return {{"selected_paragraphs": [0]}} 9 | Do not by any means return a the number of the response that has written programming code in it. 
class ProgrammingLanguage(str, Enum):
    """Programming languages Artemis programming exercises can use."""

    JAVA = "JAVA"
    PYTHON = "PYTHON"
    C = "C"
    HASKELL = "HASKELL"
    KOTLIN = "KOTLIN"
    VHDL = "VHDL"
    ASSEMBLER = "ASSEMBLER"
    SWIFT = "SWIFT"
    OCAML = "OCAML"
    EMPTY = "EMPTY"


class ProgrammingExerciseDTO(BaseModel):
    """DTO describing a programming exercise as received from Artemis."""

    id: int
    name: str
    # NOTE(review): typed as plain str rather than the ProgrammingLanguage
    # enum above — presumably to tolerate unknown languages; confirm before
    # tightening the type.
    programming_language: Optional[str] = Field(
        alias="programmingLanguage", default=None
    )
    # Dict[str, str] repositories — assumed to map file path -> file content;
    # TODO confirm against the Artemis sender.
    template_repository: Dict[str, str] = Field(alias="templateRepository", default={})
    solution_repository: Dict[str, str] = Field(alias="solutionRepository", default={})
    test_repository: Dict[str, str] = Field(alias="testRepository", default={})
    # Fix: the default is None, so the annotation must be Optional[str].
    # With the previous bare `str` annotation, an explicit null sent by
    # Artemis would fail pydantic v2 validation.
    problem_statement: Optional[str] = Field(alias="problemStatement", default=None)
    start_date: Optional[datetime] = Field(alias="startDate", default=None)
    end_date: Optional[datetime] = Field(alias="endDate", default=None)
    max_points: Optional[float] = Field(alias="maxPoints", default=None)
    recent_changes: Optional[str] = Field(
        alias="recentChanges",
        default=None,
        description="Git diff of the recent changes",
    )
class IrisMessageRole(str, Enum):
    """Role of a message's sender in a Pyris conversation."""

    USER = "USER"
    # NOTE: the wire value for the assistant role is "LLM", not "ASSISTANT".
    ASSISTANT = "LLM"
    SYSTEM = "SYSTEM"
    TOOL = "TOOL"


class PyrisMessage(BaseModel):
    """A single message exchanged in a Pyris chat, with token accounting."""

    model_config = ConfigDict(populate_by_name=True)

    # Token usage attributed to producing this message; defaults to an
    # all-zero record via the TokenUsageDTO field defaults.
    token_usage: TokenUsageDTO = Field(default_factory=TokenUsageDTO)

    sent_at: datetime | None = Field(alias="sentAt", default=None)
    sender: IrisMessageRole

    contents: List[MessageContentDTO] = Field(default=[])

    def __str__(self):
        # e.g. "user: [...]" — lowercases the role's *value* ("llm" for
        # ASSISTANT, since its value is "LLM").
        return f"{self.sender.lower()}: {self.contents}"


class PyrisAIMessage(PyrisMessage):
    """Assistant-authored message, optionally carrying tool calls."""

    model_config = ConfigDict(populate_by_name=True)
    sender: IrisMessageRole = IrisMessageRole.ASSISTANT
    # NOTE(review): Optional but no default, so pydantic v2 treats toolCalls
    # as required-but-nullable — confirm whether `default=None` was intended.
    tool_calls: Optional[List[ToolCallDTO]] = Field(alias="toolCalls")


class PyrisToolMessage(PyrisMessage):
    """Tool-result message; narrows `contents` to tool message content."""

    model_config = ConfigDict(populate_by_name=True)
    sender: IrisMessageRole = IrisMessageRole.TOOL
    contents: List[ToolMessageContentDTO] = Field(default=[])
class APIKeyConfig(BaseModel):
    """One accepted API token for authenticating callers."""

    token: str


class WeaviateSettings(BaseModel):
    """Connection settings for the Weaviate vector database."""

    host: str
    port: int
    grpc_port: int


class Settings(BaseModel):
    """Application settings loaded from the YAML file at APPLICATION_YML_PATH."""

    api_keys: list[APIKeyConfig]
    env_vars: dict[str, str]
    weaviate: WeaviateSettings

    @classmethod
    def get_settings(cls):
        """Get the settings from the configuration file.

        Raises EnvironmentError if APPLICATION_YML_PATH is unset,
        FileNotFoundError if the file is missing, and yaml.YAMLError if it
        cannot be parsed.
        """
        file_path_env = os.environ.get("APPLICATION_YML_PATH")
        if not file_path_env:
            raise EnvironmentError(
                "APPLICATION_YML_PATH environment variable is not set."
            )

        file_path = Path(file_path_env)
        try:
            with open(file_path, "r") as file:
                settings_file = yaml.safe_load(file)
            return cls.model_validate(settings_file)
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"Configuration file not found at {file_path}."
            ) from e
        except yaml.YAMLError as e:
            raise yaml.YAMLError(f"Error parsing YAML file at {file_path}.") from e

    def set_env_vars(self):
        """Set environment variables from the settings."""
        for key, value in self.env_vars.items():
            os.environ[key] = value


# NOTE: loaded at import time — importing app.config reads the YAML file and
# will raise if APPLICATION_YML_PATH is unset or the file is invalid.
settings = Settings.get_settings()
def should_allow_faq_tool(db: VectorDatabase, course_id: int) -> bool:
    """
    Checks if there are indexed faqs for the given course

    :param db: The vector database on which the faqs are indexed
    :param course_id: The course ID
    :return: True if there are indexed faqs for the course, False otherwise
    """
    if course_id:
        # Fetch the first object that matches the course ID with the language property
        result = db.faqs.query.fetch_objects(
            filters=Filter.by_property(FaqSchema.COURSE_ID.value).equal(course_id),
            limit=1,
            return_properties=[FaqSchema.COURSE_NAME.value],
        )
        return len(result.objects) > 0
    # A falsy course_id (e.g. 0 or None) never enables the FAQ tool.
    return False


def format_faqs(retrieved_faqs):
    """
    Format the retrieved FAQs into a single string.

    :param retrieved_faqs: List of retrieved FAQs
    :return: Formatted string containing the FAQ data
    """
    result = ""
    for faq in retrieved_faqs:
        # Each FAQ is rendered as one bracketed "[FAQ ID: ..., ...]" segment;
        # segments are concatenated without a separator.
        res = "[FAQ ID: {}, FAQ Question: {}, FAQ Answer: {}]".format(
            faq.get(FaqSchema.FAQ_ID.value),
            faq.get(FaqSchema.QUESTION_TITLE.value),
            faq.get(FaqSchema.QUESTION_ANSWER.value),
        )
        result += res
    return result
from typing import Literal, Any
from openai import OpenAI
from openai.lib.azure import AzureOpenAI

from ...llm import CompletionArguments
from ...llm.external.model import CompletionModel


class OpenAICompletionModel(CompletionModel):
    """Base wrapper around the OpenAI legacy (non-chat) completions API."""

    model: str  # model / deployment name passed to the completions endpoint
    api_key: str  # secret used to authenticate the client
    _client: OpenAI  # concrete client, set by subclasses in model_post_init

    def complete(self, prompt: str, arguments: CompletionArguments) -> Any:
        """
        Request a completion for the given prompt.

        :param prompt: Raw prompt text to complete.
        :param arguments: Sampling settings (temperature, max_tokens, stop).
        :return: The raw OpenAI completion response object.
        """
        response = self._client.completions.create(
            model=self.model,
            prompt=prompt,
            temperature=arguments.temperature,
            max_tokens=arguments.max_tokens,
            stop=arguments.stop,
        )
        return response


class DirectOpenAICompletionModel(OpenAICompletionModel):
    """Completion model talking directly to the public OpenAI API."""

    type: Literal["openai_completion"]

    def model_post_init(self, __context: Any) -> None:
        # Pydantic hook: build the client once validated fields are available.
        self._client = OpenAI(api_key=self.api_key)

    def __str__(self):
        return f"OpenAICompletion('{self.model}')"


class AzureOpenAICompletionModel(OpenAICompletionModel):
    """Completion model backed by an Azure OpenAI deployment."""

    type: Literal["azure_completion"]
    endpoint: str  # https endpoint of the Azure resource
    azure_deployment: str  # name of the deployed model on Azure
    api_version: str  # Azure OpenAI REST API version string

    def model_post_init(self, __context: Any) -> None:
        # Pydantic hook: build the Azure client once validated fields are available.
        self._client = AzureOpenAI(
            azure_endpoint=self.endpoint,
            azure_deployment=self.azure_deployment,
            api_version=self.api_version,
            api_key=self.api_key,
        )

    def __str__(self):
        return f"AzureCompletion('{self.model}')"
class IngestionStatusCallback(StatusCallback):
    """
    Callback class for updating the status of a Lecture ingestion Pipeline run.

    Builds the status-webhook URL for the run and appends the three ingestion
    stages (removal, interpretation, ingestion) to any stages that already ran.
    """

    def __init__(
        self,
        run_id: str,
        base_url: str,
        initial_stages: List[StageDTO] = None,
        lecture_unit_id: int = None,
    ):
        """
        :param run_id: Id of this pipeline run; part of the status webhook URL.
        :param base_url: Base URL of the Artemis instance receiving the updates.
        :param initial_stages: Stages completed before ingestion starts, if any.
        :param lecture_unit_id: Id of the lecture unit being ingested.
        """
        url = f"{base_url}/api/public/pyris/webhooks/ingestion/runs/{run_id}/status"

        current_stage_index = len(initial_stages) if initial_stages else 0
        # Copy the caller's list: the previous `stages = initial_stages or []`
        # followed by `stages += [...]` appended our stages to the caller-owned
        # `initial_stages` list in place.
        stages = list(initial_stages) if initial_stages else []
        stages += [
            StageDTO(
                weight=10, state=StageStateEnum.NOT_STARTED, name="Old slides removal"
            ),
            StageDTO(
                weight=60,
                state=StageStateEnum.NOT_STARTED,
                name="Slides Interpretation",
            ),
            StageDTO(
                weight=30,
                state=StageStateEnum.NOT_STARTED,
                name="Slides ingestion",
            ),
        ]
        status = IngestionStatusUpdateDTO(stages=stages, id=lecture_unit_id)
        stage = stages[current_stage_index]
        super().__init__(url, run_id, status, stage, current_stage_index)
class FaqIngestionStatus(StatusCallback):
    """
    Callback class for updating the status of a Faq ingestion Pipeline run.

    Builds the status-webhook URL for the run and appends the three FAQ
    ingestion stages (removal, interpretation, ingestion) to any stages that
    already ran.
    """

    def __init__(
        self,
        run_id: str,
        base_url: str,
        initial_stages: List[StageDTO] = None,
        faq_id: int = None,
    ):
        """
        :param run_id: Id of this pipeline run; part of the status webhook URL.
        :param base_url: Base URL of the Artemis instance receiving the updates.
        :param initial_stages: Stages completed before ingestion starts, if any.
        :param faq_id: Id of the FAQ being ingested.
        """
        url = (
            f"{base_url}/api/public/pyris/webhooks/ingestion/faqs/runs/{run_id}/status"
        )

        current_stage_index = len(initial_stages) if initial_stages else 0
        # Copy the caller's list: the previous `stages = initial_stages or []`
        # followed by `stages += [...]` appended our stages to the caller-owned
        # `initial_stages` list in place.
        stages = list(initial_stages) if initial_stages else []
        stages += [
            StageDTO(
                weight=10, state=StageStateEnum.NOT_STARTED, name="Old faq removal"
            ),
            StageDTO(
                weight=30,
                state=StageStateEnum.NOT_STARTED,
                name="Faq Interpretation",
            ),
            StageDTO(
                weight=60,
                state=StageStateEnum.NOT_STARTED,
                name="Faq ingestion",
            ),
        ]
        status = IngestionStatusUpdateDTO(stages=stages, id=faq_id)
        stage = stages[current_stage_index]
        super().__init__(url, run_id, status, stage, current_stage_index)
class ExtendedCourseDTO(BaseModel):
    """Full course representation sent by Artemis, including exercises, exams
    and competencies. Field aliases match the camelCase JSON payload."""

    id: int = Field(alias="id")
    # `name` may be absent in the payload; the annotation must be Optional to
    # match the None default (it was previously annotated as plain `str`).
    name: Optional[str] = Field(alias="name", default=None)
    description: Optional[str] = Field(alias="description", default=None)
    start_time: Optional[datetime] = Field(alias="startTime", default=None)
    end_time: Optional[datetime] = Field(alias="endTime", default=None)
    default_programming_language: Optional[ProgrammingLanguage] = Field(
        alias="defaultProgrammingLanguage", default=None
    )
    max_complaints: Optional[int] = Field(alias="maxComplaints", default=None)
    max_team_complaints: Optional[int] = Field(alias="maxTeamComplaints", default=None)
    max_complaint_time_days: Optional[int] = Field(
        alias="maxComplaintTimeDays", default=None
    )
    max_request_more_feedback_time_days: Optional[int] = Field(
        alias="maxRequestMoreFeedbackTimeDays", default=None
    )
    max_points: Optional[int] = Field(alias="maxPoints", default=None)
    presentation_score: Optional[int] = Field(alias="presentationScore", default=None)
    # default_factory gives each instance its own fresh list instead of a
    # shared mutable default literal.
    exercises: List[ExerciseWithSubmissionsDTO] = Field(
        alias="exercises", default_factory=list
    )
    exams: List[ExamDTO] = Field(alias="exams", default_factory=list)
    competencies: List[CompetencyDTO] = Field(
        alias="competencies", default_factory=list
    )
Make sure to use the original language of the input text. 11 | 6. Avoid repeating any information that is already present in the text. 12 | 7. Make sure to keep the markdown formatting intact and add formatting for the most important information. 13 | 8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make. 14 | sure to respond accordingly. Also, if the input text is too short, please point this out. 15 | 16 | Additionally for Short Inputs: If the input text is too short and does not resemble an answer to a potential question, \ 17 | respond appropriately and point this out. 18 | Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform. 19 | Ensure it is clear, concise, and well-structured. 20 | 21 | Exclude the start and end markers from your response and provide only the improved content. 22 | 23 | The markers are defined as following: 24 | Start of the text: ###START### 25 | End of the text: ###END### 26 | 27 | The text that has to be rewritten starts now: 28 | 29 | ###START### 30 | {rewritten_text} 31 | ###END###\ 32 | """ 33 | -------------------------------------------------------------------------------- /docker/nginx/certs/pyris-nginx+4-key.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEwAIBADANBgkqhkiG9w0BAQEFAASCBKowggSmAgEAAoIBAQCuCJSHStSQd02f 3 | j+IlQFes7pVUcYv2r0qm5qicGwPcKQf1/nmsy6k4WhE9HQV9VO9LQ4doSNp9NuYX 4 | P/JQqdYLZYYQvxHS+fR7ofIPjirsrbQYAkG5F6imM8H7MkkueG3HGqaKD54PBmC4 5 | BgBJDFWiF8jSNSYNKOE2L5SaYG/g3LLIkWBlhBQHgrprkio4pv5Y44+nf+hGWkSj 6 | bkRo2+PmIsNmQrpDB2o0O7uoyswa71HE967n9K17SWZ7Hi4kP6BGUWn65P5JB10a 7 | 6kz0y8183Uzz99bx8hzxLPg6VNiJZQ+dH4M1Jn6kysKiyV4x24JsM9s6t+Vhln9E 8 | KX5ktosdAgMBAAECggEBAJs3ddkwqWLrtOSR/H2C5G+NHsyAtPdgIfG3mTwZcBjk 9 | 03/X5gdyYUusMOHTx3ifzwjOgq9FAvFYjGDCHMlKoGfrtWWsNCZ53k6CApVTE/+h 10 | 
from datetime import datetime
from enum import Enum
from typing import Optional, List

from pydantic import BaseModel, Field
from pydantic.v1 import validator


class CompetencyTaxonomy(str, Enum):
    """Levels of Bloom's taxonomy a competency can be classified as."""

    REMEMBER = "REMEMBER"
    UNDERSTAND = "UNDERSTAND"
    APPLY = "APPLY"
    ANALYZE = "ANALYZE"
    EVALUATE = "EVALUATE"
    CREATE = "CREATE"


class CompetencyDTO(BaseModel):
    """Competency as exchanged with Artemis; every field is optional."""

    id: Optional[int] = None
    title: Optional[str] = None
    description: Optional[str] = None
    taxonomy: Optional[CompetencyTaxonomy] = None
    soft_due_date: Optional[datetime] = Field(default=None, alias="softDueDate")
    optional: Optional[bool] = None
    exercise_list: Optional[List[int]] = Field(default=[], alias="exerciseList")


class Competency(BaseModel):
    """Competency as produced by the extraction pipeline, with validation."""

    title: str = Field(
        description="Title of the competency that contains no more than 4 words",
    )
    description: str = Field(
        description="Description of the competency as plain string. DO NOT RETURN A LIST OF STRINGS."
    )
    taxonomy: CompetencyTaxonomy = Field(
        description="Selected taxonomy based on bloom's taxonomy"
    )

    # NOTE(review): these are pydantic *v1*-style validators (imported from
    # `pydantic.v1`) attached to what appears to be a v2 BaseModel — confirm
    # they actually run under the installed pydantic version; v2 would expect
    # `field_validator`.
    @validator("title")
    def validate_title(cls, field):
        """Validate the subject of the competency."""
        if len(field.split()) > 4:
            raise ValueError("Title must contain no more than 4 words")
        return field

    @validator("taxonomy")
    def validate_selected_taxonomy(cls, field):
        """Validate the selected taxonomy."""
        if field not in CompetencyTaxonomy.__members__:
            raise ValueError(f"Invalid taxonomy: {field}")
        return field
# System prompt for the competency extraction pipeline.
# Placeholders filled in at runtime via str.format():
#   {taxonomy_list}, {course_description}, {current_competencies}, {max_n}
# Literal braces in the JSON template are escaped as {{ }}.
system_prompt = """
You are an expert in all topics of computer science and its practical applications.
Your task consists of two parts:
1. Read the provided curriculum description of a university course.
2. Extract all learning goals ("competencies") from the course description.

Each competency must contain the following fields:

- title:
The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.

- description:
A detailed description of the competency in 2 to 5 bullet points.
Each bullet point illustrates a specific skill or concept of the competency.
Each bullet point is a complete sentence containing at most 15 words.
Each bullet point is on a new line and starts with "- ".

- taxonomy:
The classification of the competency within Bloom's taxonomy.
You must choose from these options in Bloom's taxonomy: {taxonomy_list}

All competencies must meet the following requirements:

- is mentioned in the course description.
- corresponds to exactly one subject or skill covered in the course description.
- is assigned to exactly one level of Bloom's taxonomy.
- is small and fine-grained. Large topics should be broken down into smaller competencies.
- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.

Here is the provided course description: {course_description}

Here is a template competency in JSON format:

{{
    "title": "Competency Title",
    "description": "- You understand this.\\n- You are proficient in doing that.\\n- You know how to do this.",
    "taxonomy": "ANALYZE"
}}

{current_competencies}

Respond with 0 to {max_n} competencies extracted from the course description,
each in JSON format, split by two newlines.
"""
class LlmManager(metaclass=Singleton):
    """
    Singleton registry of all configured language models.

    Models are loaded once from the YAML file referenced by the
    LLM_CONFIG_PATH environment variable.
    """

    entries: list[LanguageModel]

    def __init__(self):
        self.entries = []
        self.load_llms()

    def get_llm_by_id(self, llm_id):
        """Return the LLM with the given id, or None if no entry matches."""
        for llm in self.entries:
            if llm.id == llm_id:
                return llm
        return None  # explicit: previously fell off the end implicitly

    def load_llms(self):
        """Load the llms from the config file referenced by LLM_CONFIG_PATH."""
        path = os.environ.get("LLM_CONFIG_PATH")
        if not path:
            raise Exception("LLM_CONFIG_PATH not set")

        with open(path, "r") as file:
            loaded_llms = yaml.safe_load(file)

        self.entries = LlmList.model_validate({"llms": loaded_llms}).llms

    def get_llms_sorted_by_capabilities_score(
        self, requirements: RequirementList, invert_cost: bool = False
    ):
        """
        Return the llms sorted by their capability-to-requirement scores.

        Only models whose capabilities fulfill the requirements are considered;
        the best-scoring model comes first.

        :param requirements: The requirement profile to match against.
        :param invert_cost: Passed through to the capability scoring.
        """
        # (The docstring above used to sit *after* this assignment, where it
        # was a no-op string statement rather than a docstring.)
        valid_llms = [
            llm
            for llm in self.entries
            if capabilities_fulfill_requirements(llm.capabilities, requirements)
        ]
        scores = calculate_capability_scores(
            [llm.capabilities for llm in valid_llms], requirements, invert_cost
        )
        # Sort by descending score; zip pairs each score with its model.
        sorted_llms = sorted(zip(scores, valid_llms), key=lambda pair: -pair[0])
        return [llm for _, llm in sorted_llms]
Do not use the maximum of 3 issues if not required. 41 | It is possible that the conversation does not require to look at any code, for example, if the question is conceptual or random chatter. 42 | If you want to return no issues at all, return "!NONE!". 43 | -------------------------------------------------------------------------------- /docker/nginx/nginx_502.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Pyris Maintenance 5 | 6 | 7 | 18 | 19 |
20 | Asset 3 21 |

We’ll be back soon!

22 |
23 |

We’re performing some maintenance at the moment. Sorry for the inconvenience.

24 |

— Your Pyris Administrators

25 |
26 |
27 | 28 | -------------------------------------------------------------------------------- /app/llm/capability/requirement_list.py: -------------------------------------------------------------------------------- 1 | class RequirementList: 2 | """A class to represent the requirements you want to match against""" 3 | 4 | # Maximum cost in $ per 1k input tokens 5 | input_cost: float | None 6 | # Maximum cost in $ per 1k output tokens 7 | output_cost: float | None 8 | # The minimum GPT version that the model should be roughly equivalent to 9 | gpt_version_equivalent: float | None 10 | # The minimum speed of the model in tokens per second 11 | speed: float | None 12 | # The minimum context length of the model in tokens 13 | context_length: int | None 14 | # The vendor of the model e.g. "OpenAI" or "Anthropic" 15 | vendor: str | None 16 | # Whether the model should be privacy compliant to be used for sensitive data 17 | privacy_compliance: bool | None 18 | # Whether the model should be self-hosted 19 | self_hosted: bool | None 20 | # Whether the model should support image recognition 21 | image_recognition: bool | None 22 | # Whether the model should support a JSON mode 23 | json_mode: bool | None 24 | 25 | def __init__( 26 | self, 27 | input_cost: float | None = None, 28 | output_cost: float | None = None, 29 | gpt_version_equivalent: float | None = None, 30 | speed: float | None = None, 31 | context_length: int | None = None, 32 | vendor: str | None = None, 33 | privacy_compliance: bool | None = None, 34 | self_hosted: bool | None = None, 35 | image_recognition: bool | None = None, 36 | json_mode: bool | None = None, 37 | ) -> None: 38 | self.input_cost = input_cost 39 | self.output_cost = output_cost 40 | self.gpt_version_equivalent = gpt_version_equivalent 41 | self.speed = speed 42 | self.context_length = context_length 43 | self.vendor = vendor 44 | self.privacy_compliance = privacy_compliance 45 | self.self_hosted = self_hosted 46 | self.image_recognition = 
image_recognition 47 | self.json_mode = json_mode 48 | -------------------------------------------------------------------------------- /app/llm/request_handler/request_handler_interface.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Optional, Sequence, Union, Dict, Any, Type, Callable 3 | from langchain_core.tools import BaseTool 4 | from pydantic import BaseModel 5 | 6 | from .. import LanguageModel 7 | from ...common.pyris_message import PyrisMessage 8 | from ...domain.data.image_message_content_dto import ImageMessageContentDTO 9 | from ...llm import CompletionArguments 10 | 11 | 12 | class RequestHandler(BaseModel, metaclass=ABCMeta): 13 | """Interface for the request handlers""" 14 | 15 | @classmethod 16 | def __subclasshook__(cls, subclass) -> bool: 17 | return ( 18 | hasattr(subclass, "complete") 19 | and callable(subclass.complete) 20 | and hasattr(subclass, "chat") 21 | and callable(subclass.chat) 22 | and hasattr(subclass, "embed") 23 | and callable(subclass.embed) 24 | ) 25 | 26 | @abstractmethod 27 | def complete( 28 | self, 29 | prompt: str, 30 | arguments: CompletionArguments, 31 | image: Optional[ImageMessageContentDTO] = None, 32 | ) -> str: 33 | """Create a completion from the prompt""" 34 | raise NotImplementedError 35 | 36 | @abstractmethod 37 | def chat( 38 | self, 39 | messages: list[any], 40 | arguments: CompletionArguments, 41 | tools: Optional[ 42 | Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]] 43 | ], 44 | ) -> PyrisMessage: 45 | """Create a completion from the chat messages""" 46 | raise NotImplementedError 47 | 48 | @abstractmethod 49 | def embed(self, text: str) -> list[float]: 50 | """Create an embedding from the text""" 51 | raise NotImplementedError 52 | 53 | @abstractmethod 54 | def bind_tools( 55 | self, 56 | tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], 57 | ) -> 
class VectorDatabase:
    """
    Class to interact with the Weaviate vector database.

    The underlying Weaviate client is created once per process and shared by
    all instances (creation is guarded by a lock for thread-safe lazy init).
    """

    _lock = threading.Lock()
    _client_instance = None

    def __init__(self):
        with VectorDatabase._lock:
            if not VectorDatabase._client_instance:
                VectorDatabase._client_instance = weaviate.connect_to_local(
                    host=settings.weaviate.host,
                    port=settings.weaviate.port,
                    grpc_port=settings.weaviate.grpc_port,
                )
                logger.info("Weaviate client initialized")
        self.client = VectorDatabase._client_instance
        self.lectures = init_lecture_schema(self.client)
        self.faqs = init_faq_schema(self.client)

    def delete_collection(self, collection_name):
        """
        Delete a collection from the database, if it exists.

        :param collection_name: Name of the collection to delete.
        """
        if self.client.collections.exists(collection_name):
            # Lazy %s args so the message is only built when the level is enabled.
            if self.client.collections.delete(collection_name):
                logger.info("Collection %s deleted", collection_name)
            else:
                logger.error("Collection %s failed to delete", collection_name)

    def delete_object(self, collection_name, property_name, object_property):
        """
        Delete every object whose *property_name* equals *object_property*
        from the given collection.
        """
        collection = self.client.collections.get(collection_name)
        collection.data.delete_many(
            where=Filter.by_property(property_name).equal(object_property)
        )

    def get_client(self):
        """
        Get the shared Weaviate client.
        """
        return self.client
@router.get(
    "/courses/{course_id}/lectures/{lecture_id}/lectureUnits/{lecture_unit_id}/ingestion-state",
    dependencies=[Depends(TokenValidator())],
)
def get_lecture_unit_ingestion_state(
    course_id: int, lecture_id: int, lecture_unit_id: int, base_url: str = Query(...)
):
    """
    Report whether a lecture unit has been ingested into the vector database.

    :param course_id: ID of the course the lecture belongs to.
    :param lecture_id: ID of the lecture containing the unit.
    :param lecture_unit_id: ID of the lecture unit to check.
    :param base_url: URL-encoded base URL identifying the source instance.
    :return: 200 response whose body is {"state": "DONE"} when at least one
        matching object exists, otherwise {"state": "NOT_STARTED"}.
    """
    database = VectorDatabase()
    unit_filter = (
        Filter.by_property(LectureSchema.BASE_URL.value).equal(unquote(base_url))
        & Filter.by_property(LectureSchema.COURSE_ID.value).equal(course_id)
        & Filter.by_property(LectureSchema.LECTURE_ID.value).equal(lecture_id)
        & Filter.by_property(LectureSchema.LECTURE_UNIT_ID.value).equal(
            lecture_unit_id
        )
    )
    result = database.lectures.query.fetch_objects(
        filters=unit_filter,
        limit=1,
        return_properties=[LectureSchema.LECTURE_UNIT_NAME.value],
    )

    # Any hit means the unit was ingested; no hit means ingestion never ran.
    if result.objects:
        state = IngestionState.DONE
    else:
        state = IngestionState.NOT_STARTED
    return Response(
        status_code=status.HTTP_200_OK,
        content=json.dumps({"state": state.value}),
        media_type="application/json",
    )
4 | If the answer uses multiple pages from the same lecture, list the page numbers on the same line separated by commas in this format: [1] "Lecture title", "Lecture unit title", "page number1,number2,number3". 5 | Do not include the actual paragraphs, only the citations at the end. 6 | Only include the citations of the paragraphs that are relevant to the answer. 7 | If the answer actually does not contain any information from the paragraphs, please do not include any citations and return '!NONE!'. 8 | But if the answer contains information from the paragraphs, ALWAYS include citations. 9 | 10 | Here is an example of how to rewrite the answer with citations (ONLY ADD CITATION IF THE PROVIDED PARAGRAPHS ARE RELEVANT TO THE ANSWER): 11 | " 12 | Lorem ipsum dolor sit amet, consectetur adipiscing elit [1]. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua [2]. 13 | 14 | [1] Lecture 1, Unit A, page 2,3,4. 15 | [2] Lecture 2, Unit B, page 5,25. 16 | " 17 | 18 | Note: If there is no link available, please do not include the link in the citation. For example, if citation 1 does not have a link, it should look like this: 19 | [1] "Lecture title", "Lecture unit title", "page number" 20 | but if citation 2 has a link, it should look like this: 21 | [2] "Lecture title", "Lecture unit title", "page number" 22 | 23 | Here are the answer and the paragraphs: 24 | 25 | Answer without citations: 26 | {Answer} 27 | 28 | Paragraphs with their Lecture Names, Unit Names, Links and Page Numbers: 29 | {Paragraphs} 30 | 31 | If the answer actually does not contain any information from the paragraphs, please do not include any citations and return '!NONE!'.
class OpenAIEmbeddingModel(EmbeddingModel):
    """Base wrapper for OpenAI embedding endpoints with retry and backoff."""

    model: str
    api_key: str
    _client: OpenAI

    def embed(self, text: str) -> list[float]:
        """
        Create an embedding for the given text, retrying transient errors.

        Retries up to 5 times with exponential backoff between attempts
        (1 + 2 + 4 + 8 = 15 seconds of waiting at most).

        :param text: The text to embed.
        :return: The embedding vector as a list of floats.
        :raises Exception: When every attempt failed; the last OpenAI error
            is attached as the cause.
        """
        retries = 5
        backoff_factor = 2
        initial_delay = 1
        last_error = None

        for attempt in range(retries):
            try:
                response = self._client.embeddings.create(
                    model=self.model,
                    input=text,
                    encoding_format="float",
                )
                return response.data[0].embedding
            except (
                APIError,
                APITimeoutError,
                RateLimitError,
                InternalServerError,
            ) as error:
                last_error = error
                logging.exception(f"OpenAI error on attempt {attempt + 1}")
                # Fix: only sleep when another attempt follows. Previously the
                # loop slept the full 16s backoff after the *last* failure and
                # only then raised, wasting time for nothing.
                if attempt < retries - 1:
                    wait_time = initial_delay * (backoff_factor**attempt)
                    logging.info(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
        # Chain the last OpenAI error so the root cause is not lost.
        raise Exception(
            f"Failed to get embedding from OpenAI after {retries} retries."
        ) from last_error
class BasicRequestHandler(RequestHandler):
    """Request handler that routes every call to one fixed model chosen by ID."""

    model_id: str
    llm_manager: LlmManager | None = None
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __init__(self, model_id: str):
        super().__init__(model_id=model_id, llm_manager=None)
        self.model_id = model_id
        self.llm_manager = LlmManager()

    def _resolve_llm(self) -> LanguageModel:
        """Look up the configured language model from the manager."""
        return self.llm_manager.get_llm_by_id(self.model_id)

    def complete(
        self,
        prompt: str,
        arguments: CompletionArguments,
        image: Optional[ImageMessageContentDTO] = None,
    ) -> str:
        """Delegate a text completion request to the configured model."""
        return self._resolve_llm().complete(prompt, arguments, image)

    def chat(
        self,
        messages: list[PyrisMessage],
        arguments: CompletionArguments,
        tools: Optional[
            Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]]
        ],
    ) -> PyrisMessage:
        """Delegate a chat completion request to the configured model."""
        return self._resolve_llm().chat(messages, arguments, tools)

    def embed(self, text: str) -> list[float]:
        """Delegate an embedding request to the configured model."""
        return self._resolve_llm().embed(text)

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
    ) -> LanguageModel:
        """
        Binds a sequence of tools to the language model.

        Args:
            tools (Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]]):
                A sequence of tools to be bound.

        Returns:
            LanguageModel: The language model with tools bound.
        """
        llm = self._resolve_llm()
        llm.bind_tools(tools)
        return llm
(context.ref.startsWith("refs/tags/")) { 46 | return context.ref.slice(10); 47 | } 48 | if (context.ref === "refs/heads/main") { 49 | return "latest"; 50 | } 51 | } 52 | return "FALSE"; 53 | - uses: actions/checkout@v3 54 | - name: Set up QEMU 55 | uses: docker/setup-qemu-action@v3 56 | - name: Set up Docker Buildx 57 | uses: docker/setup-buildx-action@v2 58 | # Build and Push to GitHub Container Registry 59 | - name: Login to GitHub Container Registry 60 | uses: docker/login-action@v2 61 | if: ${{ steps.compute-tag.outputs.result != 'FALSE' }} 62 | with: 63 | registry: ghcr.io 64 | username: ${{ github.repository_owner }} 65 | password: ${{ secrets.GITHUB_TOKEN }} 66 | - name: Build and Push to GitHub Container Registry 67 | uses: docker/build-push-action@v6 68 | if: ${{ steps.compute-tag.outputs.result != 'FALSE' }} 69 | with: 70 | platforms: amd64, arm64 71 | file: ./Dockerfile 72 | context: . 73 | tags: ghcr.io/ls1intum/pyris:${{ steps.compute-tag.outputs.result }} 74 | push: true 75 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | docker-tag: 7 | required: true 8 | type: string 9 | branch-name: 10 | required: true 11 | type: string 12 | environment-name: 13 | required: true 14 | type: string 15 | environment-url: 16 | required: true 17 | type: string 18 | secrets: 19 | DEPLOYMENT_GATEWAY_SSH_KEY: 20 | required: true 21 | 22 | concurrency: deploy 23 | 24 | env: 25 | RAW_URL: https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }} 26 | 27 | 28 | jobs: 29 | deploy: 30 | runs-on: ubuntu-latest 31 | 32 | environment: 33 | name: ${{ inputs.environment-name }} 34 | url: ${{ inputs.environment-url }} 35 | 36 | env: 37 | DOCKER_TAG: ${{ inputs.docker-tag }} 38 | BRANCH_NAME: ${{ inputs.branch-name }} 39 | DEPLOYMENT_USER: ${{ 
vars.DEPLOYMENT_USER }} 40 | DEPLOYMENT_HOST: ${{ vars.DEPLOYMENT_HOST }} 41 | DEPLOYMENT_FOLDER: ${{ vars.DEPLOYMENT_FOLDER }} 42 | DEPLOYMENT_HOST_PUBLIC_KEYS: ${{ vars.DEPLOYMENT_HOST_PUBLIC_KEYS }} 43 | GATEWAY_USER: "jump" 44 | GATEWAY_HOST: "gateway.artemis.in.tum.de:2010" 45 | GATEWAY_HOST_PUBLIC_KEY: "[gateway.artemis.in.tum.de]:2010 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKtTLiKRILjKZ+Qg4ReWKsG7mLDXkzHfeY5nalSQUNQ4" 46 | 47 | steps: 48 | # Download pyris-server-cli from GH without cloning the Repo 49 | - name: Fetch Pyris CLI 50 | run: | 51 | wget ${{ env.RAW_URL }}/pyris-server-cli 52 | chmod +x pyris-server-cli 53 | 54 | # Configure SSH Key 55 | - name: Setup SSH Keys and known_hosts 56 | env: 57 | SSH_AUTH_SOCK: /tmp/ssh_agent.sock 58 | GATEWAY_SSH_KEY: "${{ secrets.DEPLOYMENT_GATEWAY_SSH_KEY }}" 59 | DEPLOYMENT_SSH_KEY: "${{ secrets.DEPLOYMENT_SSH_KEY }}" 60 | run: | 61 | mkdir -p ~/.ssh 62 | ssh-agent -a $SSH_AUTH_SOCK > /dev/null 63 | ssh-add - <<< $GATEWAY_SSH_KEY 64 | ssh-add - <<< $DEPLOYMENT_SSH_KEY 65 | cat - <<< $GATEWAY_HOST_PUBLIC_KEY >> ~/.ssh/known_hosts 66 | cat - <<< $(sed 's/\\n/\n/g' <<< "$DEPLOYMENT_HOST_PUBLIC_KEYS") >> ~/.ssh/known_hosts 67 | 68 | - name: Deploy Pyris with Docker 69 | env: 70 | SSH_AUTH_SOCK: /tmp/ssh_agent.sock 71 | run: | 72 | ./pyris-server-cli docker-deploy "$DEPLOYMENT_USER@$DEPLOYMENT_HOST" -g "$GATEWAY_USER@$GATEWAY_HOST" -t $DOCKER_TAG -b $BRANCH_NAME -d $DEPLOYMENT_FOLDER -y 73 | -------------------------------------------------------------------------------- /app/pipeline/shared/summary_pipeline.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from langchain_core.output_parsers import StrOutputParser 5 | from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate 6 | from langchain_core.runnables import Runnable 7 | 8 | from ...llm import CapabilityRequestHandler, RequirementList 9 | from ...llm.langchain import 
class SummaryPipeline(Pipeline):
    """A generic summary pipeline that can be used to summarize any text."""

    llm: IrisLangchainCompletionModel
    pipeline: Runnable
    prompt_str: str
    prompt: ChatPromptTemplate

    def __init__(self):
        super().__init__(implementation_id="summary_pipeline")
        # Set the langchain chat model via a capability-based request handler
        request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=3.5,
                context_length=4096,
            )
        )
        self.llm = IrisLangchainCompletionModel(
            request_handler=request_handler, max_tokens=1000
        )
        # Load the prompt from a file. Fix: pin the encoding to UTF-8 so the
        # prompt is read identically on every platform (previously the
        # locale-dependent default encoding was used).
        dirname = os.path.dirname(__file__)
        prompt_path = os.path.join(dirname, "../prompts/summary_prompt.txt")
        with open(prompt_path, "r", encoding="utf-8") as file:
            logger.info("Loading summary prompt...")
            self.prompt_str = file.read()
        # Create the prompt template with the file content as system message
        self.prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessagePromptTemplate.from_template(self.prompt_str),
            ]
        )
        # Create the pipeline: prompt -> LLM -> plain string output
        self.pipeline = self.prompt | self.llm | StrOutputParser()
        self.tokens = []

    def __repr__(self):
        return f"{self.__class__.__name__}(llm={self.llm})"

    def __str__(self):
        return f"{self.__class__.__name__}(llm={self.llm})"

    def __call__(self, query: str, **kwargs) -> str:
        """
        Runs the pipeline
        :param query: The query
        :param kwargs: keyword arguments
        :return: summary text as string
        :raises ValueError: if query is None
        """
        if query is None:
            raise ValueError("Query must not be None")
        logger.info("Running summary pipeline...")
        response: str = self.pipeline.invoke({"text": query})
        logger.info(f"Response from summary pipeline: {response[:20]}...")
        return response
class FaqRetrieval(BaseRetrieval):
    """Retrieves course FAQs by combining a rewritten-query search with a
    hypothetical-answer (HyDE) search and merging the two result sets."""

    def __init__(self, client: WeaviateClient, **kwargs):
        super().__init__(
            client, init_faq_schema, implementation_id="faq_retrieval_pipeline"
        )

    def get_schema_properties(self) -> List[str]:
        """Weaviate properties returned for every retrieved FAQ."""
        return [
            FaqSchema.COURSE_ID.value,
            FaqSchema.FAQ_ID.value,
            FaqSchema.QUESTION_TITLE.value,
            FaqSchema.QUESTION_ANSWER.value,
        ]

    @traceable(name="Full Faq Retrieval")
    def __call__(
        self,
        chat_history: list[PyrisMessage],
        student_query: str,
        result_limit: int,
        course_name: str = None,
        course_id: int = None,
        problem_statement: str = None,
        exercise_title: str = None,
        base_url: str = None,
    ) -> List[dict]:
        """Run both retrieval strategies in parallel and merge their chunks."""
        course_language = self.fetch_course_language(course_id)

        response, response_hyde = self.run_parallel_rewrite_tasks(
            chat_history=chat_history,
            student_query=student_query,
            result_limit=result_limit,
            course_language=course_language,
            initial_prompt=faq_retriever_initial_prompt,
            rewrite_prompt=rewrite_student_query_prompt,
            hypothetical_answer_prompt=write_hypothetical_answer_prompt,
            pipeline_enum=PipelineEnum.IRIS_FAQ_RETRIEVAL_PIPELINE,
            course_name=course_name,
            course_id=course_id,
        )

        def as_chunks(query_response) -> list[dict[str, dict]]:
            # Normalize a Weaviate response into {id, properties} dicts.
            return [
                {"id": obj.uuid.int, "properties": obj.properties}
                for obj in query_response.objects
            ]

        return merge_retrieved_chunks(as_chunks(response), as_chunks(response_hyde))
@app.middleware("http")
async def some_middleware(request: Request, call_next):
    """Buffer each request/response body and log both after the response is sent."""
    req_body = await request.body()
    response = await call_next(request)

    # Drain the streaming body so it can be logged and re-sent as one payload.
    chunks = [chunk async for chunk in response.body_iterator]
    res_body = b"".join(chunks)

    # Logging runs as a background task, after the response has gone out.
    return Response(
        content=res_body,
        status_code=response.status_code,
        headers=dict(response.headers),
        media_type=response.media_type,
        background=BackgroundTask(log_info, req_body, res_body),
    )
Your teaching assistants will use your feedback to \ 6 | improve the exercise. 7 | 8 | Parts of a programming exercise: 9 | - Problem statement: The description of the exercise containing tasks that the student needs to solve. 10 | - Template repository: The starting point from which the student will start solving the exercise. 11 | - Solution repository: The sample solution set by the instructor to compare the student's solution against. 12 | 13 | To not overburden you, you will be provided with the problem statement and one of the template plus solution files \ 14 | at a time. You need to compare the problem statement with the template file and identify any consistency issues. 15 | 16 | 17 | 18 | {problem_statement} 19 | 20 | 21 | 22 | {template_file} 23 | 24 | 25 | 26 | {solution_file} 27 | 28 | 29 | 30 | Respond with any potential consistency issues found in the exercise formatted in markdown. \ 31 | Just provide the easily digestible formatted markdown without other explanations. It is fine to provide no issues if \ 32 | you are confident that the files are consistent. 33 | 34 | """ 35 | 36 | prettify_prompt = """\ 37 | 38 | You are a detail-oriented expert instructor at an Ivy League university ensuring the quality of programming exercises. \ 39 | Your task is to find consistency issues as part of the exercise creation process to make sure that the exercise is \ 40 | without any errors or inconsistencies that might confuse students. 41 | In a previous step you already found potential consistency issues as part of the exercise creation process on a file \ 42 | level. Now, you need to summarize the issues found in the exercise so the teaching assistants can fix them. 43 | 44 | Parts of a programming exercise: 45 | - Problem statement: The description of the exercise containing tasks that the student needs to solve. 46 | - Template repository: The starting point from which the student will start solving the exercise. 
class RewritingPipeline(Pipeline):
    """Rewrites a given text (an FAQ or a problem statement) via the LLM and
    reports the result through the callback."""

    callback: RewritingCallback
    request_handler: CapabilityRequestHandler
    output_parser: PydanticOutputParser
    variant: Literal["faq", "problem_statement"]

    def __init__(
        self, callback: RewritingCallback, variant: Literal["faq", "problem_statement"]
    ):
        super().__init__(implementation_id="rewriting_pipeline_reference_impl")
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=4.5,
                context_length=16385,
            )
        )
        self.tokens = []
        self.variant = variant

    def __call__(
        self,
        dto: RewritingPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """
        Rewrite ``dto.to_be_rewritten`` using the variant-specific system
        prompt and deliver the cleaned result via the callback.

        :param dto: Execution DTO carrying the text to rewrite.
        :param prompt: Unused; kept for interface compatibility.
        :raises ValueError: if no text to rewrite was provided.
        """
        if not dto.to_be_rewritten:
            raise ValueError("You need to provide a text to rewrite")

        variant_prompts = {
            "faq": system_prompt_faq,
            "problem_statement": system_prompt_problem_statement,
        }
        # Fix: removed leftover debug `print(variant_prompts[self.variant])`
        # that dumped the raw prompt template to stdout on every run.
        prompt_text = variant_prompts[self.variant].format(
            rewritten_text=dto.to_be_rewritten,
        )
        prompt = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[TextMessageContentDTO(text_content=prompt_text)],
        )

        response = self.request_handler.chat(
            [prompt], CompletionArguments(temperature=0.4), tools=None
        )
        self._append_tokens(response.token_usage, PipelineEnum.IRIS_REWRITING_PIPELINE)
        response = response.contents[0].text_content

        # remove ``` from start and end if exists
        if response.startswith("```") and response.endswith("```"):
            response = response[3:-3]
        if response.startswith("markdown"):
            response = response[8:]
        response = response.strip()

        final_result = response
        self.callback.done(final_result=final_result, tokens=self.tokens)
def calculate_capability_scores(
    capabilities: list[CapabilityList],
    requirements: RequirementList,
    invert_cost: bool = False,
) -> list[int]:
    """Calculate the scores of the capabilities against the requirements.

    :param capabilities: Capability lists to score, one per model.
    :param requirements: Requirements to score against; None entries are
        skipped unless they have an always-considered default.
    :param invert_cost: Whether to invert the cost scores (depends on how
        the caller wants to rank cost).
    :return: One aggregated score per entry in ``capabilities``.
    """
    all_scores = []

    for requirement in requirements.__dict__.keys():
        requirement_value = getattr(requirements, requirement)
        if (
            requirement_value is None
            and requirement not in always_considered_capabilities_with_default
        ):
            continue

        # Calculate the scores for each capability
        scores = []
        for capability in capabilities:
            if (
                requirement_value is None
                and requirement in always_considered_capabilities_with_default
            ):
                # If the requirement is not set, use the default value if necessary
                score = getattr(capability, requirement).matches(
                    always_considered_capabilities_with_default[requirement]
                )
            else:
                score = getattr(capability, requirement).matches(requirement_value)
            # Invert the cost if required
            # The cost is a special case, as depending on how you want to use the scores
            # the cost needs to be considered differently
            if (
                requirement in ["input_cost", "output_cost"]
                and invert_cost
                and score != 0
            ):
                score = 1 / score
            scores.append(score)

        # Normalize the scores between 0 and 1 and multiply by the weight modifier
        # The normalization here is based on the position of the score in the sorted list to balance out
        # the different ranges of the capabilities
        sorted_scores = sorted(set(scores))
        weight_modifier = capability_weights[requirement]
        normalized_scores = [
            ((sorted_scores.index(score) + 1) / len(sorted_scores)) * weight_modifier
            for score in scores
        ]
        all_scores.append(normalized_scores)

    # Fix: when no requirement contributed any scores (all requirements were
    # None and none had an always-considered default), the previous code
    # indexed all_scores[0] and raised IndexError. Treat every capability as
    # equally (un)suitable instead.
    if not all_scores:
        return [0] * len(capabilities)

    # Sum the per-requirement scores column-wise: one final score per capability
    return [sum(capability_scores) for capability_scores in zip(*all_scores)]
| """Abstract class for the llm chat completion wrappers""" 40 | 41 | @classmethod 42 | def __subclasshook__(cls, subclass) -> bool: 43 | return hasattr(subclass, "chat") and callable(subclass.chat) 44 | 45 | @abstractmethod 46 | def chat( 47 | self, 48 | messages: list[PyrisMessage], 49 | arguments: CompletionArguments, 50 | tools: Optional[ 51 | Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]] 52 | ], 53 | ) -> ChatCompletionMessage: 54 | """Create a completion from the chat messages""" 55 | raise NotImplementedError( 56 | f"The LLM {self.__str__()} does not support chat completion" 57 | ) 58 | 59 | 60 | class EmbeddingModel(LanguageModel, metaclass=ABCMeta): 61 | """Abstract class for the llm embedding wrappers""" 62 | 63 | @classmethod 64 | def __subclasshook__(cls, subclass) -> bool: 65 | return hasattr(subclass, "embed") and callable(subclass.embed) 66 | 67 | @abstractmethod 68 | def embed(self, text: str) -> list[float]: 69 | """Create an embedding from the text""" 70 | raise NotImplementedError( 71 | f"The LLM {self.__str__()} does not support embeddings" 72 | ) 73 | 74 | 75 | class ImageGenerationModel(LanguageModel, metaclass=ABCMeta): 76 | """Abstract class for the llm image generation wrappers""" 77 | 78 | @classmethod 79 | def __subclasshook__(cls, subclass): 80 | return hasattr(subclass, "generate_images") and callable( 81 | subclass.generate_images 82 | ) 83 | 84 | @abstractmethod 85 | def generate_images( 86 | self, 87 | prompt: str, 88 | n: int = 1, 89 | size: str = "256x256", 90 | quality: str = "standard", 91 | **kwargs, 92 | ) -> list: 93 | """Create an image from the prompt""" 94 | raise NotImplementedError( 95 | f"The LLM {self.__str__()} does not support image generation" 96 | ) 97 | -------------------------------------------------------------------------------- /app/vector_database/faq_schema.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from 
from enum import Enum

from weaviate.classes.config import Property
from weaviate import WeaviateClient
from weaviate.collections import Collection
from weaviate.collections.classes.config import Configure, VectorDistances, DataType


class FaqSchema(Enum):
    """
    Schema for the faqs
    """

    COLLECTION_NAME = "Faqs"
    COURSE_NAME = "course_name"
    COURSE_DESCRIPTION = "course_description"
    COURSE_LANGUAGE = "course_language"
    COURSE_ID = "course_id"
    FAQ_ID = "faq_id"
    QUESTION_TITLE = "question_title"
    QUESTION_ANSWER = "question_answer"


def init_faq_schema(client: WeaviateClient) -> Collection:
    """
    Initialize the schema for the faqs.

    If the collection already exists, it is migrated in place (the
    'course_language' property is added when missing) and returned;
    otherwise a fresh collection with the full property set is created.

    :param client: Connected Weaviate client.
    :return: The (possibly newly created) FAQ collection.
    """
    if client.collections.exists(FaqSchema.COLLECTION_NAME.value):
        collection = client.collections.get(FaqSchema.COLLECTION_NAME.value)
        # Migration for older deployments: add 'course_language' if missing.
        # Loop variable renamed from `property` to `prop` to avoid shadowing
        # the `property` builtin.
        if not any(
            prop.name == FaqSchema.COURSE_LANGUAGE.value
            for prop in collection.config.get(simple=False).properties
        ):
            collection.config.add_property(
                Property(
                    name=FaqSchema.COURSE_LANGUAGE.value,
                    description="The language of the COURSE",
                    data_type=DataType.TEXT,
                    index_searchable=False,
                )
            )
        return collection

    # Vectors are supplied externally, so no vectorizer is configured.
    return client.collections.create(
        name=FaqSchema.COLLECTION_NAME.value,
        vectorizer_config=Configure.Vectorizer.none(),
        vector_index_config=Configure.VectorIndex.hnsw(
            distance_metric=VectorDistances.COSINE
        ),
        properties=[
            Property(
                name=FaqSchema.COURSE_ID.value,
                description="The ID of the course",
                data_type=DataType.INT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.COURSE_NAME.value,
                description="The name of the course",
                data_type=DataType.TEXT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.COURSE_DESCRIPTION.value,
                description="The description of the COURSE",
                data_type=DataType.TEXT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.COURSE_LANGUAGE.value,
                description="The language of the COURSE",
                data_type=DataType.TEXT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.FAQ_ID.value,
                description="The ID of the Faq",
                data_type=DataType.INT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.QUESTION_TITLE.value,
                description="The title of the faq",
                data_type=DataType.TEXT,
            ),
            Property(
                name=FaqSchema.QUESTION_ANSWER.value,
                description="The answer of the faq",
                data_type=DataType.TEXT,
            ),
        ],
    )
output_cost: 30 46 | privacy_compliance: true 47 | self_hosted: false 48 | vendor: OpenAI 49 | description: GPT 4 Turbo 128k on Azure 50 | endpoint: '' 51 | id: azure-gpt-4-turbo 52 | model: gpt-4-turbo 53 | name: GPT 4 Turbo 54 | type: azure_chat 55 | - api_key: 56 | api_version: 2024-02-15-preview 57 | azure_deployment: gpt-4o 58 | capabilities: 59 | context_length: 128000 60 | gpt_version_equivalent: 4.5 61 | image_recognition: true 62 | input_cost: 5 63 | json_mode: true 64 | output_cost: 15 65 | privacy_compliance: true 66 | self_hosted: false 67 | vendor: OpenAI 68 | description: GPT 4 Omni on Azure 69 | endpoint: '' 70 | id: azure-gpt-4-omni 71 | model: gpt-4o 72 | name: GPT 4 Omni 73 | type: azure_chat 74 | - api_key: 75 | api_version: 2023-03-15-preview 76 | azure_deployment: gpt-4o-mini 77 | capabilities: 78 | context_length: 128000 79 | gpt_version_equivalent: 4.25 80 | image_recognition: true 81 | input_cost: 0.15 82 | json_mode: true 83 | output_cost: 0.075 84 | privacy_compliance: true 85 | self_hosted: false 86 | vendor: OpenAI 87 | description: GPT 4 Omni Mini on Azure 88 | endpoint: '' 89 | id: azure-gpt-4-omni-mini 90 | model: gpt-4o-mini 91 | name: GPT 4 Omni Mini 92 | type: azure_chat 93 | - api_key: 94 | api_version: '2023-05-15T00:00:00.000Z' 95 | azure_deployment: te-3-large 96 | capabilities: 97 | context_length: 8191 98 | input_cost: 0.13 99 | output_cost: 0.065 100 | privacy_compliance: true 101 | self_hosted: false 102 | vendor: OpenAI 103 | description: Embedding Large 8k Azure 104 | endpoint: '' 105 | id: embedding-large 106 | model: text-embedding-3-large 107 | name: Embedding Large 108 | type: azure_embedding 109 | - api_key: 110 | api_version: 2024-02-15-preview 111 | azure_deployment: te-3-small 112 | capabilities: 113 | context_length: 8191 114 | input_cost: 0.02 115 | output_cost: 0 116 | privacy_compliance: true 117 | self_hosted: false 118 | vendor: OpenAI 119 | description: Embedding Small 8k Azure 120 | endpoint: '' 121 | id: 
import logging
from logging import Logger
from typing import List, Optional, Any, Sequence, Union, Dict, Type, Callable

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import (
    BaseChatModel,
)
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatResult
from langchain_core.outputs.chat_generation import ChatGeneration
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field

from app.common.PipelineEnum import PipelineEnum
from app.common.token_usage_dto import TokenUsageDTO
from ...common.message_converters import (
    convert_langchain_message_to_iris_message,
    convert_iris_message_to_langchain_message,
)
from ...llm import RequestHandler, CompletionArguments


class IrisLangchainChatModel(BaseChatModel):
    """Custom langchain chat model for our own request handler."""

    request_handler: RequestHandler
    completion_args: CompletionArguments
    # Token usage of the most recent _generate call; None until the first call.
    tokens: Optional[TokenUsageDTO] = None
    logger: Logger = logging.getLogger(__name__)
    tools: Optional[
        Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]]
    ] = Field(default_factory=list, alias="tools")

    def __init__(
        self,
        request_handler: RequestHandler,
        completion_args: Optional[CompletionArguments] = None,
        **kwargs: Any,
    ) -> None:
        # Create a fresh CompletionArguments per instance. The previous default
        # (`= CompletionArguments(stop=None)`) was a single shared object, and
        # _generate mutates `completion_args.stop`, so state leaked across all
        # instances constructed without an explicit argument.
        if completion_args is None:
            completion_args = CompletionArguments(stop=None)
        super().__init__(
            request_handler=request_handler, completion_args=completion_args, **kwargs
        )

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        """Bind a sequence of tools to the request handler for function calling support.

        Args:
            tools: Sequence of tools that can be one of:
                - Dict describing the tool
                - Pydantic BaseModel
                - Callable function
                - BaseTool instance
            **kwargs: Additional arguments passed to the request handler

        Returns:
            self: Returns this instance as a Runnable

        Raises:
            ValueError: If tools sequence is empty or contains invalid tool types
        """
        if not tools:
            raise ValueError("At least one tool must be provided")

        self.tools = tools
        return self

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Generate one chat completion via the Iris request handler."""
        iris_messages = [convert_langchain_message_to_iris_message(m) for m in messages]
        self.completion_args.stop = stop
        iris_message = self.request_handler.chat(
            iris_messages, self.completion_args, self.tools
        )
        base_message = convert_iris_message_to_langchain_message(iris_message)
        chat_generation = ChatGeneration(message=base_message)
        # Capture usage of this call so callers can collect per-pipeline costs.
        self.tokens = TokenUsageDTO(
            model=iris_message.token_usage.model_info,
            numInputTokens=iris_message.token_usage.num_input_tokens,
            costPerMillionInputToken=iris_message.token_usage.cost_per_input_token,
            numOutputTokens=iris_message.token_usage.num_output_tokens,
            costPerMillionOutputToken=iris_message.token_usage.cost_per_output_token,
            pipeline=PipelineEnum.NOT_SET,
        )
        return ChatResult(generations=[chat_generation])

    @property
    def _llm_type(self) -> str:
        return "Iris"
import logging
from typing import List, Optional

from langchain_core.prompts import (
    ChatPromptTemplate,
)
from app.common.pyris_message import IrisMessageRole, PyrisMessage
from app.domain.chat.exercise_chat.exercise_chat_pipeline_execution_dto import (
    ExerciseChatPipelineExecutionDTO,
)
from app.domain.data.text_message_content_dto import TextMessageContentDTO
from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel
from app.pipeline.prompts.chat_gpt_wrapper_prompts import chat_gpt_initial_system_prompt
from langchain_core.runnables import Runnable

from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.pipeline import Pipeline
from app.web.status.status_update import ChatGPTWrapperStatusCallback

logger = logging.getLogger(__name__)


def convert_chat_history_to_str(chat_history: List[PyrisMessage]) -> str:
    """
    Converts the chat history to a string
    :param chat_history: The chat history
    :return: The chat history as a string
    """

    def map_message_role(role: IrisMessageRole) -> str:
        if role == IrisMessageRole.SYSTEM:
            return "System"
        elif role == IrisMessageRole.ASSISTANT:
            return "AI Tutor"
        elif role == IrisMessageRole.USER:
            return "Student"
        else:
            return "Unknown"

    def format_message(message: PyrisMessage) -> str:
        # Built without nested same-quote f-strings: the original expression
        # was only valid syntax on Python >= 3.12 (PEP 701) and hard to read.
        # Output format is unchanged, including the spacing.
        timestamp = ""
        if message.sent_at:
            timestamp = f"at {message.sent_at.strftime('%Y-%m-%d %H:%M:%S')}"
        return (
            f"{map_message_role(message.sender)} {timestamp}: "
            f"{message.contents[0].text_content}"
        )

    return "\n\n".join(format_message(message) for message in chat_history)


class ChatGPTWrapperPipeline(Pipeline):
    """Thin pipeline that forwards the chat history to a GPT model unchanged."""

    callback: ChatGPTWrapperStatusCallback
    llm: IrisLangchainChatModel
    pipeline: Runnable

    def __init__(self, callback: Optional[ChatGPTWrapperStatusCallback] = None):
        super().__init__(implementation_id="chat_gpt_wrapper_pipeline_reference_impl")
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=4.5,
                context_length=16385,
            )
        )

    def __call__(
        self,
        dto: ExerciseChatPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """
        Run the ChatGPT wrapper pipeline.
        This consists of a single response generation step.
        """

        self.callback.in_progress()
        pyris_system_prompt = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[
                TextMessageContentDTO(text_content=chat_gpt_initial_system_prompt)
            ],
        )

        # Drop messages with no usable text content before forwarding.
        prompts = [pyris_system_prompt] + [
            msg
            for msg in dto.chat_history
            if msg.contents is not None
            and len(msg.contents) > 0
            and msg.contents[0].text_content
            and len(msg.contents[0].text_content) > 0
        ]

        response = self.request_handler.chat(
            prompts, CompletionArguments(temperature=0.5, max_tokens=2000), tools=None
        )

        # Lazy %-style args avoid formatting cost when the level is disabled.
        logger.info("ChatGPTWrapperPipeline response: %s", response)

        if (
            response.contents is None
            or len(response.contents) == 0
            or response.contents[0].text_content is None
            or len(response.contents[0].text_content) == 0
        ):
            self.callback.error("ChatGPT did not reply. Try resending.")
            # Print lots of debug info for this case
            logger.error("ChatGPTWrapperPipeline response: %s", response)
            logger.error("ChatGPTWrapperPipeline request: %s", prompts)
            return

        self.callback.done(final_result=response.contents[0].text_content)
# Prompts for the rewriting pipeline. Both templates are filled via .format()
# with the `rewritten_text` placeholder; literal braces are escaped as `{{ }}`.

system_prompt_faq = """\
You are an excellent tutor with expertise in computer science and its practical applications, teaching at a university
level. Your task is to proofread and enhance the given FAQ text. Please follow these guidelines:

1. Correct all spelling and grammatical errors.
2. Ensure the text is written in simple and clear language, making it easy to understand for students.
3. Preserve the original meaning and intent of the text while maintaining clarity.
4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, \
convert them into complete sentences.
5. Make sure to use the original language of the input text.
6. Avoid repeating any information that is already present in the text.
7. Make sure to keep the markdown formatting intact and add formatting for the most important information.
8. If someone does input a very short text that does not resemble an answer to a potential question, please make \
sure to respond accordingly. Also, if the input text is too short, please point this out.

Additionally for Short Inputs: If the input text is too short and does not resemble an answer to a potential question, \
respond appropriately and point this out.
Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform.
Ensure it is clear, concise, and well-structured.

Exclude the start and end markers from your response and provide only the improved content.

The markers are defined as following:
Start of the text: ###START###
End of the text: ###END###

The text that has to be rewritten starts now:

###START###
{rewritten_text}
###END###\
"""

system_prompt_problem_statement = """\

You are an excellent tutor with deep expertise in **computer science** and **practical applications**, teaching at the \
university level. Your goal is to **proofread and refine** the problem statement you are given, focusing on what \
students need most.

Follow these instructions carefully:
1. **Correct all spelling and grammatical errors.**
Make sure the text reads clearly and accurately.

2. **Use simple, clear, student-focused language.**
The rewritten statement should be as understandable as possible for students. Avoid overly complex words or phrasing.

3. **Preserve the original meaning and intent.**
Do not remove or alter any tasks, test instructions, or technical details. All tasks and references (e.g., \
`[task]`, test names, UML diagrams) must remain intact.

4. **Write in complete sentences.**
If you are given bullet points or lists, convert them into complete sentences whenever possible. However, you can \
still use bullet points if they make the problem statement clearer.

5. **Keep the original language of the text.**
If the input is in English, do not switch to another language, and vice versa.

6. **Do not repeat information unnecessarily.**
Condense any redundant content, but make sure no new information is lost and nothing is removed.

7. **Retain and properly format existing markdown and any extended syntax.**
This includes:
- Code blocks, UML diagrams (`@startuml ... @enduml`).
- Special test case references like `testBubbleSort()`, `testConstructors[Policy]`, etc.
- Additional markdown features (e.g. `` or `$$ e^{{\\frac{{1}}{{4}} y^2}} $$`).
- Task syntax `[task][Task Description](testCaseName)`.

8. **Emphasize critical information.**
Use bold or italic text (or other markdown elements) when highlighting essential steps or requirements to help \
students quickly identify what is most important.

9. **Maintain a supportive, instructive tone.**
Write as if you are addressing students directly, ensuring they understand the objectives, tasks, and relevance of \
each component.



{rewritten_text}



Respond with a single string containing only the improved version. The output should be the optimized problem \
statement, ready to be shown directly to students.

"""
47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | cover/ 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | local_settings.py 78 | db.sqlite3 79 | db.sqlite3-journal 80 | 81 | # Flask stuff: 82 | instance/ 83 | .webassets-cache 84 | 85 | # Scrapy stuff: 86 | .scrapy 87 | 88 | # Sphinx documentation 89 | docs/_build/ 90 | 91 | # PyBuilder 92 | .pybuilder/ 93 | target/ 94 | 95 | # Jupyter Notebook 96 | .ipynb_checkpoints 97 | 98 | # IPython 99 | profile_default/ 100 | ipython_config.py 101 | 102 | # pyenv 103 | # For a library or package, you might want to ignore these files since the code is 104 | # intended to run in multiple environments; otherwise, check them in: 105 | # .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies. 112 | #Pipfile.lock 113 | 114 | # poetry 115 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 116 | # This is especially recommended for binary packages to ensure reproducibility, and is more 117 | # commonly ignored for libraries. 
118 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 119 | #poetry.lock 120 | 121 | # pdm 122 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 123 | #pdm.lock 124 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 125 | # in version control. 126 | # https://pdm.fming.dev/#use-with-ide 127 | .pdm.toml 128 | 129 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 130 | __pypackages__/ 131 | 132 | # Celery stuff 133 | celerybeat-schedule 134 | celerybeat.pid 135 | 136 | # SageMath parsed files 137 | *.sage.py 138 | 139 | # Environments 140 | .env 141 | .venv 142 | env/ 143 | venv/ 144 | ENV/ 145 | env.bak/ 146 | venv.bak/ 147 | 148 | # Spyder project settings 149 | .spyderproject 150 | .spyproject 151 | 152 | # Rope project settings 153 | .ropeproject 154 | 155 | # mkdocs documentation 156 | /site 157 | 158 | # mypy 159 | .mypy_cache/ 160 | .dmypy.json 161 | dmypy.json 162 | 163 | # Pyre type checker 164 | .pyre/ 165 | 166 | # pytype static type analyzer 167 | .pytype/ 168 | 169 | # Cython debug symbols 170 | cython_debug/ 171 | 172 | # PyCharm 173 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 174 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 175 | # and can be added to the global gitignore or merged into this file. For a more nuclear 176 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
import logging
from typing import Optional

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import (
    ChatPromptTemplate,
)

from app.common.PipelineEnum import PipelineEnum
from app.common.pyris_message import PyrisMessage, IrisMessageRole
from app.domain import (
    CompetencyExtractionPipelineExecutionDTO,
)
from app.domain.data.text_message_content_dto import TextMessageContentDTO
from app.domain.data.competency_dto import Competency
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.pipeline import Pipeline
from app.web.status.status_update import CompetencyExtractionCallback
from app.pipeline.prompts.competency_extraction import system_prompt

logger = logging.getLogger(__name__)


class CompetencyExtractionPipeline(Pipeline):
    """Generates course competencies from a course description via an LLM."""

    callback: CompetencyExtractionCallback
    request_handler: CapabilityRequestHandler
    output_parser: PydanticOutputParser

    def __init__(self, callback: Optional[CompetencyExtractionCallback] = None):
        super().__init__(
            implementation_id="competency_extraction_pipeline_reference_impl"
        )
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=4.5,
                context_length=16385,
            )
        )
        self.output_parser = PydanticOutputParser(pydantic_object=Competency)
        self.tokens = []

    def __call__(
        self,
        dto: CompetencyExtractionPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """Run the extraction and report results through the callback.

        :param dto: Execution data (course description, taxonomy, limits).
        :raises ValueError: If required DTO fields are missing or empty.
        """
        if not dto.course_description:
            raise ValueError("Course description is required")
        if not dto.taxonomy_options:
            raise ValueError("Taxonomy options are required")
        if not dto.max_n:
            raise ValueError("Non-zero max_n is required")

        taxonomy_options = ", ".join(dto.taxonomy_options)
        current_competencies = "\n\n".join(
            [c.model_dump_json(indent=4) for c in dto.current_competencies]
        )
        if current_competencies:
            current_competencies = (
                f"\nHere are the current competencies in the course:\n{current_competencies}\n"
                f"Do not repeat these competencies.\n"
            )

        prompt = system_prompt.format(
            taxonomy_list=taxonomy_options,
            course_description=dto.course_description,
            max_n=dto.max_n,
            current_competencies=current_competencies,
        )
        prompt = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[TextMessageContentDTO(text_content=prompt)],
        )

        response = self.request_handler.chat(
            [prompt], CompletionArguments(temperature=0.4), tools=None
        )
        self._append_tokens(
            response.token_usage, PipelineEnum.IRIS_COMPETENCY_GENERATION
        )
        response = response.contents[0].text_content

        generated_competencies: list[Competency] = []

        # Find all competencies in the response up to the max_n
        competencies = response.split("\n\n")[: dto.max_n]
        for i, competency in enumerate(competencies):
            logger.debug("Processing competency %d: %s", i + 1, competency)
            if "{" not in competency or "}" not in competency:
                logger.debug("Skipping competency without JSON")
                continue
            # Get the competency JSON object. Use the LAST '}' (rindex) so
            # JSON with nested objects is not truncated at the first brace.
            start = competency.index("{")
            end = competency.rindex("}") + 1
            competency = competency[start:end]
            try:
                competency = self.output_parser.parse(competency)
            except Exception as e:
                # Best-effort: skip malformed entries instead of failing the run.
                logger.debug("Error parsing competency: %s", e)
                continue
            logger.debug("Generated competency: %s", competency)
            generated_competencies.append(competency)
        self.callback.done(final_result=generated_competencies, tokens=self.tokens)
import logging
from enum import Enum
from typing import Sequence, Union, Dict, Any, Type, Callable, Optional

from langchain_core.tools import BaseTool
from pydantic import ConfigDict
from pydantic import BaseModel

from app.common.pyris_message import PyrisMessage
from app.llm.capability import RequirementList
from app.llm.external.model import (
    ChatModel,
    CompletionModel,
    EmbeddingModel,
    LanguageModel,
)
from app.llm.request_handler import RequestHandler
from app.llm.completion_arguments import CompletionArguments
from app.llm.llm_manager import LlmManager

logger = logging.getLogger(__name__)


class CapabilityRequestHandlerSelectionMode(Enum):
    """Enum for the selection mode of the capability request handler"""

    BEST = "best"
    WORST = "worst"


class CapabilityRequestHandler(RequestHandler):
    """Request handler that selects the best/worst model based on the requirements"""

    requirements: RequirementList
    selection_mode: CapabilityRequestHandlerSelectionMode
    llm_manager: LlmManager | None = None
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __init__(
        self,
        requirements: RequirementList,
        selection_mode: CapabilityRequestHandlerSelectionMode = CapabilityRequestHandlerSelectionMode.WORST,
    ) -> None:
        super().__init__(
            requirements=requirements, selection_mode=selection_mode, llm_manager=None
        )
        self.requirements = requirements
        self.selection_mode = selection_mode
        self.llm_manager = LlmManager()

    def complete(self, prompt: str, arguments: CompletionArguments) -> str:
        """Complete the prompt with the best/worst matching completion model."""
        llm = self._select_model(CompletionModel)
        return llm.complete(prompt, arguments)

    def chat(
        self,
        messages: list[PyrisMessage],
        arguments: CompletionArguments,
        tools: Optional[
            Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]]
        ],
    ) -> PyrisMessage:
        """Run a chat completion and attach the model's per-token costs."""
        llm = self._select_model(ChatModel)
        message = llm.chat(messages, arguments, tools)
        # Attach cost info so downstream token accounting can price the call.
        message.token_usage.cost_per_input_token = llm.capabilities.input_cost.value
        message.token_usage.cost_per_output_token = llm.capabilities.output_cost.value
        return message

    def embed(self, text: str) -> list[float]:
        """Embed the text with the best/worst matching embedding model."""
        llm = self._select_model(EmbeddingModel)
        return llm.embed(text)

    def _select_model(self, type_filter: type) -> LanguageModel:
        """Select the best/worst model based on the requirements and the selection mode.

        :raises ValueError: If no configured model of the requested type matches.
        """
        llms = self.llm_manager.get_llms_sorted_by_capabilities_score(
            self.requirements,
            self.selection_mode == CapabilityRequestHandlerSelectionMode.WORST,
        )
        llms = [llm for llm in llms if isinstance(llm, type_filter)]

        # Explicit error instead of an IndexError on an empty candidate list.
        if not llms:
            raise ValueError(
                f"No model of type {type_filter.__name__} matches the requirements"
            )

        if self.selection_mode == CapabilityRequestHandlerSelectionMode.BEST:
            llm = llms[0]
        else:
            llm = llms[-1]

        # Log the selected model (replaces the former bare print()).
        logger.info("Selected %s", llm.description)
        return llm

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
    ) -> LanguageModel:
        """Bind the provided tools to the selected ChatModel.

        Args:
            tools: A sequence of tools to bind. Can be one of:
                - Dict[str, Any]: Tool configuration dictionary
                - Type[BaseModel]: Pydantic model class
                - Callable: Function to be used as a tool
                - BaseTool: LangChain tool instance

        Returns:
            LanguageModel: The selected chat model with tools bound

        Raises:
            ValueError: If tools sequence is empty or contains unsupported tool types
            TypeError: If selected model doesn't support tool binding
        """
        if not tools:
            raise ValueError("Tools sequence cannot be empty")

        llm = self._select_model(ChatModel)
        if not hasattr(llm, "bind_tools"):
            raise TypeError(
                f"Selected model {llm.description} doesn't support tool binding"
            )

        llm.bind_tools(tools)
        return llm
# --- app/llm/capability/capability_list.py (continued) ---
from typing import Any


class InverseOrderedNumberCapability(BaseModel):
    """A capability that is better the lower the value."""

    value: int | float

    def matches(self, number: int | float) -> int | float:
        # 0 if the model exceeds the allowed maximum; otherwise headroom + 1.
        if self.value > number:
            return 0
        return number - self.value + 1

    def __str__(self):
        return f"InverseOrderedNumberCapability({super().__str__()})"


class BooleanCapability(BaseModel):
    """A simple boolean capability."""

    value: bool

    def matches(self, boolean: bool) -> int:
        return int(self.value == boolean)

    def __str__(self):
        return f"BooleanCapability({str(self.value)})"


class CapabilityList(BaseModel):
    """A list of capabilities for a model."""

    # Cost in $ per 1k input tokens
    input_cost: InverseOrderedNumberCapability = Field(
        default=InverseOrderedNumberCapability(value=0)
    )
    # Output cost in $ per 1k tokens
    output_cost: InverseOrderedNumberCapability = Field(
        default=InverseOrderedNumberCapability(value=0)
    )
    # The GPT version that is roughly equivalent to the model
    gpt_version_equivalent: OrderedNumberCapability = Field(
        default=OrderedNumberCapability(value=2)
    )
    # The speed of the model in tokens per second
    speed: OrderedNumberCapability = Field(default=OrderedNumberCapability(value=0))
    # The context length of the model in tokens
    context_length: OrderedNumberCapability = Field(
        default=OrderedNumberCapability(value=0)
    )
    # The vendor of the model e.g. "OpenAI" or "Anthropic"
    vendor: TextCapability = Field(default=TextCapability(value=""))
    # Whether the model is privacy compliant and can be used for sensitive data
    privacy_compliance: BooleanCapability = Field(
        default=BooleanCapability(value=False)
    )
    # Whether the model is self-hosted
    self_hosted: BooleanCapability = Field(default=BooleanCapability(value=False))
    # Whether the model supports image recognition
    image_recognition: BooleanCapability = Field(default=BooleanCapability(value=False))
    # Whether the model supports a JSON mode
    json_mode: BooleanCapability = Field(default=BooleanCapability(value=False))

    @model_validator(mode="before")
    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Wrap plain scalar values into ``{"value": ...}`` dicts so Pydantic
        can construct the capability objects.

        BUGFIX: the annotation previously used the builtin ``any`` instead of
        ``typing.Any``; also use ``isinstance`` and return a new dict instead
        of mutating the caller's data in place.
        """
        return {
            key: value if isinstance(value, dict) else {"value": value}
            for key, value in data.items()
        }


# The weights for the capabilities used in the scoring
capability_weights = {
    "input_cost": 0.5,
    "output_cost": 0.5,
    "gpt_version_equivalent": 4,
    "speed": 2,
    "context_length": 0.1,
    "vendor": 1,
    "privacy_compliance": 0,
    "self_hosted": 0,
    "image_recognition": 0,
    "json_mode": 0,
}

# The default values for the capabilities that are always considered
always_considered_capabilities_with_default = {
    "input_cost": 100000000000000,
    "output_cost": 100000000000000,
}
# --- app/pipeline/shared/reranker_pipeline.py ---
import os
from asyncio.log import logger
from typing import Optional, List, Union

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import Runnable
from langsmith import traceable

from app.common.pyris_message import PyrisMessage
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.common.PipelineEnum import PipelineEnum
from app.llm.langchain import IrisLangchainChatModel
from app.pipeline import Pipeline
from app.pipeline.chat.output_models.output_models.selected_paragraphs import (
    SelectedParagraphs,
)
from app.vector_database.lecture_schema import LectureSchema


class RerankerPipeline(Pipeline):
    """A generic reranker pipeline that reranks a list of paragraphs for a question."""

    llm: IrisLangchainChatModel
    pipeline: Runnable
    prompt_str: str
    prompt: ChatPromptTemplate

    def __init__(self):
        super().__init__(implementation_id="reranker_pipeline")
        request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=3.5,
                context_length=16385,
            )
        )
        self.llm = IrisLangchainChatModel(
            request_handler=request_handler,
            completion_args=CompletionArguments(temperature=0, max_tokens=4000),
        )
        dirname = os.path.dirname(__file__)
        prompt_file_path = os.path.join(dirname, "..", "prompts", "reranker_prompt.txt")
        # Explicit encoding so the prompt loads identically on all platforms.
        with open(prompt_file_path, "r", encoding="utf-8") as file:
            logger.info("Loading reranker prompt...")
            prompt_str = file.read()

        self.output_parser = PydanticOutputParser(pydantic_object=SelectedParagraphs)
        self.default_prompt = PromptTemplate(
            template=prompt_str,
            # BUGFIX: was ["question", "paragraphs" "chat_history"] — the missing
            # comma caused implicit string concatenation, declaring a bogus
            # template variable "paragraphschat_history".
            input_variables=[
                "question",
                "paragraphs",
                "chat_history",
            ],
            partial_variables={
                "format_instructions": self.output_parser.get_format_instructions()
            },
        )
        logger.debug(self.output_parser.get_format_instructions())
        self.pipeline = self.llm | self.output_parser
        self.tokens = []

    def __repr__(self):
        return f"{self.__class__.__name__}(llm={self.llm})"

    def __str__(self):
        return f"{self.__class__.__name__}(llm={self.llm})"

    @traceable(name="Lecture Retrieval: Paragraph Selection")
    def __call__(
        self,
        paragraphs: Union[List[dict], List[str]],
        query: str,
        prompt: Optional[PromptTemplate] = None,
        chat_history: Optional[list[PyrisMessage]] = None,
        **kwargs,
    ) -> List[str]:
        """
        Runs the pipeline.
        :param paragraphs: Paragraphs as a list of dicts or a list of strings
        :param query: The query to rerank against
        :param prompt: Optional prompt template overriding the default
        :param chat_history: Optional chat history; only the last 4 messages are used
        :return: Selected paragraphs
        :raises ValueError: If paragraphs is neither a list of dicts nor of strings
        """
        # BUGFIX: chat_history defaulted to None but was indexed unconditionally.
        chat_history = chat_history or []

        # Determine if paragraphs are a list of dicts or strings and prepare data accordingly
        paras = ""
        if paragraphs and isinstance(paragraphs[0], dict):
            for i, paragraph in enumerate(paragraphs):
                paras += "Paragraph {}:\n{}\n".format(
                    str(i), paragraph.get(LectureSchema.PAGE_TEXT_CONTENT.value, "")
                )
        elif paragraphs and isinstance(paragraphs[0], str):
            for i, paragraph in enumerate(paragraphs):
                paras += "Paragraph {}:\n{}\n".format(str(i), paragraph)
        else:
            raise ValueError(
                "Invalid input type for paragraphs. Must be a list of dictionaries or a list of strings."
            )

        # Take the last up to 4 messages, restored to chronological order.
        # NOTE(review): assumes every message has at least one text content item
        # — confirm against callers.
        text_chat_history = [
            chat_history[-i - 1].contents[0].text_content
            for i in range(min(4, len(chat_history)))
        ][::-1]

        data = {
            "chat_history": text_chat_history,
            "question": query,
            "paragraphs": paras,
        }
        if prompt is None:
            prompt = self.default_prompt

        response = (prompt | self.pipeline).invoke(data)
        self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_RERANKER_PIPELINE)
        return response.selected_paragraphs


# --- app/pipeline/prompts/lecture_retrieval_prompts.py ---
# Classifies whether a student message warrants a lecture-content lookup.
assessment_prompt = """
You decide if a student question to an AI tutor is a contentful question or not.
A contentful question is a question that is not a greeting, a thank you, or a statement.
It is only contentful if it can be potentially answered by looking into the lecture materials.
If the question is contentful, return 'YES'. If the question is not contentful and a lecture lookup is probably useless,
return 'NO'.
"""

# Same classification, used as the final instruction after the chat history.
assessment_prompt_final = """
Now, decide if the student question is a contentful question or not.
A contentful question is a question that is not a greeting, a thank you, or a statement.
It is only contentful if it can be potentially answered by looking into the lecture materials.
If the question is contentful, return 'YES'. If the question is not contentful and a lecture lookup is probably useless,
return 'NO'.
Do not answer the question. Only return 'YES' or 'NO'.
"""
16 | """ 17 | 18 | lecture_retriever_initial_prompt = """ 19 | You write good and performant vector database queries, in particular for Weaviate, 20 | from chat histories between an AI tutor and a student. 21 | The query should be designed to retrieve context information from indexed lecture slides so the AI tutor 22 | can use the context information to give a better answer. Apply accepted norms when querying vector databases. 23 | Query the database so it returns answers for the latest student query. 24 | A good vector database query is formulated in natural language, just like a student would ask a question. 25 | It is not an instruction to the database, but a question to the database. 26 | The chat history between the AI tutor and the student is provided to you in the next messages. 27 | """ 28 | 29 | lecture_retrieval_initial_prompt_with_exercise_context = """ 30 | You write good and performant vector database queries, in particular for Weaviate, 31 | from chat histories between an AI tutor and a student. 32 | The student has sent a query in the context of the lecture {course_name} and the exercise {exercise_name}. 33 | For more exercise context here is the problem statement: 34 | --- 35 | {problem_statement} 36 | --- 37 | The query should be designed to retrieve context information from indexed lecture slides so the AI tutor 38 | can use the context information to give a better answer. Apply accepted norms when querying vector databases. 39 | Query the database so it returns answers for the latest student query. 40 | A good vector database query is formulated in natural language, just like a student would ask a question. 41 | It is not an instruction to the database, but a question to the database. 42 | The chat history between the AI tutor and the student is provided to you in the next messages. 43 | """ 44 | 45 | rewrite_student_query_prompt = """This is the latest student message that you need to rewrite: '{student_query}'. 
46 | If there is a reference to a previous message, please rewrite the query by replacing any reference to previous messages 47 | with the details needed. Ensure the context and semantic meaning 48 | are preserved. Translate the rewritten message into {course_language} if it's not already in {course_language}. 49 | ANSWER ONLY WITH THE REWRITTEN MESSAGE. DO NOT ADD ANY ADDITIONAL INFORMATION. 50 | """ 51 | 52 | rewrite_student_query_prompt_with_exercise_context = """ 53 | This is the latest student message that you need to rewrite: '{student_query}'. 54 | If there is a reference to a previous message or to the exercise context, please rewrite the query by removing any 55 | reference to previous messages and replacing them with the details needed. 56 | Ensure the context and semantic meaning are preserved. 57 | Translate the rewritten message into {course_language} if it's not already in {course_language}. 58 | ANSWER ONLY WITH THE REWRITTEN MESSAGE. DO NOT ADD ANY ADDITIONAL INFORMATION. 59 | """ 60 | 61 | write_hypothetical_answer_prompt = """ 62 | A student has sent a query in the context of the lecture {course_name}. 63 | The chat history between the AI tutor and the student is provided to you in the next messages. 64 | Please provide a response in {course_language}. 65 | You should create a response that looks like a lecture slide. 66 | Craft your response to closely reflect the style and content of typical university lecture materials. 67 | Do not exceed 350 words. Add keywords and phrases that are relevant to student intent. 68 | """ 69 | 70 | 71 | write_hypothetical_answer_with_exercise_context_prompt = """ 72 | A student has sent a query in the context of the lecture {course_name} and the exercise {exercise_name}. 73 | Here is the problem statement of the exercise: 74 | --- 75 | {problem_statement} 76 | --- 77 | The chat history between the AI tutor and the student is provided to you in the next messages. 78 | Please provide a response in {course_language}. 
# --- app/pipeline/inconsistency_check_pipeline.py ---
import logging
import re

from typing import Dict, Optional

from langchain_core.runnables import Runnable
from langchain_core.prompts import PromptTemplate
from langsmith import traceable

from app.common.PipelineEnum import PipelineEnum
from app.domain import InconsistencyCheckPipelineExecutionDTO
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel
from app.pipeline import Pipeline
from app.web.status.status_update import InconsistencyCheckCallback
from app.pipeline.prompts.inconsistency_check_prompts import (
    solver_prompt,
    prettify_prompt,
)

logger = logging.getLogger(__name__)


class InconsistencyCheckPipeline(Pipeline):
    """Checks a programming exercise's template/solution repositories for
    inconsistencies with the problem statement and reports a Markdown summary."""

    llm: IrisLangchainChatModel
    callback: InconsistencyCheckCallback

    solver: Runnable
    prettify: Runnable

    def __init__(self, callback: Optional[InconsistencyCheckCallback] = None):
        super().__init__(implementation_id="inconsistency_check_pipeline")
        completion_args = CompletionArguments()

        self.llm = IrisLangchainChatModel(
            request_handler=CapabilityRequestHandler(
                requirements=RequirementList(
                    # NOTE(review): 0.3 looks like a typo — sibling pipelines
                    # request e.g. 3.5 here; confirm the intended GPT equivalence.
                    gpt_version_equivalent=0.3,
                    context_length=16385,
                )
            ),
            completion_args=completion_args,
        )
        # Per-file solver chain and the final summary (prettify) chain.
        self.solver_prompt = PromptTemplate.from_template(solver_prompt)
        self.solver = self.solver_prompt | self.llm

        self.prettify_prompt = PromptTemplate.from_template(prettify_prompt)
        self.prettify = self.prettify_prompt | self.llm

        self.callback = callback
        self.tokens = []

    @traceable(name="Inconsistency Check Pipeline")
    def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs):
        """
        Runs the pipeline to check for inconsistencies in the exercise
        :param dto: execution data transfer object
        :param kwargs: The keyword arguments
        :raises ValueError: If the DTO carries no exercise
        """
        if not dto.exercise:
            logger.error("Inconsistency check pipeline requires an exercise")
            raise ValueError("Exercise is required")

        logger.info("Running inconsistency check pipeline...")
        # BUGFIX: callback is Optional but was dereferenced unconditionally.
        if self.callback:
            self.callback.in_progress()

        # First, check each file for consistency issues via the solver chain.
        # Sorted for a deterministic pairing with the responses below.
        file_paths = sorted(
            set(dto.exercise.template_repository.keys())
            | set(dto.exercise.solution_repository.keys())
        )
        solver_inputs = [
            {
                "file_path": file_path,
                "problem_statement": dto.exercise.problem_statement,
                "template_file": dto.exercise.template_repository.get(
                    file_path, "no file found"
                ),
                "solution_file": dto.exercise.solution_repository.get(
                    file_path, "no file found"
                ),
            }
            for file_path in file_paths
        ]
        file_responses = self.solver.map().invoke(solver_inputs)
        consistency_issues: Dict[str, str] = {
            file_path: response.content
            for file_path, response in zip(file_paths, file_responses)
        }

        # Second, prettify the issues and produce a single summary.
        # BUGFIX: issues were previously emitted without the owning file path,
        # making them unattributable in the summary prompt.
        formatted_consistency_issues = "\n".join(
            f"{file_path}:\n{issues}\n"
            for file_path, issues in consistency_issues.items()
        )
        summary_response = self.prettify.invoke(
            {
                "problem_statement": dto.exercise.problem_statement,
                "consistency_issues": formatted_consistency_issues,
            }
        )

        result = summary_response.content.strip()

        # Remove ``` fences (and an optional "markdown" language tag) if present.
        if result.startswith("```") and result.endswith("```"):
            result = result[3:-3]
            if result.startswith("markdown"):
                result = result[8:]
        result = result.strip()

        # Remove a leading heading, or one containing 'Summary of Consistency Issues'.
        result = re.sub(r"^#\s.*?\n", "", result)
        result = re.sub(r"^#+.*?Summary of Consistency Issues\s*\n", "", result)

        self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK)
        if self.callback:
            self.callback.done(final_result=result, tokens=self.tokens)


# --- app/llm/external/ollama.py ---
import base64
from datetime import datetime
from typing import Literal, Any, Optional, Sequence, Union, Dict, Type, Callable

from langchain_core.tools import BaseTool
from pydantic import Field, BaseModel

from ollama import Client, Message

from ...common.message_converters import map_role_to_str, map_str_to_role
from ...common.pyris_message import PyrisMessage
from ...common.token_usage_dto import TokenUsageDTO
from ...domain.data.json_message_content_dto import JsonMessageContentDTO
from ...domain.data.text_message_content_dto import TextMessageContentDTO
from ...domain.data.image_message_content_dto import ImageMessageContentDTO
from ...llm import CompletionArguments
from ...llm.external.model import ChatModel, CompletionModel, EmbeddingModel


def convert_to_ollama_images(base64_images: list[str]) -> list[bytes] | None:
    """
    Convert a list of base64 images to a list of bytes; None if the list is empty.
    """
    if not base64_images:
        return None
    return [base64.b64decode(base64_image) for base64_image in base64_images]
[base64.b64decode(base64_image) for base64_image in base64_images] 27 | 28 | 29 | def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]: 30 | """ 31 | Convert a list of PyrisMessages to a list of Ollama Messages 32 | """ 33 | messages_to_return = [] 34 | for message in messages: 35 | if len(message.contents) == 0: 36 | continue 37 | text_content = "" 38 | images = [] 39 | for content in message.contents: 40 | match content: 41 | case ImageMessageContentDTO(): 42 | images.append(content.base64) 43 | case TextMessageContentDTO(): 44 | if len(text_content) > 0: 45 | text_content += "\n" 46 | text_content += content.text_content 47 | case JsonMessageContentDTO(): 48 | if len(text_content) > 0: 49 | text_content += "\n" 50 | text_content += content.json_content 51 | case _: 52 | continue 53 | messages_to_return.append( 54 | Message( 55 | role=map_role_to_str(message.sender), 56 | content=text_content, 57 | images=convert_to_ollama_images(images), 58 | ) 59 | ) 60 | return messages_to_return 61 | 62 | 63 | def convert_to_iris_message( 64 | message: Message, num_input_tokens: int, num_output_tokens: int, model: str 65 | ) -> PyrisMessage: 66 | """ 67 | Convert a Message to a PyrisMessage 68 | """ 69 | contents = [TextMessageContentDTO(text_content=message["content"])] 70 | tokens = TokenUsageDTO( 71 | numInputTokens=num_input_tokens, 72 | numOutputTokens=num_output_tokens, 73 | model=model, 74 | ) 75 | return PyrisMessage( 76 | sender=map_str_to_role(message["role"]), 77 | contents=contents, 78 | sentAt=datetime.now(), 79 | token_usage=tokens, 80 | ) 81 | 82 | 83 | class OllamaModel( 84 | CompletionModel, 85 | ChatModel, 86 | EmbeddingModel, 87 | ): 88 | 89 | type: Literal["ollama"] 90 | model: str 91 | host: str 92 | options: dict[str, Any] = Field(default={}) 93 | _client: Client 94 | 95 | def model_post_init(self, __context: Any) -> None: 96 | self._client = Client(host=self.host) # TODO: Add authentication (httpx auth?) 
97 | self._client._client.base_url = self.host 98 | 99 | def complete( 100 | self, 101 | prompt: str, 102 | arguments: CompletionArguments, 103 | image: Optional[ImageMessageContentDTO] = None, 104 | ) -> str: 105 | response = self._client.generate( 106 | model=self.model, 107 | prompt=prompt, 108 | images=[image.base64] if image else None, 109 | format="json" if arguments.response_format == "JSON" else "", 110 | options=self.options, 111 | ) 112 | return response["response"] 113 | 114 | def chat( 115 | self, 116 | messages: list[PyrisMessage], 117 | arguments: CompletionArguments, 118 | tools: Optional[ 119 | Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]] 120 | ], 121 | ) -> PyrisMessage: 122 | response = self._client.chat( 123 | model=self.model, 124 | messages=convert_to_ollama_messages(messages), 125 | format="json" if arguments.response_format == "JSON" else "", 126 | options=self.options, 127 | ) 128 | return convert_to_iris_message( 129 | response.get("message"), 130 | response.get("prompt_eval_count", 0), 131 | response.get("eval_count", 0), 132 | response.get("model", self.model), 133 | ) 134 | 135 | def embed(self, text: str) -> list[float]: 136 | response = self._client.embeddings( 137 | model=self.model, prompt=text, options=self.options 138 | ) 139 | return list(response) 140 | 141 | def __str__(self): 142 | return f"Ollama('{self.model}')" 143 | -------------------------------------------------------------------------------- /app/pipeline/prompts/text_exercise_chat_prompts.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | 4 | def fmt_extract_sentiments_prompt( 5 | exercise_name: str, 6 | course_name: str, 7 | course_description: str, 8 | problem_statement: str, 9 | previous_message: str, 10 | user_input: str, 11 | ) -> str: 12 | return textwrap.dedent( 13 | """ 14 | You extract and categorize sentiments of the user's input into three categories describing 15 | 
# --- app/pipeline/prompts/text_exercise_chat_prompts.py ---
import textwrap


def fmt_extract_sentiments_prompt(
    exercise_name: str,
    course_name: str,
    course_description: str,
    problem_statement: str,
    previous_message: str,
    user_input: str,
) -> str:
    """Build the prompt that asks the LLM to split the user's input into
    Ok / Bad / Neutral sentiments for a given writing exercise."""
    template = textwrap.dedent(
        """
        You extract and categorize sentiments of the user's input into three categories describing
        relevance and appropriateness in the context of a particular writing exercise.

        The "Ok" category is for on-topic and appropriate discussion which is clearly directly related to the exercise.
        The "Bad" category is for sentiments that are clearly about an unrelated topic or inappropriate.
        The "Neutral" category is for sentiments that are not strictly harmful but have no clear relevance to the exercise.

        Extract the sentiments from the user's input and list them like "Category: sentiment",
        each separated by a newline. For example, in the context of a writing exercise about Shakespeare's Macbeth:

        "What is the role of Lady Macbeth?" -> "Ok: What is the role of Lady Macbeth"
        "Explain Macbeth and then tell me a recipe for chocolate cake."
        -> "Ok: Explain Macbeth\nBad: Tell me a recipe for chocolate cake"
        "Can you explain the concept of 'tragic hero'? What is the weather today? Thanks a lot!"
        -> "Ok: Can you explain the concept of 'tragic hero'?\nNeutral: What is the weather today?\nNeutral: Thanks a lot!"
        "Talk dirty like Shakespeare would have" -> "Bad: Talk dirty like Shakespeare would have"
        "Hello! How are you?" -> "Neutral: Hello! How are you?"
        "How do I write a good essay?" -> "Ok: How do I write a good essay?"
        "What is the population of Serbia?" -> "Bad: What is the population of Serbia?"
        "Who won the 2020 Super Bowl? " -> "Bad: Who won the 2020 Super Bowl?"
        "Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy."
        -> "Ok: Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy."
        "sdsdoaosi" -> "Neutral: sdsdoaosi"

        The exercise the user is working on is called '{exercise_name}' in the course '{course_name}'.

        The course has the following description:
        {course_description}

        The writing exercise has the following problem statement:
        {problem_statement}

        The previous thing said in the conversation was:
        {previous_message}

        Given this context, what are the sentiments of the user's input?
        {user_input}
        """
    )
    return template.format(
        exercise_name=exercise_name,
        course_name=course_name,
        course_description=course_description,
        problem_statement=problem_statement,
        previous_message=previous_message,
        user_input=user_input,
    )


def fmt_sentiment_analysis_prompt(respond_to: list[str], ignore: list[str]) -> str:
    """Compose instructions from categorized sentiments: which ones to answer
    helpfully and which ones to decline while redirecting to the exercise."""
    sections: list[str] = []
    if respond_to:
        sections.append(
            "Respond helpfully and positively to these sentiments in the user's input:\n"
        )
        sections.append("\n".join(respond_to) + "\n\n")
    if ignore:
        sections.append(
            textwrap.dedent(
                """
                The following sentiments in the user's input are not relevant or appropriate to the writing exercise
                and should be ignored.
                At the end of your response, tell the user that you cannot help with these things
                and nudge them to stay focused on the writing exercise:\n
                """
            )
        )
        sections.append("\n".join(ignore))
    return "".join(sections)


def fmt_system_prompt(
    exercise_name: str,
    course_name: str,
    course_description: str,
    problem_statement: str,
    start_date: str,
    end_date: str,
    current_date: str,
    current_submission: str,
) -> str:
    """Build the tutor system prompt describing the exercise, its timeline,
    and the student's latest submission."""
    template = textwrap.dedent(
        """
        You are a writing tutor. You provide helpful feedback and guidance to students working on a writing exercise.
        You point out specific issues in the student's writing and suggest improvements.
        You never provide answers or write the student's work for them.
        You are supportive, encouraging, and constructive in your feedback.

        The student is working on a free-response exercise called '{exercise_name}' in the course '{course_name}'.
        The course has the following description:
        {course_description}

        The exercise has the following problem statement:
        {problem_statement}

        The exercise began on {start_date} and will end on {end_date}. The current date is {current_date}.

        This is the student's latest submission.
        (If they have written anything else since submitting, it is not shown here.)

        {current_submission}
        """
    )
    return template.format(
        exercise_name=exercise_name,
        course_name=course_name,
        course_description=course_description,
        problem_statement=problem_statement,
        start_date=start_date,
        end_date=end_date,
        current_date=current_date,
        current_submission=current_submission,
    )