├── app ├── __init__.py ├── web │ ├── __init__.py │ ├── status │ │ ├── __init__.py │ │ ├── lecture_deletion_status_callback.py │ │ ├── ingestion_status_callback.py │ │ └── faq_ingestion_status_callback.py │ └── routers │ │ ├── __init__.py │ │ ├── health.py │ │ └── ingestion_status.py ├── domain │ ├── chat │ │ ├── __init__.py │ │ ├── course_chat │ │ │ ├── __init__.py │ │ │ ├── course_chat_status_update_dto.py │ │ │ └── course_chat_pipeline_execution_dto.py │ │ ├── exercise_chat │ │ │ ├── __init__.py │ │ │ ├── exercise_chat_status_update_dto.py │ │ │ └── exercise_chat_pipeline_execution_dto.py │ │ ├── lecture_chat │ │ │ ├── __init__.py │ │ │ └── lecture_chat_pipeline_execution_dto.py │ │ ├── chat_pipeline_execution_dto.py │ │ ├── interaction_suggestion_dto.py │ │ └── chat_pipeline_execution_base_data_dto.py │ ├── data │ │ ├── __init__.py │ │ ├── metrics │ │ │ ├── map_entry_dto.py │ │ │ ├── lecture_unit_information_dto.py │ │ │ ├── competency_progress_dto.py │ │ │ ├── exercise_student_metrics_dto.py │ │ │ ├── lecture_unit_student_metrics_dto.py │ │ │ ├── competency_jol_dto.py │ │ │ ├── competency_information_dto.py │ │ │ ├── competency_student_metrics_dto.py │ │ │ └── student_metrics_dto.py │ │ ├── course_dto.py │ │ ├── text_message_content_dto.py │ │ ├── json_message_content_dto.py │ │ ├── user_dto.py │ │ ├── image_message_content_dto.py │ │ ├── build_log_entry.py │ │ ├── feedback_dto.py │ │ ├── simple_submission_dto.py │ │ ├── tool_message_content_dto.py │ │ ├── faq_dto.py │ │ ├── result_dto.py │ │ ├── tool_call_dto.py │ │ ├── message_content_dto.py │ │ ├── text_exercise_dto.py │ │ ├── lecture_dto.py │ │ ├── lecture_unit_dto.py │ │ ├── programming_submission_dto.py │ │ ├── exam_dto.py │ │ ├── programming_exercise_dto.py │ │ ├── extended_course_dto.py │ │ ├── competency_dto.py │ │ └── exercise_with_submissions_dto.py │ ├── ingestion │ │ ├── __init__.py │ │ ├── ingestion_status_update_dto.py │ │ ├── deletionPipelineExecutionDto.py │ │ └── 
ingestion_pipeline_execution_dto.py │ ├── status │ │ ├── __init__.py │ │ ├── rewriting_status_update_dto.py │ │ ├── inconsistency_check_status_update_dto.py │ │ ├── text_exercise_chat_status_update_dto.py │ │ ├── stage_state_dto.py │ │ ├── competency_extraction_status_update_dto.py │ │ ├── stage_dto.py │ │ ├── status_update_dto.py │ │ └── lecture_chat_status_update_dto.py │ ├── feature_dto.py │ ├── error_response_dto.py │ ├── model_dto.py │ ├── rewriting_pipeline_execution_dto.py │ ├── event │ │ └── pyris_event_dto.py │ ├── inconsistency_check_pipeline_execution_dto.py │ ├── pipeline_execution_settings_dto.py │ ├── pipeline_execution_dto.py │ ├── text_exercise_chat_pipeline_execution_dto.py │ ├── competency_extraction_pipeline_execution_dto.py │ └── __init__.py ├── ingestion │ ├── __init__.py │ └── abstract_ingestion.py ├── pipeline │ ├── chat │ │ ├── __init__.py │ │ └── output_models │ │ │ ├── __init__.py │ │ │ └── output_models │ │ │ ├── __init__.py │ │ │ ├── selected_file_model.py │ │ │ └── selected_paragraphs.py │ ├── __init__.py │ ├── shared │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── summary_pipeline.py │ │ └── reranker_pipeline.py │ ├── prompts │ │ ├── summary_prompt.txt │ │ ├── chat_gpt_wrapper_prompts.py │ │ ├── reranker_prompt.txt │ │ ├── faq_retrieval_prompts.py │ │ ├── content_image_interpretation_merge_prompt.txt │ │ ├── choose_response_prompt.txt │ │ ├── faq_rewriting.py │ │ ├── competency_extraction.py │ │ ├── code_feedback_prompt.txt │ │ ├── faq_citation_prompt.txt │ │ ├── citation_prompt.txt │ │ ├── inconsistency_check_prompts.py │ │ ├── rewriting_prompts.py │ │ ├── lecture_retrieval_prompts.py │ │ └── text_exercise_chat_prompts.py │ ├── pipeline.py │ ├── rewriting_pipeline.py │ ├── chat_gpt_wrapper_pipeline.py │ ├── competency_extraction_pipeline.py │ └── inconsistency_check_pipeline.py ├── common │ ├── __init__.py │ ├── singleton.py │ ├── token_usage_dto.py │ ├── PipelineEnum.py │ ├── custom_exceptions.py │ └── pyris_message.py ├── 
vector_database │ ├── __init__.py │ ├── database.py │ └── faq_schema.py ├── llm │ ├── __init__.py │ ├── capability │ │ ├── __init__.py │ │ ├── requirement_list.py │ │ ├── capability_checker.py │ │ └── capability_list.py │ ├── langchain │ │ ├── __init__.py │ │ ├── iris_langchain_embedding_model.py │ │ ├── iris_langchain_completion_model.py │ │ └── iris_langchain_chat_model.py │ ├── request_handler │ │ ├── __init__.py │ │ ├── request_handler_interface.py │ │ ├── basic_request_handler.py │ │ └── capability_request_handler.py │ ├── completion_arguments.py │ ├── external │ │ ├── __init__.py │ │ ├── openai_completion.py │ │ ├── openai_dalle.py │ │ ├── openai_embeddings.py │ │ ├── model.py │ │ └── ollama.py │ └── llm_manager.py ├── dependencies.py ├── sentry.py ├── config.py ├── retrieval │ ├── faq_retrieval_utils.py │ └── faq_retrieval.py └── main.py ├── .github ├── CODEOWNERS ├── workflows │ ├── pullrequest-labeler.yml │ ├── lint.yml │ ├── build.yml │ └── deploy.yml ├── dependabot.yml └── labeler.yml ├── docker ├── nginx │ ├── 70-pyris-setup.sh │ ├── timeouts.conf │ ├── dhparam.pem │ ├── pyris-server.conf │ ├── pyris-nginx.conf │ ├── certs │ │ ├── pyris-nginx+4.pem │ │ └── pyris-nginx+4-key.pem │ └── nginx_502.html ├── weaviate │ └── default.env ├── weaviate.yml ├── pyris.yml ├── pyris-dev.yml ├── pyris-production-internal.yml ├── nginx.yml └── pyris-production.yml ├── application.example.yml ├── .flake8 ├── .pre-commit-config.yaml ├── requirements.txt ├── Dockerfile ├── .whitesource ├── log_conf.yml ├── LICENSE ├── llm_config.example.yml └── .gitignore /app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/web/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/chat/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/ingestion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/web/status/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @bassner -------------------------------------------------------------------------------- /app/domain/ingestion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/status/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/pipeline/chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/chat/course_chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/domain/chat/exercise_chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/app/domain/chat/lecture_chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/pipeline/chat/output_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/pipeline/chat/output_models/output_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/common/__init__.py: -------------------------------------------------------------------------------- 1 | from app.common.singleton import Singleton 2 | -------------------------------------------------------------------------------- /app/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from app.pipeline.pipeline import Pipeline 2 | -------------------------------------------------------------------------------- /app/pipeline/shared/__init__.py: -------------------------------------------------------------------------------- 1 | from ...pipeline.shared.summary_pipeline import SummaryPipeline 2 | -------------------------------------------------------------------------------- /app/pipeline/prompts/summary_prompt.txt: -------------------------------------------------------------------------------- 1 | Write a concise summary of the following: 2 | "{text}" 3 | CONCISE SUMMARY: -------------------------------------------------------------------------------- /app/vector_database/__init__.py: -------------------------------------------------------------------------------- 1 | import app.vector_database.database 2 | import app.vector_database.lecture_schema 3 | -------------------------------------------------------------------------------- /docker/nginx/70-pyris-setup.sh: 
-------------------------------------------------------------------------------- 1 | # disable default.conf 2 | mv /etc/nginx/conf.d/default.conf /etc/nginx/conf.d/default.conf.disabled || true 3 | -------------------------------------------------------------------------------- /docker/nginx/timeouts.conf: -------------------------------------------------------------------------------- 1 | proxy_send_timeout 900s; 2 | proxy_read_timeout 900s; 3 | fastcgi_send_timeout 900s; 4 | fastcgi_read_timeout 900s; 5 | -------------------------------------------------------------------------------- /app/domain/feature_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class FeatureDTO(BaseModel): 5 | id: str 6 | name: str 7 | description: str 8 | -------------------------------------------------------------------------------- /app/domain/error_response_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | 3 | 4 | class IrisErrorResponseDTO(BaseModel): 5 | error_message: str = Field(alias="errorMessage") 6 | -------------------------------------------------------------------------------- /application.example.yml: -------------------------------------------------------------------------------- 1 | api_keys: 2 | - token: "secret" 3 | 4 | weaviate: 5 | host: "localhost" 6 | port: "8001" 7 | grpc_port: "50051" 8 | 9 | env_vars: 10 | SOME: 'value' -------------------------------------------------------------------------------- /app/domain/status/rewriting_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from app.domain.status.status_update_dto import StatusUpdateDTO 2 | 3 | 4 | class RewritingStatusUpdateDTO(StatusUpdateDTO): 5 | result: str = "" 6 | -------------------------------------------------------------------------------- /app/web/routers/__init__.py: 
-------------------------------------------------------------------------------- 1 | from ..routers.health import router as health_router 2 | from ..routers.pipelines import router as pipelines_router 3 | from ..routers.webhooks import router as webhooks_router 4 | -------------------------------------------------------------------------------- /app/domain/data/metrics/map_entry_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from pydantic import BaseModel 3 | 4 | 5 | class MapEntryDTO(BaseModel): 6 | key: Optional[int] = None 7 | value: Optional[int] = None 8 | -------------------------------------------------------------------------------- /app/domain/status/inconsistency_check_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from app.domain.status.status_update_dto import StatusUpdateDTO 2 | 3 | 4 | class InconsistencyCheckStatusUpdateDTO(StatusUpdateDTO): 5 | result: str = "" 6 | -------------------------------------------------------------------------------- /app/domain/model_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class PyrisModelDTO(BaseModel): 7 | id: str 8 | name: str 9 | description: Optional[str] = None 10 | -------------------------------------------------------------------------------- /app/llm/__init__.py: -------------------------------------------------------------------------------- 1 | from app.llm.completion_arguments import * 2 | from app.llm.external import * 3 | from app.llm.capability import * 4 | from app.llm.request_handler import * 5 | from app.llm.capability import RequirementList 6 | -------------------------------------------------------------------------------- /app/pipeline/prompts/chat_gpt_wrapper_prompts.py: 
-------------------------------------------------------------------------------- 1 | chat_gpt_initial_system_prompt = """ 2 | You are a helpful, smart, kind, and efficient AI assistant. 3 | You always fulfill the user's requests to the best of your ability. 4 | """ 5 | -------------------------------------------------------------------------------- /app/llm/capability/__init__.py: -------------------------------------------------------------------------------- 1 | from ..capability.capability_list import CapabilityList 2 | from ..capability.requirement_list import RequirementList 3 | from ..capability.capability_checker import capabilities_fulfill_requirements 4 | -------------------------------------------------------------------------------- /app/domain/data/course_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class CourseDTO(BaseModel): 7 | id: int 8 | name: Optional[str] 9 | description: Optional[str] = Field(None) 10 | -------------------------------------------------------------------------------- /app/domain/status/text_exercise_chat_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from app.domain.status.status_update_dto import StatusUpdateDTO 4 | 5 | 6 | class TextExerciseChatStatusUpdateDTO(StatusUpdateDTO): 7 | result: Optional[str] 8 | -------------------------------------------------------------------------------- /app/domain/data/text_message_content_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ConfigDict, Field 2 | 3 | 4 | class TextMessageContentDTO(BaseModel): 5 | model_config = ConfigDict(populate_by_name=True) 6 | 7 | text_content: str = Field(alias="textContent") 8 | 
-------------------------------------------------------------------------------- /app/domain/status/stage_state_dto.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class StageStateEnum(str, Enum): 5 | NOT_STARTED = "NOT_STARTED" 6 | IN_PROGRESS = "IN_PROGRESS" 7 | DONE = "DONE" 8 | SKIPPED = "SKIPPED" 9 | ERROR = "ERROR" 10 | -------------------------------------------------------------------------------- /app/domain/chat/lecture_chat/lecture_chat_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from app.domain import ChatPipelineExecutionDTO 2 | from app.domain.data.course_dto import CourseDTO 3 | 4 | 5 | class LectureChatPipelineExecutionDTO(ChatPipelineExecutionDTO): 6 | course: CourseDTO 7 | -------------------------------------------------------------------------------- /app/domain/ingestion/ingestion_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from ...domain.status.status_update_dto import StatusUpdateDTO 4 | 5 | 6 | class IngestionStatusUpdateDTO(StatusUpdateDTO): 7 | result: Optional[str] = None 8 | id: Optional[int] = None 9 | -------------------------------------------------------------------------------- /app/common/singleton.py: -------------------------------------------------------------------------------- 1 | class Singleton(type): 2 | _instances = {} 3 | 4 | def __call__(cls, *args, **kwargs): 5 | if cls not in cls._instances: 6 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 7 | return cls._instances[cls] 8 | -------------------------------------------------------------------------------- /app/domain/rewriting_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import Field, BaseModel 2 | from . 
import PipelineExecutionDTO 3 | 4 | 5 | class RewritingPipelineExecutionDTO(BaseModel): 6 | execution: PipelineExecutionDTO 7 | to_be_rewritten: str = Field(alias="toBeRewritten") 8 | -------------------------------------------------------------------------------- /.github/workflows/pullrequest-labeler.yml: -------------------------------------------------------------------------------- 1 | name: Pull Request Labeler 2 | on: pull_request_target 3 | 4 | jobs: 5 | label: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/labeler@v5 9 | with: 10 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 11 | -------------------------------------------------------------------------------- /app/domain/status/competency_extraction_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from app.domain.data.competency_dto import Competency 2 | from app.domain.status.status_update_dto import StatusUpdateDTO 3 | 4 | 5 | class CompetencyExtractionStatusUpdateDTO(StatusUpdateDTO): 6 | result: list[Competency] = [] 7 | -------------------------------------------------------------------------------- /app/domain/chat/course_chat/course_chat_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | from app.domain.status.status_update_dto import StatusUpdateDTO 4 | 5 | 6 | class CourseChatStatusUpdateDTO(StatusUpdateDTO): 7 | result: Optional[str] = None 8 | suggestions: List[str] = [] 9 | -------------------------------------------------------------------------------- /app/domain/data/json_message_content_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ConfigDict, Field, Json 2 | from typing import Any 3 | 4 | 5 | class JsonMessageContentDTO(BaseModel): 6 | model_config = ConfigDict(populate_by_name=True) 7 | 8 | json_content: Json[Any] = Field(alias="jsonContent") 9 | 
-------------------------------------------------------------------------------- /app/domain/chat/exercise_chat/exercise_chat_status_update_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | from app.domain.status.status_update_dto import StatusUpdateDTO 4 | 5 | 6 | class ExerciseChatStatusUpdateDTO(StatusUpdateDTO): 7 | result: Optional[str] = None 8 | suggestions: List[str] = [] 9 | -------------------------------------------------------------------------------- /app/llm/langchain/__init__.py: -------------------------------------------------------------------------------- 1 | from ...llm.langchain.iris_langchain_completion_model import ( 2 | IrisLangchainCompletionModel, 3 | ) 4 | from ...llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel 5 | from ...llm.langchain.iris_langchain_embedding_model import IrisLangchainEmbeddingModel 6 | -------------------------------------------------------------------------------- /app/domain/data/user_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class UserDTO(BaseModel): 7 | id: int 8 | first_name: Optional[str] = Field(alias="firstName", default=None) 9 | last_name: Optional[str] = Field(alias="lastName", default=None) 10 | -------------------------------------------------------------------------------- /app/domain/data/image_message_content_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field, ConfigDict 2 | from typing import Optional 3 | 4 | 5 | class ImageMessageContentDTO(BaseModel): 6 | base64: str = Field(..., alias="pdfFile") 7 | prompt: Optional[str] = None 8 | model_config = ConfigDict(populate_by_name=True) 9 | -------------------------------------------------------------------------------- /.flake8: 
-------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | exclude = 4 | .git, 5 | __pycache__, 6 | .idea 7 | per-file-ignores = 8 | # imported but unused 9 | __init__.py: F401, F403 10 | open_ai_chat_wrapper.py: F811 11 | open_ai_completion_wrapper.py: F811 12 | open_ai_embedding_wrapper.py: F811 13 | 14 | -------------------------------------------------------------------------------- /app/llm/request_handler/__init__.py: -------------------------------------------------------------------------------- 1 | from ..request_handler.request_handler_interface import RequestHandler 2 | from ..request_handler.basic_request_handler import BasicRequestHandler 3 | 4 | from ..request_handler.capability_request_handler import ( 5 | CapabilityRequestHandler, 6 | CapabilityRequestHandlerSelectionMode, 7 | ) 8 | -------------------------------------------------------------------------------- /docker/weaviate/default.env: -------------------------------------------------------------------------------- 1 | QUERY_DEFAULTS_LIMIT=25 2 | AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true 3 | PERSISTENCE_DATA_PATH=/var/lib/weaviate 4 | DEFAULT_VECTORIZER_MODULE=none 5 | ENABLE_MODULES= 6 | CLUSTER_HOSTNAME=pyris 7 | LIMIT_RESOURCES=true 8 | DISK_USE_WARNING_PERCENTAGE=80 9 | vectorCacheMaxObjects=1000000 10 | 11 | -------------------------------------------------------------------------------- /app/domain/status/stage_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | from app.domain.status.stage_state_dto import StageStateEnum 6 | 7 | 8 | class StageDTO(BaseModel): 9 | name: Optional[str] = None 10 | weight: int 11 | state: StageStateEnum 12 | message: Optional[str] = None 13 | -------------------------------------------------------------------------------- /app/domain/status/status_update_dto.py: 
-------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | from app.common.token_usage_dto import TokenUsageDTO 6 | from ...domain.status.stage_dto import StageDTO 7 | 8 | 9 | class StatusUpdateDTO(BaseModel): 10 | stages: List[StageDTO] 11 | tokens: List[TokenUsageDTO] = [] 12 | -------------------------------------------------------------------------------- /app/domain/event/pyris_event_dto.py: -------------------------------------------------------------------------------- 1 | from typing import TypeVar, Generic, Optional 2 | 3 | from pydantic import Field, BaseModel 4 | 5 | T = TypeVar("T") 6 | 7 | 8 | class PyrisEventDTO(BaseModel, Generic[T]): 9 | event_type: Optional[str] = Field(default=None, alias="eventType") 10 | event: Optional[T] = Field(default=None, alias="event") 11 | -------------------------------------------------------------------------------- /app/domain/inconsistency_check_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from . 
import PipelineExecutionDTO 4 | from .data.programming_exercise_dto import ProgrammingExerciseDTO 5 | 6 | 7 | class InconsistencyCheckPipelineExecutionDTO(BaseModel): 8 | execution: PipelineExecutionDTO 9 | exercise: ProgrammingExerciseDTO 10 | -------------------------------------------------------------------------------- /app/domain/data/build_log_entry.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Optional 3 | 4 | from pydantic import BaseModel 5 | 6 | 7 | class BuildLogEntryDTO(BaseModel): 8 | timestamp: Optional[datetime] = None 9 | message: Optional[str] = None 10 | 11 | def __str__(self): 12 | return f"{self.timestamp}: {self.message}" 13 | -------------------------------------------------------------------------------- /app/pipeline/chat/output_models/output_models/selected_file_model.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic.v1 import BaseModel as V1BaseModel, Field as V1Field 4 | 5 | 6 | class SelectedFiles(V1BaseModel): 7 | selected_files: List[str] = V1Field( 8 | description="List of selected files from the repository. Minimum 0 files, maximum 5 files." 
9 | ) 10 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | groups: 8 | python-deps: 9 | applies-to: version-updates 10 | patterns: 11 | - "*" 12 | - package-ecosystem: "github-actions" 13 | directory: "/" 14 | schedule: 15 | interval: "weekly" 16 | -------------------------------------------------------------------------------- /app/domain/data/feedback_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class FeedbackDTO(BaseModel): 7 | text: Optional[str] = None 8 | test_case_name: Optional[str] = Field(alias="testCaseName", default=None) 9 | credits: float 10 | 11 | def __str__(self): 12 | return f"{self.test_case_name}: {self.text} ({self.credits} credits)" 13 | -------------------------------------------------------------------------------- /app/domain/data/simple_submission_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from datetime import datetime 6 | 7 | 8 | class SimpleSubmissionDTO(BaseModel): 9 | timestamp: Optional[datetime] = Field(alias="timestamp", default=None) 10 | score: Optional[float] = Field(alias="score", default=0) 11 | 12 | class Config: 13 | require_by_default = False 14 | -------------------------------------------------------------------------------- /app/domain/data/tool_message_content_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel, ConfigDict, Field 4 | 5 | 6 | class ToolMessageContentDTO(BaseModel): 7 | 8 | model_config 
= ConfigDict(populate_by_name=True) 9 | name: Optional[str] = Field(alias="toolName", default="") 10 | tool_content: str = Field(alias="toolContent") 11 | tool_call_id: str = Field(alias="toolCallId") 12 | -------------------------------------------------------------------------------- /app/domain/pipeline_execution_settings_dto.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class PipelineExecutionSettingsDTO(BaseModel): 7 | authentication_token: str = Field(alias="authenticationToken") 8 | allowed_model_identifiers: List[str] = Field( 9 | default=[], alias="allowedModelIdentifiers" 10 | ) 11 | artemis_base_url: str = Field(alias="artemisBaseUrl") 12 | -------------------------------------------------------------------------------- /app/pipeline/chat/output_models/output_models/selected_paragraphs.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import Field, BaseModel 4 | 5 | 6 | class SelectedParagraphs(BaseModel): 7 | selected_paragraphs: List[int] = Field( 8 | default=[], 9 | description="List of paragraphs sorted from most relevant to least relevant to the student question, " 10 | "each with a relevance score.", 11 | ) 12 | -------------------------------------------------------------------------------- /app/domain/chat/chat_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from pydantic import Field 4 | 5 | from app.domain import PipelineExecutionDTO 6 | from app.common.pyris_message import PyrisMessage 7 | from app.domain.data.user_dto import UserDTO 8 | 9 | 10 | class ChatPipelineExecutionDTO(PipelineExecutionDTO): 11 | chat_history: List[PyrisMessage] = Field(alias="chatHistory", default=[]) 12 | user: Optional[UserDTO] 13 | 
-------------------------------------------------------------------------------- /app/web/routers/health.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, status, Response, Depends 2 | 3 | from app.dependencies import TokenValidator 4 | 5 | router = APIRouter(prefix="/api/v1/health", tags=["health"]) 6 | 7 | 8 | @router.get( 9 | "/", 10 | dependencies=[Depends(TokenValidator())], 11 | ) 12 | def health_check(): 13 | return Response( 14 | status_code=status.HTTP_200_OK, content=b"[]", media_type="application/json" 15 | ) 16 | -------------------------------------------------------------------------------- /app/domain/data/faq_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | 3 | 4 | class FaqDTO(BaseModel): 5 | faq_id: int = Field(alias="faqId") 6 | course_id: int = Field(alias="courseId") 7 | question_title: str = Field(alias="questionTitle") 8 | question_answer: str = Field(alias="questionAnswer") 9 | course_name: str = Field(default="", alias="courseName") 10 | course_description: str = Field(default="", alias="courseDescription") 11 | -------------------------------------------------------------------------------- /app/domain/data/metrics/lecture_unit_information_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from pydantic import BaseModel, Field 3 | from datetime import datetime 4 | 5 | 6 | class LectureUnitInformationDTO(BaseModel): 7 | id: Optional[int] = None 8 | name: Optional[str] = None 9 | release_date: Optional[datetime] = Field(None, alias="releaseDate") 10 | type: Optional[str] = None 11 | 12 | class Config: 13 | populate_by_name = True 14 | -------------------------------------------------------------------------------- /app/ingestion/abstract_ingestion.py: 
class AbstractIngestion(ABC):
    """Base class for components that ingest repositories into a database.

    Concrete ingestors implement :meth:`chunk_data` to split the files
    found under a path into small chunk dictionaries suitable for indexing.
    """

    @abstractmethod
    def chunk_data(self, path: str) -> List[Dict[str, str]]:
        """Split the code files under ``path`` into chunk dictionaries."""
class ResultDTO(BaseModel):
    """Outcome of a single submission build/test run."""

    # When the result was completed; None while still pending.
    completion_date: Optional[datetime] = Field(alias="completionDate", default=None)
    # Whether the run passed overall.
    successful: bool = Field(alias="successful", default=False)
    # Individual feedback entries; default_factory avoids declaring one
    # shared mutable list as the field default.
    feedbacks: List[FeedbackDTO] = Field(alias="feedbacks", default_factory=list)
class FunctionDTO(BaseModel):
    """Name and JSON-encoded arguments of a function invoked by the LLM."""

    name: str = Field(..., alias="name")
    # Raw JSON string that pydantic parses into a Python object.
    arguments: Json[Any] = Field(..., alias="arguments")


class ToolCallDTO(BaseModel):
    """A single tool (function) call requested by the LLM."""

    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(alias="id")
    # Only "function"-type tool calls are supported.
    type: Literal["function"] = "function"
    function: FunctionDTO = Field(alias="function")
class ExerciseStudentMetricsDTO(BaseModel):
    """Per-exercise metrics for one student, keyed by id (presumably the exercise id)."""

    average_score: Dict[int, float] = Field(default_factory=dict, alias="averageScore")
    score: Dict[int, float] = Field(default_factory=dict)
    average_latest_submission: Dict[int, float] = Field(
        default_factory=dict, alias="averageLatestSubmission"
    )
    latest_submission: Dict[int, float] = Field(
        default_factory=dict, alias="latestSubmission"
    )
    # Ids of completed exercises. The previous default `Field({})` was an
    # empty *dict*, not a set; default_factory=set gives a correctly typed
    # default (and all dict fields above now avoid shared mutable defaults).
    completed: Set[int] = Field(default_factory=set)
class LectureUnitStudentMetricsDTO(BaseModel):
    """Per-lecture-unit metrics for one student."""

    # Lecture unit details, keyed by id (presumably the lecture unit id).
    lecture_unit_information: Dict[int, LectureUnitInformationDTO] = Field(
        {}, alias="lectureUnitInformation"
    )
    # Ids of completed lecture units; None when not reported.
    completed: Optional[Set[int]] = None

    class Config:
        # Accept both field names and their camelCase aliases on input.
        populate_by_name = True
9 | """ 10 | 11 | result: str 12 | """The result message or status of the lecture chat pipeline operation.""" 13 | -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | "component:LLM": 2 | - changed-files: 3 | - any-glob-to-any-file: app/llm/** 4 | "component:Pipeline": 5 | - changed-files: 6 | - any-glob-to-any-file: app/pipeline/** 7 | "component:FastAPI": 8 | - changed-files: 9 | - any-glob-to-any-file: app/web/** 10 | "component:Domain": 11 | - changed-files: 12 | - any-glob-to-any-file: app/domain/** 13 | "component:Docker": 14 | - changed-files: 15 | - any-glob-to-any-file: docker/** 16 | "component:CI/CD": 17 | - changed-files: 18 | - any-glob-to-any-file: .github/** 19 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile to build a container image for a Python 3.12 FastAPI application 2 | FROM python:3.12.3-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /app 6 | 7 | # Copy the dependencies file to the working directory 8 | COPY requirements.txt . 
# Response formats a completion request may ask for. NOTE: the functional
# Enum API takes the class name first, then the member names; the previous
# call Enum("TEXT", "JSON") created an enum class *named* "TEXT" whose only
# member was JSON, so CompletionArgumentsResponseFormat.TEXT did not exist.
CompletionArgumentsResponseFormat = Enum(
    "CompletionArgumentsResponseFormat", ["TEXT", "JSON"]
)


class CompletionArguments:
    """Arguments for the completion request"""

    def __init__(
        self,
        max_tokens: int = None,
        temperature: float = None,
        stop: list[str] = None,
        response_format: CompletionArgumentsResponseFormat = "TEXT",
    ):
        # Maximum number of tokens to generate; None lets the model decide.
        self.max_tokens = max_tokens
        # Sampling temperature; None uses the provider default.
        self.temperature = temperature
        # Stop sequences that end generation early.
        self.stop = stop
        # Kept as the plain string "TEXT" by default for backward
        # compatibility with callers that compare against "TEXT"/"JSON".
        self.response_format = response_format
class CompetencyJolDTO(BaseModel):
    """A student's judgement-of-learning (JOL) entry for one competency."""

    competency_id: Optional[int] = Field(default=None, alias="competencyId")
    # Self-assessed judgement value entered by the student.
    jol_value: Optional[int] = Field(default=None, alias="jolValue")
    # When the judgement was made.
    judgement_time: Optional[datetime] = Field(default=None, alias="judgementTime")
    # Competency progress/confidence snapshot accompanying the judgement.
    competency_progress: Optional[float] = Field(
        default=None, alias="competencyProgress"
    )
    competency_confidence: Optional[float] = Field(
        default=None, alias="competencyConfidence"
    )

    class Config:
        # Accept both field names and their camelCase aliases on input.
        populate_by_name = True
class ChatPipelineExecutionBaseDataDTO(BaseModel):
    """Common payload shared by chat pipeline execution requests."""

    # Conversation so far; default_factory avoids declaring one shared
    # mutable list as the field default.
    chat_history: List[PyrisMessage] = Field(alias="chatHistory", default_factory=list)
    # The requesting user, if known.
    user: Optional[UserDTO]
    # Execution settings (auth token, allowed models, Artemis base URL).
    settings: Optional[PipelineExecutionSettingsDTO]
    # Stages Artemis already created for this run, if any.
    initial_stages: Optional[List[StageDTO]] = Field(
        default=None, alias="initialStages"
    )
class LectureUnitDTO(BaseModel):
    """Identifies one lecture unit plus its lecture and course context."""

    # Base64-encoded PDF of the unit; empty when no file was attached.
    pdf_file_base64: str = Field(alias="pdfFile", default="")
    lecture_unit_id: int = Field(alias="lectureUnitId")
    lecture_unit_name: str = Field(alias="lectureUnitName", default="")
    lecture_unit_link: str = Field(alias="lectureUnitLink", default="")
    # Enclosing lecture.
    lecture_id: int = Field(alias="lectureId")
    lecture_name: str = Field(alias="lectureName", default="")
    # Enclosing course.
    course_id: int = Field(alias="courseId")
    course_name: str = Field(alias="courseName", default="")
    course_description: str = Field(alias="courseDescription", default="")
10 | 11 | def __init__(self, request_handler: RequestHandler, **kwargs: Any) -> None: 12 | super().__init__(request_handler=request_handler, **kwargs) 13 | 14 | def embed_documents(self, texts: List[str]) -> List[List[float]]: 15 | return [self.embed_query(text) for text in texts] 16 | 17 | def embed_query(self, text: str) -> List[float]: 18 | return self.request_handler.embed(text) 19 | -------------------------------------------------------------------------------- /app/llm/external/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | from ...llm.external.model import LanguageModel 4 | from ...llm.external.openai_completion import ( 5 | DirectOpenAICompletionModel, 6 | AzureOpenAICompletionModel, 7 | ) 8 | from ...llm.external.openai_chat import DirectOpenAIChatModel, AzureOpenAIChatModel 9 | from ...llm.external.openai_embeddings import ( 10 | DirectOpenAIEmbeddingModel, 11 | AzureOpenAIEmbeddingModel, 12 | ) 13 | from ...llm.external.ollama import OllamaModel 14 | 15 | AnyLLM = Union[ 16 | DirectOpenAICompletionModel, 17 | AzureOpenAICompletionModel, 18 | DirectOpenAIChatModel, 19 | AzureOpenAIChatModel, 20 | DirectOpenAIEmbeddingModel, 21 | AzureOpenAIEmbeddingModel, 22 | OllamaModel, 23 | ] 24 | -------------------------------------------------------------------------------- /app/domain/data/programming_submission_dto.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | from datetime import datetime 6 | from ...domain.data.build_log_entry import BuildLogEntryDTO 7 | from ...domain.data.result_dto import ResultDTO 8 | 9 | 10 | class ProgrammingSubmissionDTO(BaseModel): 11 | id: int 12 | date: Optional[datetime] = None 13 | repository: Dict[str, str] = Field(alias="repository", default={}) 14 | is_practice: bool = Field(alias="isPractice") 15 | 
class ExerciseChatPipelineExecutionDTO(ChatPipelineExecutionDTO):
    """Execution payload for the exercise chat pipeline."""

    # Latest programming submission, when one exists.
    submission: Optional[ProgrammingSubmissionDTO] = None
    exercise: ProgrammingExerciseDTO
    course: CourseDTO
    # Optional event payload that accompanied the request.
    event_payload: Optional[PyrisEventDTO[Any]] = Field(
        default=None, alias="eventPayload"
    )
5 | Without any comment, return the result in the following JSON format, it is important to avoid giving 6 | unnecessary information, only the number of the paragraph if it's necessary for answering the student's question 7 | otherwise leave the array empty. 8 | {{"selected_paragraphs": [, , ...]}} 9 | 10 | {paragraphs} 11 | Question: {question} 12 | -------------------------------------------------------------------------------- /docker/pyris.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------------------------------------------------- 2 | # Pyris base service 3 | # ---------------------------------------------------------------------------------------------------------------------- 4 | 5 | services: 6 | pyris-app: 7 | container_name: pyris-app 8 | build: 9 | context: .. 10 | dockerfile: Dockerfile 11 | pull: true 12 | environment: 13 | APPLICATION_YML_PATH: "/config/application.yml" 14 | LLM_CONFIG_PATH: "/config/llm_config.yml" 15 | expose: 16 | - "8000" 17 | networks: 18 | - pyris 19 | command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] 20 | 21 | networks: 22 | pyris: 23 | driver: "bridge" 24 | name: pyris -------------------------------------------------------------------------------- /app/dependencies.py: -------------------------------------------------------------------------------- 1 | from fastapi import Depends 2 | from fastapi.requests import Request 3 | 4 | from app.common.custom_exceptions import ( 5 | RequiresAuthenticationException, 6 | PermissionDeniedException, 7 | ) 8 | from app.config import APIKeyConfig, settings 9 | 10 | 11 | def _get_api_key(request: Request) -> str: 12 | authorization_header = request.headers.get("Authorization") 13 | 14 | if not authorization_header: 15 | raise RequiresAuthenticationException 16 | 17 | return authorization_header 18 | 19 | 20 | class TokenValidator: 21 | async def 
def generate_structured_tool_from_function(tool_function: Callable) -> StructuredTool:
    """
    Generates a structured tool from a function
    :param tool_function: The tool function
    :return: The structured tool
    """
    return StructuredTool.from_function(tool_function)


def generate_structured_tools_from_functions(
    tools: List[Callable],
) -> List[StructuredTool]:
    """
    Generates a list of structured tools from a list of functions
    :param tools: The list of tool functions
    :return: The list of structured tools, in the same order
    """
    return list(map(generate_structured_tool_from_function, tools))
07bLIKLEw4QRwMmrLhzu2sZnFipAppXjsQ8tRa/QO4eoaEM97FKq6qONVwAA2if6 11 | l3amSySYVDvMYpaOwQYawKTole1Kon06h8JlIr+A5W3vmraMfQZZY72HAkxuOYH0 12 | wchOEYKU+jlmutbEdz747Ngleb5kp55CtL/PlEawEpqXWWXYBqo8mmMCAQI= 13 | -----END DH PARAMETERS----- 14 | -------------------------------------------------------------------------------- /app/domain/competency_extraction_pipeline_execution_dto.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import Field, BaseModel 4 | 5 | from . import PipelineExecutionDTO 6 | from .data.competency_dto import CompetencyTaxonomy, Competency 7 | 8 | 9 | class CompetencyExtractionPipelineExecutionDTO(BaseModel): 10 | execution: PipelineExecutionDTO 11 | course_description: str = Field(alias="courseDescription") 12 | current_competencies: list[Competency] = Field( 13 | alias="currentCompetencies", default=[] 14 | ) 15 | taxonomy_options: List[CompetencyTaxonomy] = Field( 16 | alias="taxonomyOptions", default=[] 17 | ) 18 | max_n: int = Field( 19 | alias="maxN", 20 | description="Maximum number of competencies to extract from the course description", 21 | default=10, 22 | ) 23 | -------------------------------------------------------------------------------- /app/domain/data/metrics/competency_student_metrics_dto.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Set 2 | from pydantic import BaseModel, Field 3 | from app.domain.data.metrics.competency_information_dto import CompetencyInformationDTO 4 | from app.domain.data.metrics.competency_jol_dto import CompetencyJolDTO 5 | 6 | 7 | class CompetencyStudentMetricsDTO(BaseModel): 8 | competency_information: Dict[int, CompetencyInformationDTO] = Field( 9 | {}, alias="competencyInformation" 10 | ) 11 | exercises: Dict[int, Set[int]] = Field({}) 12 | lecture_units: Dict[int, Set[int]] = Field({}, alias="lectureUnits") 13 | progress: Dict[int, float] = Field({}) 
class ExamDTO(BaseModel):
    """Exam metadata as sent by Artemis."""

    id: int = Field(alias="id")
    title: Optional[str] = Field(default=None, alias="title")
    # True when the exam is text-based.
    is_text_exam: bool = Field(default=False, alias="isTextExam")
    # Exam window.
    start_date: Optional[datetime] = Field(default=None, alias="startDate")
    end_date: Optional[datetime] = Field(default=None, alias="endDate")
    # Presumably when results become visible to students — confirm with caller.
    publish_results_date: Optional[datetime] = Field(
        default=None, alias="publishResultsDate"
    )
    # Student review window after results are published.
    exam_student_review_start: Optional[datetime] = Field(
        default=None, alias="examStudentReviewStart"
    )
    exam_student_review_end: Optional[datetime] = Field(
        default=None, alias="examStudentReviewEnd"
    )
| return ( 16 | f"{self.model_info}: {self.num_input_tokens} input cost: {self.cost_per_input_token}," 17 | f" {self.num_output_tokens} output cost: {self.cost_per_output_token}, pipeline: {self.pipeline} " 18 | ) 19 | -------------------------------------------------------------------------------- /docker/pyris-dev.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------------------------------------------------- 2 | # Setup for a Pyris development server. 3 | # ---------------------------------------------------------------------------------------------------------------------- 4 | 5 | services: 6 | pyris-app: 7 | extends: 8 | file: ./pyris.yml 9 | service: pyris-app 10 | pull_policy: never 11 | restart: "no" 12 | volumes: 13 | - ../application.local.yml:/config/application.yml:ro 14 | - ../llm_config.local.yml:/config/llm_config.yml:ro 15 | networks: 16 | - pyris 17 | ports: 18 | - 8000:8000 19 | 20 | weaviate: 21 | extends: 22 | file: ./weaviate.yml 23 | service: weaviate 24 | networks: 25 | - pyris 26 | ports: 27 | - 8001:8001 28 | - 50051:50051 29 | 30 | networks: 31 | pyris: 32 | driver: "bridge" 33 | name: pyris 34 | 35 | -------------------------------------------------------------------------------- /app/domain/__init__.py: -------------------------------------------------------------------------------- 1 | from .error_response_dto import IrisErrorResponseDTO 2 | from .pipeline_execution_dto import PipelineExecutionDTO 3 | from .pipeline_execution_settings_dto import PipelineExecutionSettingsDTO 4 | from .chat.chat_pipeline_execution_dto import ChatPipelineExecutionDTO 5 | from .chat.chat_pipeline_execution_base_data_dto import ChatPipelineExecutionBaseDataDTO 6 | from .competency_extraction_pipeline_execution_dto import ( 7 | CompetencyExtractionPipelineExecutionDTO, 8 | ) 9 | from .inconsistency_check_pipeline_execution_dto import ( 10 | 
class StudentMetricsDTO(BaseModel):
    """Container bundling all per-student metric DTOs sent to/from Artemis.

    Every sub-metric is optional, so a partially populated payload validates.
    """

    exercise_metrics: Optional[ExerciseStudentMetricsDTO] = Field(
        None, alias="exerciseMetrics"
    )
    # NOTE(review): unlike its siblings this field name mirrors the DTO class
    # ("..._dto" suffix) instead of a plain "lecture_unit_metrics"; the alias
    # is part of the wire format, so renaming either side would be breaking —
    # confirm with the Artemis sender before cleaning this up.
    lecture_unit_student_metrics_dto: Optional[LectureUnitStudentMetricsDTO] = Field(
        None, alias="lectureUnitStudentMetricsDTO"
    )
    competency_metrics: Optional[CompetencyStudentMetricsDTO] = Field(
        None, alias="competencyMetrics"
    )

    class Config:
        # Accept population both by snake_case field name and camelCase alias.
        populate_by_name = True
class IngestionPipelineExecutionDto(PipelineExecutionDTO):
    """Execution payload for ingesting one lecture unit's slides."""

    # The lecture unit to ingest; required (pydantic `...`).
    lecture_unit: LectureUnitDTO = Field(..., alias="pyrisLectureUnit")
    # NOTE(review): declared Optional but without a default, so pydantic v2
    # treats this as a *required* field that may be null — confirm whether
    # `= None` was intended here (same pattern in the FAQ DTO below).
    settings: Optional[PipelineExecutionSettingsDTO]
    # Stages already completed by the caller before this pipeline starts.
    initial_stages: Optional[List[StageDTO]] = Field(
        default=None, alias="initialStages"
    )


class FaqIngestionPipelineExecutionDto(PipelineExecutionDTO):
    """Execution payload for ingesting a single FAQ entry."""

    # The FAQ to ingest; required (pydantic `...`).
    faq: FaqDTO = Field(..., alias="pyrisFaqWebhookDTO")
    # NOTE(review): Optional without default — required-but-nullable in
    # pydantic v2; see note on IngestionPipelineExecutionDto.settings.
    settings: Optional[PipelineExecutionSettingsDTO]
    # Stages already completed by the caller before this pipeline starts.
    initial_stages: Optional[List[StageDTO]] = Field(
        default=None, alias="initialStages"
    )
| disable_existing_loggers: False 3 | formatters: 4 | default: 5 | "use_colors": null, 6 | "()": uvicorn.logging.DefaultFormatter 7 | format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 8 | access: 9 | "use_colors": null, 10 | "()": uvicorn.logging.AccessFormatter 11 | format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 12 | handlers: 13 | default: 14 | formatter: default 15 | class: logging.StreamHandler 16 | stream: ext://sys.stderr 17 | access: 18 | formatter: access 19 | class: logging.StreamHandler 20 | stream: ext://sys.stdout 21 | loggers: 22 | uvicorn: 23 | level: INFO 24 | handlers: 25 | - default 26 | propagate: no 27 | uvicorn.error: 28 | level: INFO 29 | handlers: 30 | - default 31 | propagate: no 32 | uvicorn.access: 33 | level: INFO 34 | handlers: 35 | - access 36 | propagate: no 37 | root: 38 | level: DEBUG 39 | handlers: 40 | - default 41 | propagate: no -------------------------------------------------------------------------------- /docker/nginx/pyris-server.conf: -------------------------------------------------------------------------------- 1 | resolver 127.0.0.11; 2 | resolver_timeout 5s; 3 | client_max_body_size 10m; 4 | client_body_buffer_size 1m; 5 | 6 | location / { 7 | proxy_pass http://pyris; 8 | proxy_http_version 1.1; 9 | proxy_set_header Upgrade $http_upgrade; 10 | proxy_set_header Connection 'upgrade'; 11 | proxy_set_header Host $host; 12 | # proxy_set_header Early-Data $ssl_early_data; 13 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 14 | proxy_set_header X-Forwarded-Proto $scheme; 15 | proxy_cache_bypass $http_upgrade; 16 | proxy_send_timeout 900s; 17 | proxy_read_timeout 900s; 18 | proxy_max_temp_file_size 0; 19 | proxy_buffering on; 20 | proxy_buffer_size 16k; 21 | proxy_buffers 8 16k; 22 | proxy_busy_buffers_size 32k; 23 | fastcgi_send_timeout 900s; 24 | fastcgi_read_timeout 900s; 25 | client_max_body_size 128M; 26 | } 27 | 28 | error_page 502 /502.html; 29 | location /502.html { 30 | root 
class PipelineEnum(str, Enum):
    """String identifiers for every Iris pipeline.

    Used (among other places) to tag TokenUsageDTO records with the pipeline
    that produced them.  Mixes in ``str`` so members serialize as their plain
    string value.  Member names and values are wire format — do not rename.
    """

    # Chat pipelines
    IRIS_CODE_FEEDBACK = "IRIS_CODE_FEEDBACK"
    IRIS_CHAT_COURSE_MESSAGE = "IRIS_CHAT_COURSE_MESSAGE"
    IRIS_CHAT_EXERCISE_MESSAGE = "IRIS_CHAT_EXERCISE_MESSAGE"
    IRIS_CHAT_EXERCISE_AGENT_MESSAGE = "IRIS_CHAT_EXERCISE_AGENT_MESSAGE"
    IRIS_INTERACTION_SUGGESTION = "IRIS_INTERACTION_SUGGESTION"
    IRIS_CHAT_LECTURE_MESSAGE = "IRIS_CHAT_LECTURE_MESSAGE"
    # Generation / support pipelines
    IRIS_COMPETENCY_GENERATION = "IRIS_COMPETENCY_GENERATION"
    IRIS_CITATION_PIPELINE = "IRIS_CITATION_PIPELINE"
    IRIS_RERANKER_PIPELINE = "IRIS_RERANKER_PIPELINE"
    IRIS_SUMMARY_PIPELINE = "IRIS_SUMMARY_PIPELINE"
    # Retrieval and ingestion pipelines
    IRIS_LECTURE_RETRIEVAL_PIPELINE = "IRIS_LECTURE_RETRIEVAL_PIPELINE"
    IRIS_LECTURE_INGESTION = "IRIS_LECTURE_INGESTION"
    IRIS_FAQ_INGESTION = "IRIS_FAQ_INGESTION"
    IRIS_FAQ_RETRIEVAL_PIPELINE = "IRIS_FAQ_RETRIEVAL_PIPELINE"
    IRIS_INCONSISTENCY_CHECK = "IRIS_INCONSISTENCY_CHECK"
    IRIS_REWRITING_PIPELINE = "IRIS_REWRITING_PIPELINE"
    # Default when a token-usage record has not been attributed yet.
    NOT_SET = "NOT_SET"
class LecturesDeletionStatusCallback(StatusCallback):
    """
    Callback class for updating the status of a lecture-slides deletion
    pipeline run.  (Docstring previously said "Tutor Chat pipeline", which was
    a copy-paste error.)
    """

    def __init__(
        self, run_id: str, base_url: str, initial_stages: List[StageDTO] = None
    ):
        """Build the webhook URL and the stage list, then init the base callback.

        :param run_id: Identifier of this pipeline run (used in the webhook URL).
        :param base_url: Base URL of the Artemis instance to report status to.
        :param initial_stages: Stages already completed by the caller; this
            callback's own stage is appended after them.
        """
        url = f"{base_url}/api/public/pyris/webhooks/ingestion/runs/{run_id}/status"

        # Our stage starts right after any caller-provided stages.
        current_stage_index = len(initial_stages) if initial_stages else 0
        # Fix: copy the caller's list.  The previous `stages = initial_stages
        # or []` followed by `stages += [...]` extended the caller-provided
        # list in place, leaking this callback's stage back to the caller.
        stages = list(initial_stages) if initial_stages else []
        stages.append(
            StageDTO(
                weight=100, state=StageStateEnum.NOT_STARTED, name="Slides removal"
            )
        )
        status = IngestionStatusUpdateDTO(stages=stages)
        stage = stages[current_stage_index]
        super().__init__(url, run_id, status, stage, current_stage_index)
class Pipeline(metaclass=ABCMeta):
    """Abstract base class for all pipelines.

    Subclasses must override ``__call__`` (enforced at class-creation time by
    ``__init_subclass__``) and may record token usage via ``_append_tokens``.
    """

    implementation_id: str
    # Token-usage records accumulated during a run, one per LLM call.
    tokens: List[TokenUsageDTO]

    def __init__(self, implementation_id=None, **kwargs):
        self.implementation_id = implementation_id
        # Fix: initialize the token log here.  Previously `tokens` was only a
        # class-level annotation, so _append_tokens() raised AttributeError
        # for any subclass that never assigned self.tokens itself.
        self.tokens = []

    def __str__(self):
        return f"{self.__class__.__name__}"

    def __repr__(self):
        return f"{self.__class__.__name__}"

    def __call__(self, **kwargs):
        """
        Extracts the required parameters from the kwargs and runs the pipeline.
        """
        raise NotImplementedError("Subclasses must implement the __call__ method.")

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Fail fast at class definition if a subclass merely inherits the
        # NotImplementedError stub instead of providing its own __call__.
        if "__call__" not in cls.__dict__:
            raise NotImplementedError(
                "Subclasses of Pipeline interface must implement the __call__ method."
            )

    def _append_tokens(self, tokens: TokenUsageDTO, pipeline: PipelineEnum) -> None:
        """Tag a usage record with its owning pipeline and store it."""
        tokens.pipeline = pipeline
        self.tokens.append(tokens)
2 | You are tasked with helping to prepare educational materials for university students. 3 | You were provided with the raw text content of a slide and, in some cases, 4 | a description of the slide generated by another AI assistant. 5 | The assistant can fail to generate a description for some slides. 6 | Your task is to merge the description and the text content of the slide. 7 | If a description is available, you should add it after the raw text content of the slide. 8 | If an error message is given at the description, please ignore it and return only the raw text content. 9 | 10 | 11 | ############################################################################################################ 12 | Here is the raw text content of the Slide provided: 13 | {page_content} 14 | ############################################################################################################ 15 | 16 | ############################################################################################################ 17 | Here is the description of the slide provided, if it's an error message ignore it: 18 | {image_interpretation} 19 | ############################################################################################################ 20 | -------------------------------------------------------------------------------- /docker/pyris-production-internal.yml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------------------------------------------------- 2 | # Setup for a Pyris server suitable for internal network requests (without nginx). 3 | # ---------------------------------------------------------------------------------------------------------------------- 4 | # It is designed to take in environment variables for configuration, similar to the production setup. 
5 | # ---------------------------------------------------------------------------------------------------------------------- 6 | 7 | services: 8 | pyris-app: 9 | extends: 10 | file: ./pyris.yml 11 | service: pyris-app 12 | image: ghcr.io/ls1intum/pyris:${PYRIS_DOCKER_TAG:-latest} 13 | pull_policy: always 14 | restart: unless-stopped 15 | volumes: 16 | - ${PYRIS_APPLICATION_YML_FILE}:/config/application.yml:ro 17 | - ${PYRIS_LLM_CONFIG_YML_FILE}:/config/llm_config.yml:ro 18 | ports: 19 | - "${PYRIS_PORT:-8000}:8000" 20 | networks: 21 | - pyris 22 | 23 | weaviate: 24 | extends: 25 | file: ./weaviate.yml 26 | service: weaviate 27 | ports: 28 | - "${WEAVIATE_PORT:-8001}:8001" 29 | - "${WEAVIATE_GRPC_PORT:-50051}:50051" 30 | networks: 31 | - pyris 32 | 33 | networks: 34 | pyris: 35 | driver: "bridge" 36 | name: pyris -------------------------------------------------------------------------------- /app/pipeline/prompts/choose_response_prompt.txt: -------------------------------------------------------------------------------- 1 | Two paragraphs are shown below. Each paragraph has a number next to it. A question is also provided. 2 | Respond with the numbers of the paragraph that respond precisely and cover the full scope of the question. 3 | To understand the full scope of the question, take into consideration the Chat History as it the necessary context for the question. 4 | The relevance score is a number from 1 to 10 based on how relevant the paragraphs are to answer the question. 5 | Without any comment, return the result in the following JSON format, it is important to avoid giving 6 | unnecessary information, only the number of the paragraph that is most relevant and better suited for the question. 7 | {{"selected_paragraphs": []}} 8 | If the question is asking for code, return {{"selected_paragraphs": [0]}} 9 | Do not by any means return a the number of the response that has written programming code in it. 
class ProgrammingLanguage(str, Enum):
    """Programming languages Artemis programming exercises can use."""

    JAVA = "JAVA"
    PYTHON = "PYTHON"
    C = "C"
    HASKELL = "HASKELL"
    KOTLIN = "KOTLIN"
    VHDL = "VHDL"
    ASSEMBLER = "ASSEMBLER"
    SWIFT = "SWIFT"
    OCAML = "OCAML"
    EMPTY = "EMPTY"


class ProgrammingExerciseDTO(BaseModel):
    """DTO describing a programming exercise as received from Artemis."""

    id: int
    name: str
    # NOTE(review): typed as plain str rather than the ProgrammingLanguage
    # enum above — presumably to tolerate unknown languages; confirm before
    # tightening the type.
    programming_language: Optional[str] = Field(
        alias="programmingLanguage", default=None
    )
    # Dict[str, str] repositories — assumed to map file path -> file content;
    # TODO confirm against the Artemis sender.
    template_repository: Dict[str, str] = Field(alias="templateRepository", default={})
    solution_repository: Dict[str, str] = Field(alias="solutionRepository", default={})
    test_repository: Dict[str, str] = Field(alias="testRepository", default={})
    # Fix: the default is None, so the annotation must be Optional[str].
    # With the previous bare `str` annotation, an explicit null sent by
    # Artemis would fail pydantic v2 validation.
    problem_statement: Optional[str] = Field(alias="problemStatement", default=None)
    start_date: Optional[datetime] = Field(alias="startDate", default=None)
    end_date: Optional[datetime] = Field(alias="endDate", default=None)
    max_points: Optional[float] = Field(alias="maxPoints", default=None)
    recent_changes: Optional[str] = Field(
        alias="recentChanges",
        default=None,
        description="Git diff of the recent changes",
    )
class IrisMessageRole(str, Enum):
    """Role of a message's sender in a Pyris conversation."""

    USER = "USER"
    # NOTE: the wire value for the assistant role is "LLM", not "ASSISTANT".
    ASSISTANT = "LLM"
    SYSTEM = "SYSTEM"
    TOOL = "TOOL"


class PyrisMessage(BaseModel):
    """A single message exchanged in a Pyris chat, with token accounting."""

    model_config = ConfigDict(populate_by_name=True)

    # Token usage attributed to producing this message; defaults to an
    # all-zero record via the TokenUsageDTO field defaults.
    token_usage: TokenUsageDTO = Field(default_factory=TokenUsageDTO)

    sent_at: datetime | None = Field(alias="sentAt", default=None)
    sender: IrisMessageRole

    contents: List[MessageContentDTO] = Field(default=[])

    def __str__(self):
        # e.g. "user: [...]" — lowercases the role's *value* ("llm" for
        # ASSISTANT, since its value is "LLM").
        return f"{self.sender.lower()}: {self.contents}"


class PyrisAIMessage(PyrisMessage):
    """Assistant-authored message, optionally carrying tool calls."""

    model_config = ConfigDict(populate_by_name=True)
    sender: IrisMessageRole = IrisMessageRole.ASSISTANT
    # NOTE(review): Optional but no default, so pydantic v2 treats toolCalls
    # as required-but-nullable — confirm whether `default=None` was intended.
    tool_calls: Optional[List[ToolCallDTO]] = Field(alias="toolCalls")


class PyrisToolMessage(PyrisMessage):
    """Tool-result message; narrows `contents` to tool message content."""

    model_config = ConfigDict(populate_by_name=True)
    sender: IrisMessageRole = IrisMessageRole.TOOL
    contents: List[ToolMessageContentDTO] = Field(default=[])
class APIKeyConfig(BaseModel):
    """One accepted API token for authenticating callers."""

    token: str


class WeaviateSettings(BaseModel):
    """Connection settings for the Weaviate vector database."""

    host: str
    port: int
    grpc_port: int


class Settings(BaseModel):
    """Application settings loaded from the YAML file at APPLICATION_YML_PATH."""

    api_keys: list[APIKeyConfig]
    env_vars: dict[str, str]
    weaviate: WeaviateSettings

    @classmethod
    def get_settings(cls):
        """Get the settings from the configuration file.

        Raises EnvironmentError if APPLICATION_YML_PATH is unset,
        FileNotFoundError if the file is missing, and yaml.YAMLError if it
        cannot be parsed.
        """
        file_path_env = os.environ.get("APPLICATION_YML_PATH")
        if not file_path_env:
            raise EnvironmentError(
                "APPLICATION_YML_PATH environment variable is not set."
            )

        file_path = Path(file_path_env)
        try:
            with open(file_path, "r") as file:
                settings_file = yaml.safe_load(file)
            return cls.model_validate(settings_file)
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"Configuration file not found at {file_path}."
            ) from e
        except yaml.YAMLError as e:
            raise yaml.YAMLError(f"Error parsing YAML file at {file_path}.") from e

    def set_env_vars(self):
        """Set environment variables from the settings."""
        for key, value in self.env_vars.items():
            os.environ[key] = value


# NOTE: loaded at import time — importing app.config reads the YAML file and
# will raise if APPLICATION_YML_PATH is unset or the file is invalid.
settings = Settings.get_settings()
def should_allow_faq_tool(db: VectorDatabase, course_id: int) -> bool:
    """
    Checks if there are indexed faqs for the given course

    :param db: The vector database on which the faqs are indexed
    :param course_id: The course ID
    :return: True if there are indexed faqs for the course, False otherwise
    """
    if course_id:
        # Fetch the first object that matches the course ID with the language property
        result = db.faqs.query.fetch_objects(
            filters=Filter.by_property(FaqSchema.COURSE_ID.value).equal(course_id),
            limit=1,
            return_properties=[FaqSchema.COURSE_NAME.value],
        )
        return len(result.objects) > 0
    # A falsy course_id (e.g. 0 or None) never enables the FAQ tool.
    return False


def format_faqs(retrieved_faqs):
    """
    Format the retrieved FAQs into a single string.

    :param retrieved_faqs: List of retrieved FAQs
    :return: Formatted string containing the FAQ data
    """
    result = ""
    for faq in retrieved_faqs:
        # Each FAQ is rendered as one bracketed "[FAQ ID: ..., ...]" segment;
        # segments are concatenated without a separator.
        res = "[FAQ ID: {}, FAQ Question: {}, FAQ Answer: {}]".format(
            faq.get(FaqSchema.FAQ_ID.value),
            faq.get(FaqSchema.QUESTION_TITLE.value),
            faq.get(FaqSchema.QUESTION_ANSWER.value),
        )
        result += res
    return result
from typing import Literal, Any
from openai import OpenAI
from openai.lib.azure import AzureOpenAI

from ...llm import CompletionArguments
from ...llm.external.model import CompletionModel


class OpenAICompletionModel(CompletionModel):
    """Base wrapper around the OpenAI legacy (non-chat) completions API."""

    model: str  # model / deployment name passed to the completions endpoint
    api_key: str  # secret used to authenticate the client
    _client: OpenAI  # concrete client, set by subclasses in model_post_init

    def complete(self, prompt: str, arguments: CompletionArguments) -> Any:
        """
        Request a completion for the given prompt.

        :param prompt: Raw prompt text to complete.
        :param arguments: Sampling settings (temperature, max_tokens, stop).
        :return: The raw OpenAI completion response object.
        """
        response = self._client.completions.create(
            model=self.model,
            prompt=prompt,
            temperature=arguments.temperature,
            max_tokens=arguments.max_tokens,
            stop=arguments.stop,
        )
        return response


class DirectOpenAICompletionModel(OpenAICompletionModel):
    """Completion model talking directly to the public OpenAI API."""

    type: Literal["openai_completion"]

    def model_post_init(self, __context: Any) -> None:
        # Pydantic hook: build the client once validated fields are available.
        self._client = OpenAI(api_key=self.api_key)

    def __str__(self):
        return f"OpenAICompletion('{self.model}')"


class AzureOpenAICompletionModel(OpenAICompletionModel):
    """Completion model backed by an Azure OpenAI deployment."""

    type: Literal["azure_completion"]
    endpoint: str  # https endpoint of the Azure resource
    azure_deployment: str  # name of the deployed model on Azure
    api_version: str  # Azure OpenAI REST API version string

    def model_post_init(self, __context: Any) -> None:
        # Pydantic hook: build the Azure client once validated fields are available.
        self._client = AzureOpenAI(
            azure_endpoint=self.endpoint,
            azure_deployment=self.azure_deployment,
            api_version=self.api_version,
            api_key=self.api_key,
        )

    def __str__(self):
        return f"AzureCompletion('{self.model}')"
class IngestionStatusCallback(StatusCallback):
    """
    Callback class for updating the status of a Lecture ingestion Pipeline run.

    Builds the status-webhook URL for the run and appends the three ingestion
    stages (removal, interpretation, ingestion) to any stages that already ran.
    """

    def __init__(
        self,
        run_id: str,
        base_url: str,
        initial_stages: List[StageDTO] = None,
        lecture_unit_id: int = None,
    ):
        """
        :param run_id: Id of this pipeline run; part of the status webhook URL.
        :param base_url: Base URL of the Artemis instance receiving the updates.
        :param initial_stages: Stages completed before ingestion starts, if any.
        :param lecture_unit_id: Id of the lecture unit being ingested.
        """
        url = f"{base_url}/api/public/pyris/webhooks/ingestion/runs/{run_id}/status"

        current_stage_index = len(initial_stages) if initial_stages else 0
        # Copy the caller's list: the previous `stages = initial_stages or []`
        # followed by `stages += [...]` appended our stages to the caller-owned
        # `initial_stages` list in place.
        stages = list(initial_stages) if initial_stages else []
        stages += [
            StageDTO(
                weight=10, state=StageStateEnum.NOT_STARTED, name="Old slides removal"
            ),
            StageDTO(
                weight=60,
                state=StageStateEnum.NOT_STARTED,
                name="Slides Interpretation",
            ),
            StageDTO(
                weight=30,
                state=StageStateEnum.NOT_STARTED,
                name="Slides ingestion",
            ),
        ]
        status = IngestionStatusUpdateDTO(stages=stages, id=lecture_unit_id)
        stage = stages[current_stage_index]
        super().__init__(url, run_id, status, stage, current_stage_index)
class FaqIngestionStatus(StatusCallback):
    """
    Callback class for updating the status of a Faq ingestion Pipeline run.

    Builds the status-webhook URL for the run and appends the three FAQ
    ingestion stages (removal, interpretation, ingestion) to any stages that
    already ran.
    """

    def __init__(
        self,
        run_id: str,
        base_url: str,
        initial_stages: List[StageDTO] = None,
        faq_id: int = None,
    ):
        """
        :param run_id: Id of this pipeline run; part of the status webhook URL.
        :param base_url: Base URL of the Artemis instance receiving the updates.
        :param initial_stages: Stages completed before ingestion starts, if any.
        :param faq_id: Id of the FAQ being ingested.
        """
        url = (
            f"{base_url}/api/public/pyris/webhooks/ingestion/faqs/runs/{run_id}/status"
        )

        current_stage_index = len(initial_stages) if initial_stages else 0
        # Copy the caller's list: the previous `stages = initial_stages or []`
        # followed by `stages += [...]` appended our stages to the caller-owned
        # `initial_stages` list in place.
        stages = list(initial_stages) if initial_stages else []
        stages += [
            StageDTO(
                weight=10, state=StageStateEnum.NOT_STARTED, name="Old faq removal"
            ),
            StageDTO(
                weight=30,
                state=StageStateEnum.NOT_STARTED,
                name="Faq Interpretation",
            ),
            StageDTO(
                weight=60,
                state=StageStateEnum.NOT_STARTED,
                name="Faq ingestion",
            ),
        ]
        status = IngestionStatusUpdateDTO(stages=stages, id=faq_id)
        stage = stages[current_stage_index]
        super().__init__(url, run_id, status, stage, current_stage_index)
class ExtendedCourseDTO(BaseModel):
    """Full course representation sent by Artemis, including exercises, exams
    and competencies. Field aliases match the camelCase JSON payload."""

    id: int = Field(alias="id")
    # `name` may be absent in the payload; the annotation must be Optional to
    # match the None default (it was previously annotated as plain `str`).
    name: Optional[str] = Field(alias="name", default=None)
    description: Optional[str] = Field(alias="description", default=None)
    start_time: Optional[datetime] = Field(alias="startTime", default=None)
    end_time: Optional[datetime] = Field(alias="endTime", default=None)
    default_programming_language: Optional[ProgrammingLanguage] = Field(
        alias="defaultProgrammingLanguage", default=None
    )
    max_complaints: Optional[int] = Field(alias="maxComplaints", default=None)
    max_team_complaints: Optional[int] = Field(alias="maxTeamComplaints", default=None)
    max_complaint_time_days: Optional[int] = Field(
        alias="maxComplaintTimeDays", default=None
    )
    max_request_more_feedback_time_days: Optional[int] = Field(
        alias="maxRequestMoreFeedbackTimeDays", default=None
    )
    max_points: Optional[int] = Field(alias="maxPoints", default=None)
    presentation_score: Optional[int] = Field(alias="presentationScore", default=None)
    # default_factory gives each instance its own fresh list instead of a
    # shared mutable default literal.
    exercises: List[ExerciseWithSubmissionsDTO] = Field(
        alias="exercises", default_factory=list
    )
    exams: List[ExamDTO] = Field(alias="exams", default_factory=list)
    competencies: List[CompetencyDTO] = Field(
        alias="competencies", default_factory=list
    )
Make sure to use the original language of the input text. 11 | 6. Avoid repeating any information that is already present in the text. 12 | 7. Make sure to keep the markdown formatting intact and add formatting for the most important information. 13 | 8. If someone does input a very short text, that does not resemble to be an answer to a potential question please make. 14 | sure to respond accordingly. Also, if the input text is too short, please point this out. 15 | 16 | Additionally for Short Inputs: If the input text is too short and does not resemble an answer to a potential question, \ 17 | respond appropriately and point this out. 18 | Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform. 19 | Ensure it is clear, concise, and well-structured. 20 | 21 | Exclude the start and end markers from your response and provide only the improved content. 22 | 23 | The markers are defined as following: 24 | Start of the text: ###START### 25 | End of the text: ###END### 26 | 27 | The text that has to be rewritten starts now: 28 | 29 | ###START### 30 | {rewritten_text} 31 | ###END###\ 32 | """ 33 | -------------------------------------------------------------------------------- /docker/nginx/certs/pyris-nginx+4-key.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEwAIBADANBgkqhkiG9w0BAQEFAASCBKowggSmAgEAAoIBAQCuCJSHStSQd02f 3 | j+IlQFes7pVUcYv2r0qm5qicGwPcKQf1/nmsy6k4WhE9HQV9VO9LQ4doSNp9NuYX 4 | P/JQqdYLZYYQvxHS+fR7ofIPjirsrbQYAkG5F6imM8H7MkkueG3HGqaKD54PBmC4 5 | BgBJDFWiF8jSNSYNKOE2L5SaYG/g3LLIkWBlhBQHgrprkio4pv5Y44+nf+hGWkSj 6 | bkRo2+PmIsNmQrpDB2o0O7uoyswa71HE967n9K17SWZ7Hi4kP6BGUWn65P5JB10a 7 | 6kz0y8183Uzz99bx8hzxLPg6VNiJZQ+dH4M1Jn6kysKiyV4x24JsM9s6t+Vhln9E 8 | KX5ktosdAgMBAAECggEBAJs3ddkwqWLrtOSR/H2C5G+NHsyAtPdgIfG3mTwZcBjk 9 | 03/X5gdyYUusMOHTx3ifzwjOgq9FAvFYjGDCHMlKoGfrtWWsNCZ53k6CApVTE/+h 10 | 
from datetime import datetime
from enum import Enum
from typing import Optional, List

from pydantic import BaseModel, Field
from pydantic.v1 import validator


class CompetencyTaxonomy(str, Enum):
    """Levels of Bloom's taxonomy a competency can be classified as."""

    REMEMBER = "REMEMBER"
    UNDERSTAND = "UNDERSTAND"
    APPLY = "APPLY"
    ANALYZE = "ANALYZE"
    EVALUATE = "EVALUATE"
    CREATE = "CREATE"


class CompetencyDTO(BaseModel):
    """Competency as exchanged with Artemis; every field is optional."""

    id: Optional[int] = None
    title: Optional[str] = None
    description: Optional[str] = None
    taxonomy: Optional[CompetencyTaxonomy] = None
    soft_due_date: Optional[datetime] = Field(default=None, alias="softDueDate")
    optional: Optional[bool] = None
    exercise_list: Optional[List[int]] = Field(default=[], alias="exerciseList")


class Competency(BaseModel):
    """Competency as produced by the extraction pipeline, with validation."""

    title: str = Field(
        description="Title of the competency that contains no more than 4 words",
    )
    description: str = Field(
        description="Description of the competency as plain string. DO NOT RETURN A LIST OF STRINGS."
    )
    taxonomy: CompetencyTaxonomy = Field(
        description="Selected taxonomy based on bloom's taxonomy"
    )

    # NOTE(review): these are pydantic *v1*-style validators (imported from
    # `pydantic.v1`) attached to what appears to be a v2 BaseModel — confirm
    # they actually run under the installed pydantic version; v2 would expect
    # `field_validator`.
    @validator("title")
    def validate_title(cls, field):
        """Validate the subject of the competency."""
        if len(field.split()) > 4:
            raise ValueError("Title must contain no more than 4 words")
        return field

    @validator("taxonomy")
    def validate_selected_taxonomy(cls, field):
        """Validate the selected taxonomy."""
        if field not in CompetencyTaxonomy.__members__:
            raise ValueError(f"Invalid taxonomy: {field}")
        return field
# System prompt for the competency extraction pipeline.
# Placeholders filled in at runtime via str.format():
#   {taxonomy_list}, {course_description}, {current_competencies}, {max_n}
# Literal braces in the JSON template are escaped as {{ }}.
system_prompt = """
You are an expert in all topics of computer science and its practical applications.
Your task consists of two parts:
1. Read the provided curriculum description of a university course.
2. Extract all learning goals ("competencies") from the course description.

Each competency must contain the following fields:

- title:
The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.

- description:
A detailed description of the competency in 2 to 5 bullet points.
Each bullet point illustrates a specific skill or concept of the competency.
Each bullet point is a complete sentence containing at most 15 words.
Each bullet point is on a new line and starts with "- ".

- taxonomy:
The classification of the competency within Bloom's taxonomy.
You must choose from these options in Bloom's taxonomy: {taxonomy_list}

All competencies must meet the following requirements:

- is mentioned in the course description.
- corresponds to exactly one subject or skill covered in the course description.
- is assigned to exactly one level of Bloom's taxonomy.
- is small and fine-grained. Large topics should be broken down into smaller competencies.
- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.

Here is the provided course description: {course_description}

Here is a template competency in JSON format:

{{
    "title": "Competency Title",
    "description": "- You understand this.\\n- You are proficient in doing that.\\n- You know how to do this.",
    "taxonomy": "ANALYZE"
}}

{current_competencies}

Respond with 0 to {max_n} competencies extracted from the course description,
each in JSON format, split by two newlines.
"""
class LlmManager(metaclass=Singleton):
    """
    Singleton registry of all configured language models.

    Models are loaded once from the YAML file referenced by the
    LLM_CONFIG_PATH environment variable.
    """

    entries: list[LanguageModel]

    def __init__(self):
        self.entries = []
        self.load_llms()

    def get_llm_by_id(self, llm_id):
        """Return the LLM with the given id, or None if no entry matches."""
        for llm in self.entries:
            if llm.id == llm_id:
                return llm
        return None  # explicit: previously fell off the end implicitly

    def load_llms(self):
        """Load the llms from the config file referenced by LLM_CONFIG_PATH."""
        path = os.environ.get("LLM_CONFIG_PATH")
        if not path:
            raise Exception("LLM_CONFIG_PATH not set")

        with open(path, "r") as file:
            loaded_llms = yaml.safe_load(file)

        self.entries = LlmList.model_validate({"llms": loaded_llms}).llms

    def get_llms_sorted_by_capabilities_score(
        self, requirements: RequirementList, invert_cost: bool = False
    ):
        """
        Return the llms sorted by their capability-to-requirement scores.

        Only models whose capabilities fulfill the requirements are considered;
        the best-scoring model comes first.

        :param requirements: The requirement profile to match against.
        :param invert_cost: Passed through to the capability scoring.
        """
        # (The docstring above used to sit *after* this assignment, where it
        # was a no-op string statement rather than a docstring.)
        valid_llms = [
            llm
            for llm in self.entries
            if capabilities_fulfill_requirements(llm.capabilities, requirements)
        ]
        scores = calculate_capability_scores(
            [llm.capabilities for llm in valid_llms], requirements, invert_cost
        )
        # Sort by descending score; zip pairs each score with its model.
        sorted_llms = sorted(zip(scores, valid_llms), key=lambda pair: -pair[0])
        return [llm for _, llm in sorted_llms]
Do not use the maximum of 3 issues if not required. 41 | It is possible that the conversation does not require to look at any code, for example, if the question is conceptual or random chatter. 42 | If you want to return no issues at all, return "!NONE!". 43 | -------------------------------------------------------------------------------- /docker/nginx/nginx_502.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Pyris Maintenance 5 | 6 | 7 | 18 | 19 |
20 | Asset 3 21 |

We’ll be back soon!

22 |
23 |

We’re performing some maintenance at the moment. Sorry for the inconvenience.

24 |

— Your Pyris Administrators

25 |
26 |
27 | 28 | -------------------------------------------------------------------------------- /app/llm/capability/requirement_list.py: -------------------------------------------------------------------------------- 1 | class RequirementList: 2 | """A class to represent the requirements you want to match against""" 3 | 4 | # Maximum cost in $ per 1k input tokens 5 | input_cost: float | None 6 | # Maximum cost in $ per 1k output tokens 7 | output_cost: float | None 8 | # The minimum GPT version that the model should be roughly equivalent to 9 | gpt_version_equivalent: float | None 10 | # The minimum speed of the model in tokens per second 11 | speed: float | None 12 | # The minimum context length of the model in tokens 13 | context_length: int | None 14 | # The vendor of the model e.g. "OpenAI" or "Anthropic" 15 | vendor: str | None 16 | # Whether the model should be privacy compliant to be used for sensitive data 17 | privacy_compliance: bool | None 18 | # Whether the model should be self-hosted 19 | self_hosted: bool | None 20 | # Whether the model should support image recognition 21 | image_recognition: bool | None 22 | # Whether the model should support a JSON mode 23 | json_mode: bool | None 24 | 25 | def __init__( 26 | self, 27 | input_cost: float | None = None, 28 | output_cost: float | None = None, 29 | gpt_version_equivalent: float | None = None, 30 | speed: float | None = None, 31 | context_length: int | None = None, 32 | vendor: str | None = None, 33 | privacy_compliance: bool | None = None, 34 | self_hosted: bool | None = None, 35 | image_recognition: bool | None = None, 36 | json_mode: bool | None = None, 37 | ) -> None: 38 | self.input_cost = input_cost 39 | self.output_cost = output_cost 40 | self.gpt_version_equivalent = gpt_version_equivalent 41 | self.speed = speed 42 | self.context_length = context_length 43 | self.vendor = vendor 44 | self.privacy_compliance = privacy_compliance 45 | self.self_hosted = self_hosted 46 | self.image_recognition = 
image_recognition 47 | self.json_mode = json_mode 48 | -------------------------------------------------------------------------------- /app/llm/request_handler/request_handler_interface.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Optional, Sequence, Union, Dict, Any, Type, Callable 3 | from langchain_core.tools import BaseTool 4 | from pydantic import BaseModel 5 | 6 | from .. import LanguageModel 7 | from ...common.pyris_message import PyrisMessage 8 | from ...domain.data.image_message_content_dto import ImageMessageContentDTO 9 | from ...llm import CompletionArguments 10 | 11 | 12 | class RequestHandler(BaseModel, metaclass=ABCMeta): 13 | """Interface for the request handlers""" 14 | 15 | @classmethod 16 | def __subclasshook__(cls, subclass) -> bool: 17 | return ( 18 | hasattr(subclass, "complete") 19 | and callable(subclass.complete) 20 | and hasattr(subclass, "chat") 21 | and callable(subclass.chat) 22 | and hasattr(subclass, "embed") 23 | and callable(subclass.embed) 24 | ) 25 | 26 | @abstractmethod 27 | def complete( 28 | self, 29 | prompt: str, 30 | arguments: CompletionArguments, 31 | image: Optional[ImageMessageContentDTO] = None, 32 | ) -> str: 33 | """Create a completion from the prompt""" 34 | raise NotImplementedError 35 | 36 | @abstractmethod 37 | def chat( 38 | self, 39 | messages: list[any], 40 | arguments: CompletionArguments, 41 | tools: Optional[ 42 | Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]] 43 | ], 44 | ) -> PyrisMessage: 45 | """Create a completion from the chat messages""" 46 | raise NotImplementedError 47 | 48 | @abstractmethod 49 | def embed(self, text: str) -> list[float]: 50 | """Create an embedding from the text""" 51 | raise NotImplementedError 52 | 53 | @abstractmethod 54 | def bind_tools( 55 | self, 56 | tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], 57 | ) -> 
class VectorDatabase:
    """
    Class to interact with the Weaviate vector database.

    The underlying Weaviate client is created once per process and shared by
    all instances (creation is guarded by a lock for thread-safe lazy init).
    """

    _lock = threading.Lock()
    _client_instance = None

    def __init__(self):
        with VectorDatabase._lock:
            if not VectorDatabase._client_instance:
                VectorDatabase._client_instance = weaviate.connect_to_local(
                    host=settings.weaviate.host,
                    port=settings.weaviate.port,
                    grpc_port=settings.weaviate.grpc_port,
                )
                logger.info("Weaviate client initialized")
        self.client = VectorDatabase._client_instance
        self.lectures = init_lecture_schema(self.client)
        self.faqs = init_faq_schema(self.client)

    def delete_collection(self, collection_name):
        """
        Delete a collection from the database, if it exists.

        :param collection_name: Name of the collection to delete.
        """
        if self.client.collections.exists(collection_name):
            # Lazy %s args so the message is only built when the level is enabled.
            if self.client.collections.delete(collection_name):
                logger.info("Collection %s deleted", collection_name)
            else:
                logger.error("Collection %s failed to delete", collection_name)

    def delete_object(self, collection_name, property_name, object_property):
        """
        Delete every object whose *property_name* equals *object_property*
        from the given collection.
        """
        collection = self.client.collections.get(collection_name)
        collection.data.delete_many(
            where=Filter.by_property(property_name).equal(object_property)
        )

    def get_client(self):
        """
        Get the shared Weaviate client.
        """
        return self.client
@router.get(
    "/courses/{course_id}/lectures/{lecture_id}/lectureUnits/{lecture_unit_id}/ingestion-state",
    dependencies=[Depends(TokenValidator())],
)
def get_lecture_unit_ingestion_state(
    course_id: int, lecture_id: int, lecture_unit_id: int, base_url: str = Query(...)
):
    """
    Report whether a lecture unit has been ingested into the vector database.

    :param course_id: ID of the course the lecture belongs to.
    :param lecture_id: ID of the lecture containing the unit.
    :param lecture_unit_id: ID of the lecture unit to check.
    :param base_url: URL-encoded base URL identifying the source instance.
    :return: 200 response whose body is {"state": "DONE"} when at least one
        matching object exists, otherwise {"state": "NOT_STARTED"}.
    """
    database = VectorDatabase()
    unit_filter = (
        Filter.by_property(LectureSchema.BASE_URL.value).equal(unquote(base_url))
        & Filter.by_property(LectureSchema.COURSE_ID.value).equal(course_id)
        & Filter.by_property(LectureSchema.LECTURE_ID.value).equal(lecture_id)
        & Filter.by_property(LectureSchema.LECTURE_UNIT_ID.value).equal(
            lecture_unit_id
        )
    )
    result = database.lectures.query.fetch_objects(
        filters=unit_filter,
        limit=1,
        return_properties=[LectureSchema.LECTURE_UNIT_NAME.value],
    )

    # Any hit means the unit was ingested; no hit means ingestion never ran.
    if result.objects:
        state = IngestionState.DONE
    else:
        state = IngestionState.NOT_STARTED
    return Response(
        status_code=status.HTTP_200_OK,
        content=json.dumps({"state": state.value}),
        media_type="application/json",
    )
4 | If the answer uses multiple pages from the same lecture, list the page numbers on the same line separated by commas in this format: [1] "Lecture title", "Lecture unit title", "page number1,number2,number3". 5 | Do not include the actual paragraphs, only the citations at the end. 6 | Only include the citations of the paragraphs that are relevant to the answer. 7 | If the answer actually does not contain any information from the paragraphs, please do not include any citations and return '!NONE!'. 8 | But if the answer contains information from the paragraphs, ALWAYS include citations. 9 | 10 | Here is an example of how to rewrite the answer with citations (ONLY ADD CITATION IF THE PROVIDED PARAGRAPHS ARE RELEVANT TO THE ANSWER): 11 | " 12 | Lorem ipsum dolor sit amet, consectetur adipiscing elit [1]. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua [2]. 13 | 14 | [1] Lecture 1, Unit A, page 2,3,4. 15 | [2] Lecture 2, Unit B, page 5,25. 16 | " 17 | 18 | Note: If there is no link available, please do not include the link in the citation. For example, if citation 1 does not have a link, it should look like this: 19 | [1] "Lecture title", "Lecture unit title", "page number" 20 | but if citation 2 has a link, it should look like this: 21 | [2] "Lecture title", "Lecture unit title", "page number" 22 | 23 | Here are the answer and the paragraphs: 24 | 25 | Answer without citations: 26 | {Answer} 27 | 28 | Paragraphs with their Lecture Names, Unit Names, Links and Page Numbers: 29 | {Paragraphs} 30 | 31 | If the answer actually does not contain any information from the paragraphs, please do not include any citations and return '!NONE!'.
class OpenAIEmbeddingModel(EmbeddingModel):
    """Base wrapper for OpenAI embedding endpoints with retry and backoff."""

    model: str
    api_key: str
    _client: OpenAI

    def embed(self, text: str) -> list[float]:
        """
        Create an embedding for the given text, retrying transient errors.

        Retries up to 5 times with exponential backoff between attempts
        (1 + 2 + 4 + 8 = 15 seconds of waiting at most).

        :param text: The text to embed.
        :return: The embedding vector as a list of floats.
        :raises Exception: When every attempt failed; the last OpenAI error
            is attached as the cause.
        """
        retries = 5
        backoff_factor = 2
        initial_delay = 1
        last_error = None

        for attempt in range(retries):
            try:
                response = self._client.embeddings.create(
                    model=self.model,
                    input=text,
                    encoding_format="float",
                )
                return response.data[0].embedding
            except (
                APIError,
                APITimeoutError,
                RateLimitError,
                InternalServerError,
            ) as error:
                last_error = error
                logging.exception(f"OpenAI error on attempt {attempt + 1}")
                # Fix: only sleep when another attempt follows. Previously the
                # loop slept the full 16s backoff after the *last* failure and
                # only then raised, wasting time for nothing.
                if attempt < retries - 1:
                    wait_time = initial_delay * (backoff_factor**attempt)
                    logging.info(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
        # Chain the last OpenAI error so the root cause is not lost.
        raise Exception(
            f"Failed to get embedding from OpenAI after {retries} retries."
        ) from last_error
class BasicRequestHandler(RequestHandler):
    """Request handler that routes every call to one fixed model chosen by ID."""

    model_id: str
    llm_manager: LlmManager | None = None
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __init__(self, model_id: str):
        super().__init__(model_id=model_id, llm_manager=None)
        self.model_id = model_id
        self.llm_manager = LlmManager()

    def _resolve_llm(self) -> LanguageModel:
        """Look up the configured language model from the manager."""
        return self.llm_manager.get_llm_by_id(self.model_id)

    def complete(
        self,
        prompt: str,
        arguments: CompletionArguments,
        image: Optional[ImageMessageContentDTO] = None,
    ) -> str:
        """Delegate a text completion request to the configured model."""
        return self._resolve_llm().complete(prompt, arguments, image)

    def chat(
        self,
        messages: list[PyrisMessage],
        arguments: CompletionArguments,
        tools: Optional[
            Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]]
        ],
    ) -> PyrisMessage:
        """Delegate a chat completion request to the configured model."""
        return self._resolve_llm().chat(messages, arguments, tools)

    def embed(self, text: str) -> list[float]:
        """Delegate an embedding request to the configured model."""
        return self._resolve_llm().embed(text)

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
    ) -> LanguageModel:
        """
        Binds a sequence of tools to the language model.

        Args:
            tools (Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]]):
                A sequence of tools to be bound.

        Returns:
            LanguageModel: The language model with tools bound.
        """
        llm = self._resolve_llm()
        llm.bind_tools(tools)
        return llm
(context.ref.startsWith("refs/tags/")) { 46 | return context.ref.slice(10); 47 | } 48 | if (context.ref === "refs/heads/main") { 49 | return "latest"; 50 | } 51 | } 52 | return "FALSE"; 53 | - uses: actions/checkout@v3 54 | - name: Set up QEMU 55 | uses: docker/setup-qemu-action@v3 56 | - name: Set up Docker Buildx 57 | uses: docker/setup-buildx-action@v2 58 | # Build and Push to GitHub Container Registry 59 | - name: Login to GitHub Container Registry 60 | uses: docker/login-action@v2 61 | if: ${{ steps.compute-tag.outputs.result != 'FALSE' }} 62 | with: 63 | registry: ghcr.io 64 | username: ${{ github.repository_owner }} 65 | password: ${{ secrets.GITHUB_TOKEN }} 66 | - name: Build and Push to GitHub Container Registry 67 | uses: docker/build-push-action@v6 68 | if: ${{ steps.compute-tag.outputs.result != 'FALSE' }} 69 | with: 70 | platforms: amd64, arm64 71 | file: ./Dockerfile 72 | context: . 73 | tags: ghcr.io/ls1intum/pyris:${{ steps.compute-tag.outputs.result }} 74 | push: true 75 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | docker-tag: 7 | required: true 8 | type: string 9 | branch-name: 10 | required: true 11 | type: string 12 | environment-name: 13 | required: true 14 | type: string 15 | environment-url: 16 | required: true 17 | type: string 18 | secrets: 19 | DEPLOYMENT_GATEWAY_SSH_KEY: 20 | required: true 21 | 22 | concurrency: deploy 23 | 24 | env: 25 | RAW_URL: https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }} 26 | 27 | 28 | jobs: 29 | deploy: 30 | runs-on: ubuntu-latest 31 | 32 | environment: 33 | name: ${{ inputs.environment-name }} 34 | url: ${{ inputs.environment-url }} 35 | 36 | env: 37 | DOCKER_TAG: ${{ inputs.docker-tag }} 38 | BRANCH_NAME: ${{ inputs.branch-name }} 39 | DEPLOYMENT_USER: ${{ 
vars.DEPLOYMENT_USER }} 40 | DEPLOYMENT_HOST: ${{ vars.DEPLOYMENT_HOST }} 41 | DEPLOYMENT_FOLDER: ${{ vars.DEPLOYMENT_FOLDER }} 42 | DEPLOYMENT_HOST_PUBLIC_KEYS: ${{ vars.DEPLOYMENT_HOST_PUBLIC_KEYS }} 43 | GATEWAY_USER: "jump" 44 | GATEWAY_HOST: "gateway.artemis.in.tum.de:2010" 45 | GATEWAY_HOST_PUBLIC_KEY: "[gateway.artemis.in.tum.de]:2010 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKtTLiKRILjKZ+Qg4ReWKsG7mLDXkzHfeY5nalSQUNQ4" 46 | 47 | steps: 48 | # Download pyris-server-cli from GH without cloning the Repo 49 | - name: Fetch Pyris CLI 50 | run: | 51 | wget ${{ env.RAW_URL }}/pyris-server-cli 52 | chmod +x pyris-server-cli 53 | 54 | # Configure SSH Key 55 | - name: Setup SSH Keys and known_hosts 56 | env: 57 | SSH_AUTH_SOCK: /tmp/ssh_agent.sock 58 | GATEWAY_SSH_KEY: "${{ secrets.DEPLOYMENT_GATEWAY_SSH_KEY }}" 59 | DEPLOYMENT_SSH_KEY: "${{ secrets.DEPLOYMENT_SSH_KEY }}" 60 | run: | 61 | mkdir -p ~/.ssh 62 | ssh-agent -a $SSH_AUTH_SOCK > /dev/null 63 | ssh-add - <<< $GATEWAY_SSH_KEY 64 | ssh-add - <<< $DEPLOYMENT_SSH_KEY 65 | cat - <<< $GATEWAY_HOST_PUBLIC_KEY >> ~/.ssh/known_hosts 66 | cat - <<< $(sed 's/\\n/\n/g' <<< "$DEPLOYMENT_HOST_PUBLIC_KEYS") >> ~/.ssh/known_hosts 67 | 68 | - name: Deploy Pyris with Docker 69 | env: 70 | SSH_AUTH_SOCK: /tmp/ssh_agent.sock 71 | run: | 72 | ./pyris-server-cli docker-deploy "$DEPLOYMENT_USER@$DEPLOYMENT_HOST" -g "$GATEWAY_USER@$GATEWAY_HOST" -t $DOCKER_TAG -b $BRANCH_NAME -d $DEPLOYMENT_FOLDER -y 73 | -------------------------------------------------------------------------------- /app/pipeline/shared/summary_pipeline.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from langchain_core.output_parsers import StrOutputParser 5 | from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate 6 | from langchain_core.runnables import Runnable 7 | 8 | from ...llm import CapabilityRequestHandler, RequirementList 9 | from ...llm.langchain import 
class SummaryPipeline(Pipeline):
    """A generic summary pipeline that can be used to summarize any text."""

    llm: IrisLangchainCompletionModel
    pipeline: Runnable
    prompt_str: str
    prompt: ChatPromptTemplate

    def __init__(self):
        super().__init__(implementation_id="summary_pipeline")
        # Set the langchain chat model via a capability-based request handler
        request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=3.5,
                context_length=4096,
            )
        )
        self.llm = IrisLangchainCompletionModel(
            request_handler=request_handler, max_tokens=1000
        )
        # Load the prompt from a file. Fix: pin the encoding to UTF-8 so the
        # prompt is read identically on every platform (previously the
        # locale-dependent default encoding was used).
        dirname = os.path.dirname(__file__)
        prompt_path = os.path.join(dirname, "../prompts/summary_prompt.txt")
        with open(prompt_path, "r", encoding="utf-8") as file:
            logger.info("Loading summary prompt...")
            self.prompt_str = file.read()
        # Create the prompt template with the file content as system message
        self.prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessagePromptTemplate.from_template(self.prompt_str),
            ]
        )
        # Create the pipeline: prompt -> LLM -> plain string output
        self.pipeline = self.prompt | self.llm | StrOutputParser()
        self.tokens = []

    def __repr__(self):
        return f"{self.__class__.__name__}(llm={self.llm})"

    def __str__(self):
        return f"{self.__class__.__name__}(llm={self.llm})"

    def __call__(self, query: str, **kwargs) -> str:
        """
        Runs the pipeline
        :param query: The query
        :param kwargs: keyword arguments
        :return: summary text as string
        :raises ValueError: if query is None
        """
        if query is None:
            raise ValueError("Query must not be None")
        logger.info("Running summary pipeline...")
        response: str = self.pipeline.invoke({"text": query})
        logger.info(f"Response from summary pipeline: {response[:20]}...")
        return response
class FaqRetrieval(BaseRetrieval):
    """Retrieves course FAQs by combining a rewritten-query search with a
    hypothetical-answer (HyDE) search and merging the two result sets."""

    def __init__(self, client: WeaviateClient, **kwargs):
        super().__init__(
            client, init_faq_schema, implementation_id="faq_retrieval_pipeline"
        )

    def get_schema_properties(self) -> List[str]:
        """Weaviate properties returned for every retrieved FAQ."""
        return [
            FaqSchema.COURSE_ID.value,
            FaqSchema.FAQ_ID.value,
            FaqSchema.QUESTION_TITLE.value,
            FaqSchema.QUESTION_ANSWER.value,
        ]

    @traceable(name="Full Faq Retrieval")
    def __call__(
        self,
        chat_history: list[PyrisMessage],
        student_query: str,
        result_limit: int,
        course_name: str = None,
        course_id: int = None,
        problem_statement: str = None,
        exercise_title: str = None,
        base_url: str = None,
    ) -> List[dict]:
        """Run both retrieval strategies in parallel and merge their chunks."""
        course_language = self.fetch_course_language(course_id)

        response, response_hyde = self.run_parallel_rewrite_tasks(
            chat_history=chat_history,
            student_query=student_query,
            result_limit=result_limit,
            course_language=course_language,
            initial_prompt=faq_retriever_initial_prompt,
            rewrite_prompt=rewrite_student_query_prompt,
            hypothetical_answer_prompt=write_hypothetical_answer_prompt,
            pipeline_enum=PipelineEnum.IRIS_FAQ_RETRIEVAL_PIPELINE,
            course_name=course_name,
            course_id=course_id,
        )

        def as_chunks(query_response) -> list[dict[str, dict]]:
            # Normalize a Weaviate response into {id, properties} dicts.
            return [
                {"id": obj.uuid.int, "properties": obj.properties}
                for obj in query_response.objects
            ]

        return merge_retrieved_chunks(as_chunks(response), as_chunks(response_hyde))
@app.middleware("http")
async def some_middleware(request: Request, call_next):
    """Buffer each request/response body and log both after the response is sent."""
    req_body = await request.body()
    response = await call_next(request)

    # Drain the streaming body so it can be logged and re-sent as one payload.
    chunks = [chunk async for chunk in response.body_iterator]
    res_body = b"".join(chunks)

    # Logging runs as a background task, after the response has gone out.
    return Response(
        content=res_body,
        status_code=response.status_code,
        headers=dict(response.headers),
        media_type=response.media_type,
        background=BackgroundTask(log_info, req_body, res_body),
    )
Your teaching assistants will use your feedback to \ 6 | improve the exercise. 7 | 8 | Parts of a programming exercise: 9 | - Problem statement: The description of the exercise containing tasks that the student needs to solve. 10 | - Template repository: The starting point from which the student will start solving the exercise. 11 | - Solution repository: The sample solution set by the instructor to compare the student's solution against. 12 | 13 | To not overburden you, you will be provided with the problem statement and one of the template plus solution files \ 14 | at a time. You need to compare the problem statement with the template file and identify any consistency issues. 15 | 16 | 17 | 18 | {problem_statement} 19 | 20 | 21 | 22 | {template_file} 23 | 24 | 25 | 26 | {solution_file} 27 | 28 | 29 | 30 | Respond with any potential consistency issues found in the exercise formatted in markdown. \ 31 | Just provide the easily digestible formatted markdown without other explanations. It is fine to provide no issues if \ 32 | you are confident that the files are consistent. 33 | 34 | """ 35 | 36 | prettify_prompt = """\ 37 | 38 | You are a detail-oriented expert instructor at an Ivy League university ensuring the quality of programming exercises. \ 39 | Your task is to find consistency issues as part of the exercise creation process to make sure that the exercise is \ 40 | without any errors or inconsistencies that might confuse students. 41 | In a previous step you already found potential consistency issues as part of the exercise creation process on a file \ 42 | level. Now, you need to summarize the issues found in the exercise so the teaching assistants can fix them. 43 | 44 | Parts of a programming exercise: 45 | - Problem statement: The description of the exercise containing tasks that the student needs to solve. 46 | - Template repository: The starting point from which the student will start solving the exercise. 
class RewritingPipeline(Pipeline):
    """Rewrites a given text (an FAQ or a problem statement) via the LLM and
    reports the result through the callback."""

    callback: RewritingCallback
    request_handler: CapabilityRequestHandler
    output_parser: PydanticOutputParser
    variant: Literal["faq", "problem_statement"]

    def __init__(
        self, callback: RewritingCallback, variant: Literal["faq", "problem_statement"]
    ):
        super().__init__(implementation_id="rewriting_pipeline_reference_impl")
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=4.5,
                context_length=16385,
            )
        )
        self.tokens = []
        self.variant = variant

    def __call__(
        self,
        dto: RewritingPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """
        Rewrite ``dto.to_be_rewritten`` using the variant-specific system
        prompt and deliver the cleaned result via the callback.

        :param dto: Execution DTO carrying the text to rewrite.
        :param prompt: Unused; kept for interface compatibility.
        :raises ValueError: if no text to rewrite was provided.
        """
        if not dto.to_be_rewritten:
            raise ValueError("You need to provide a text to rewrite")

        variant_prompts = {
            "faq": system_prompt_faq,
            "problem_statement": system_prompt_problem_statement,
        }
        # Fix: removed leftover debug `print(variant_prompts[self.variant])`
        # that dumped the raw prompt template to stdout on every run.
        prompt_text = variant_prompts[self.variant].format(
            rewritten_text=dto.to_be_rewritten,
        )
        prompt = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[TextMessageContentDTO(text_content=prompt_text)],
        )

        response = self.request_handler.chat(
            [prompt], CompletionArguments(temperature=0.4), tools=None
        )
        self._append_tokens(response.token_usage, PipelineEnum.IRIS_REWRITING_PIPELINE)
        response = response.contents[0].text_content

        # remove ``` from start and end if exists
        if response.startswith("```") and response.endswith("```"):
            response = response[3:-3]
        if response.startswith("markdown"):
            response = response[8:]
        response = response.strip()

        final_result = response
        self.callback.done(final_result=final_result, tokens=self.tokens)
def calculate_capability_scores(
    capabilities: list[CapabilityList],
    requirements: RequirementList,
    invert_cost: bool = False,
) -> list[int]:
    """Calculate the scores of the capabilities against the requirements.

    :param capabilities: Capability lists to score, one per model.
    :param requirements: Requirements to score against; None entries are
        skipped unless they have an always-considered default.
    :param invert_cost: Whether to invert the cost scores (depends on how
        the caller wants to rank cost).
    :return: One aggregated score per entry in ``capabilities``.
    """
    all_scores = []

    for requirement in requirements.__dict__.keys():
        requirement_value = getattr(requirements, requirement)
        if (
            requirement_value is None
            and requirement not in always_considered_capabilities_with_default
        ):
            continue

        # Calculate the scores for each capability
        scores = []
        for capability in capabilities:
            if (
                requirement_value is None
                and requirement in always_considered_capabilities_with_default
            ):
                # If the requirement is not set, use the default value if necessary
                score = getattr(capability, requirement).matches(
                    always_considered_capabilities_with_default[requirement]
                )
            else:
                score = getattr(capability, requirement).matches(requirement_value)
            # Invert the cost if required
            # The cost is a special case, as depending on how you want to use the scores
            # the cost needs to be considered differently
            if (
                requirement in ["input_cost", "output_cost"]
                and invert_cost
                and score != 0
            ):
                score = 1 / score
            scores.append(score)

        # Normalize the scores between 0 and 1 and multiply by the weight modifier
        # The normalization here is based on the position of the score in the sorted list to balance out
        # the different ranges of the capabilities
        sorted_scores = sorted(set(scores))
        weight_modifier = capability_weights[requirement]
        normalized_scores = [
            ((sorted_scores.index(score) + 1) / len(sorted_scores)) * weight_modifier
            for score in scores
        ]
        all_scores.append(normalized_scores)

    # Fix: when no requirement contributed any scores (all requirements were
    # None and none had an always-considered default), the previous code
    # indexed all_scores[0] and raised IndexError. Treat every capability as
    # equally (un)suitable instead.
    if not all_scores:
        return [0] * len(capabilities)

    # Sum the per-requirement scores column-wise: one final score per capability
    return [sum(capability_scores) for capability_scores in zip(*all_scores)]
| """Abstract class for the llm chat completion wrappers""" 40 | 41 | @classmethod 42 | def __subclasshook__(cls, subclass) -> bool: 43 | return hasattr(subclass, "chat") and callable(subclass.chat) 44 | 45 | @abstractmethod 46 | def chat( 47 | self, 48 | messages: list[PyrisMessage], 49 | arguments: CompletionArguments, 50 | tools: Optional[ 51 | Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]] 52 | ], 53 | ) -> ChatCompletionMessage: 54 | """Create a completion from the chat messages""" 55 | raise NotImplementedError( 56 | f"The LLM {self.__str__()} does not support chat completion" 57 | ) 58 | 59 | 60 | class EmbeddingModel(LanguageModel, metaclass=ABCMeta): 61 | """Abstract class for the llm embedding wrappers""" 62 | 63 | @classmethod 64 | def __subclasshook__(cls, subclass) -> bool: 65 | return hasattr(subclass, "embed") and callable(subclass.embed) 66 | 67 | @abstractmethod 68 | def embed(self, text: str) -> list[float]: 69 | """Create an embedding from the text""" 70 | raise NotImplementedError( 71 | f"The LLM {self.__str__()} does not support embeddings" 72 | ) 73 | 74 | 75 | class ImageGenerationModel(LanguageModel, metaclass=ABCMeta): 76 | """Abstract class for the llm image generation wrappers""" 77 | 78 | @classmethod 79 | def __subclasshook__(cls, subclass): 80 | return hasattr(subclass, "generate_images") and callable( 81 | subclass.generate_images 82 | ) 83 | 84 | @abstractmethod 85 | def generate_images( 86 | self, 87 | prompt: str, 88 | n: int = 1, 89 | size: str = "256x256", 90 | quality: str = "standard", 91 | **kwargs, 92 | ) -> list: 93 | """Create an image from the prompt""" 94 | raise NotImplementedError( 95 | f"The LLM {self.__str__()} does not support image generation" 96 | ) 97 | -------------------------------------------------------------------------------- /app/vector_database/faq_schema.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from 
from enum import Enum

from weaviate.classes.config import Property
from weaviate import WeaviateClient
from weaviate.collections import Collection
from weaviate.collections.classes.config import Configure, VectorDistances, DataType


class FaqSchema(Enum):
    """
    Schema for the faqs
    """

    COLLECTION_NAME = "Faqs"
    COURSE_NAME = "course_name"
    COURSE_DESCRIPTION = "course_description"
    COURSE_LANGUAGE = "course_language"
    COURSE_ID = "course_id"
    FAQ_ID = "faq_id"
    QUESTION_TITLE = "question_title"
    QUESTION_ANSWER = "question_answer"


def init_faq_schema(client: WeaviateClient) -> Collection:
    """
    Initialize the schema for the faqs.

    If the collection already exists, it is migrated in place (the
    'course_language' property is added when missing) and returned;
    otherwise a fresh collection with the full property set is created.

    :param client: Connected Weaviate client.
    :return: The (possibly newly created) FAQ collection.
    """
    if client.collections.exists(FaqSchema.COLLECTION_NAME.value):
        collection = client.collections.get(FaqSchema.COLLECTION_NAME.value)
        # Migration for older deployments: add 'course_language' if missing.
        # Loop variable renamed from `property` to `prop` to avoid shadowing
        # the `property` builtin.
        if not any(
            prop.name == FaqSchema.COURSE_LANGUAGE.value
            for prop in collection.config.get(simple=False).properties
        ):
            collection.config.add_property(
                Property(
                    name=FaqSchema.COURSE_LANGUAGE.value,
                    description="The language of the COURSE",
                    data_type=DataType.TEXT,
                    index_searchable=False,
                )
            )
        return collection

    # Vectors are supplied externally, so no vectorizer is configured.
    return client.collections.create(
        name=FaqSchema.COLLECTION_NAME.value,
        vectorizer_config=Configure.Vectorizer.none(),
        vector_index_config=Configure.VectorIndex.hnsw(
            distance_metric=VectorDistances.COSINE
        ),
        properties=[
            Property(
                name=FaqSchema.COURSE_ID.value,
                description="The ID of the course",
                data_type=DataType.INT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.COURSE_NAME.value,
                description="The name of the course",
                data_type=DataType.TEXT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.COURSE_DESCRIPTION.value,
                description="The description of the COURSE",
                data_type=DataType.TEXT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.COURSE_LANGUAGE.value,
                description="The language of the COURSE",
                data_type=DataType.TEXT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.FAQ_ID.value,
                description="The ID of the Faq",
                data_type=DataType.INT,
                index_searchable=False,
            ),
            Property(
                name=FaqSchema.QUESTION_TITLE.value,
                description="The title of the faq",
                data_type=DataType.TEXT,
            ),
            Property(
                name=FaqSchema.QUESTION_ANSWER.value,
                description="The answer of the faq",
                data_type=DataType.TEXT,
            ),
        ],
    )
output_cost: 30 46 | privacy_compliance: true 47 | self_hosted: false 48 | vendor: OpenAI 49 | description: GPT 4 Turbo 128k on Azure 50 | endpoint: '' 51 | id: azure-gpt-4-turbo 52 | model: gpt-4-turbo 53 | name: GPT 4 Turbo 54 | type: azure_chat 55 | - api_key: 56 | api_version: 2024-02-15-preview 57 | azure_deployment: gpt-4o 58 | capabilities: 59 | context_length: 128000 60 | gpt_version_equivalent: 4.5 61 | image_recognition: true 62 | input_cost: 5 63 | json_mode: true 64 | output_cost: 15 65 | privacy_compliance: true 66 | self_hosted: false 67 | vendor: OpenAI 68 | description: GPT 4 Omni on Azure 69 | endpoint: '' 70 | id: azure-gpt-4-omni 71 | model: gpt-4o 72 | name: GPT 4 Omni 73 | type: azure_chat 74 | - api_key: 75 | api_version: 2023-03-15-preview 76 | azure_deployment: gpt-4o-mini 77 | capabilities: 78 | context_length: 128000 79 | gpt_version_equivalent: 4.25 80 | image_recognition: true 81 | input_cost: 0.15 82 | json_mode: true 83 | output_cost: 0.075 84 | privacy_compliance: true 85 | self_hosted: false 86 | vendor: OpenAI 87 | description: GPT 4 Omni Mini on Azure 88 | endpoint: '' 89 | id: azure-gpt-4-omni-mini 90 | model: gpt-4o-mini 91 | name: GPT 4 Omni Mini 92 | type: azure_chat 93 | - api_key: 94 | api_version: '2023-05-15T00:00:00.000Z' 95 | azure_deployment: te-3-large 96 | capabilities: 97 | context_length: 8191 98 | input_cost: 0.13 99 | output_cost: 0.065 100 | privacy_compliance: true 101 | self_hosted: false 102 | vendor: OpenAI 103 | description: Embedding Large 8k Azure 104 | endpoint: '' 105 | id: embedding-large 106 | model: text-embedding-3-large 107 | name: Embedding Large 108 | type: azure_embedding 109 | - api_key: 110 | api_version: 2024-02-15-preview 111 | azure_deployment: te-3-small 112 | capabilities: 113 | context_length: 8191 114 | input_cost: 0.02 115 | output_cost: 0 116 | privacy_compliance: true 117 | self_hosted: false 118 | vendor: OpenAI 119 | description: Embedding Small 8k Azure 120 | endpoint: '' 121 | id: 
import logging
from logging import Logger
from typing import List, Optional, Any, Sequence, Union, Dict, Type, Callable

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import (
    BaseChatModel,
)
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatResult
from langchain_core.outputs.chat_generation import ChatGeneration
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field

from app.common.PipelineEnum import PipelineEnum
from app.common.token_usage_dto import TokenUsageDTO
from ...common.message_converters import (
    convert_langchain_message_to_iris_message,
    convert_iris_message_to_langchain_message,
)
from ...llm import RequestHandler, CompletionArguments


class IrisLangchainChatModel(BaseChatModel):
    """Custom langchain chat model for our own request handler."""

    request_handler: RequestHandler
    completion_args: CompletionArguments
    # Token usage of the most recent _generate call; None until the first call.
    tokens: Optional[TokenUsageDTO] = None
    logger: Logger = logging.getLogger(__name__)
    tools: Optional[
        Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]]
    ] = Field(default_factory=list, alias="tools")

    def __init__(
        self,
        request_handler: RequestHandler,
        completion_args: Optional[CompletionArguments] = None,
        **kwargs: Any,
    ) -> None:
        # Create a fresh CompletionArguments per instance. The previous default
        # (`= CompletionArguments(stop=None)`) was a single shared object, and
        # _generate mutates `completion_args.stop`, so state leaked across all
        # instances constructed without an explicit argument.
        if completion_args is None:
            completion_args = CompletionArguments(stop=None)
        super().__init__(
            request_handler=request_handler, completion_args=completion_args, **kwargs
        )

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        """Bind a sequence of tools to the request handler for function calling support.

        Args:
            tools: Sequence of tools that can be one of:
                - Dict describing the tool
                - Pydantic BaseModel
                - Callable function
                - BaseTool instance
            **kwargs: Additional arguments passed to the request handler

        Returns:
            self: Returns this instance as a Runnable

        Raises:
            ValueError: If tools sequence is empty or contains invalid tool types
        """
        if not tools:
            raise ValueError("At least one tool must be provided")

        self.tools = tools
        return self

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Generate one chat completion via the Iris request handler."""
        iris_messages = [convert_langchain_message_to_iris_message(m) for m in messages]
        self.completion_args.stop = stop
        iris_message = self.request_handler.chat(
            iris_messages, self.completion_args, self.tools
        )
        base_message = convert_iris_message_to_langchain_message(iris_message)
        chat_generation = ChatGeneration(message=base_message)
        # Capture usage of this call so callers can collect per-pipeline costs.
        self.tokens = TokenUsageDTO(
            model=iris_message.token_usage.model_info,
            numInputTokens=iris_message.token_usage.num_input_tokens,
            costPerMillionInputToken=iris_message.token_usage.cost_per_input_token,
            numOutputTokens=iris_message.token_usage.num_output_tokens,
            costPerMillionOutputToken=iris_message.token_usage.cost_per_output_token,
            pipeline=PipelineEnum.NOT_SET,
        )
        return ChatResult(generations=[chat_generation])

    @property
    def _llm_type(self) -> str:
        return "Iris"
import logging
from typing import List, Optional

from langchain_core.prompts import (
    ChatPromptTemplate,
)
from app.common.pyris_message import IrisMessageRole, PyrisMessage
from app.domain.chat.exercise_chat.exercise_chat_pipeline_execution_dto import (
    ExerciseChatPipelineExecutionDTO,
)
from app.domain.data.text_message_content_dto import TextMessageContentDTO
from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel
from app.pipeline.prompts.chat_gpt_wrapper_prompts import chat_gpt_initial_system_prompt
from langchain_core.runnables import Runnable

from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.pipeline import Pipeline
from app.web.status.status_update import ChatGPTWrapperStatusCallback

logger = logging.getLogger(__name__)


def convert_chat_history_to_str(chat_history: List[PyrisMessage]) -> str:
    """
    Converts the chat history to a string
    :param chat_history: The chat history
    :return: The chat history as a string
    """

    def map_message_role(role: IrisMessageRole) -> str:
        if role == IrisMessageRole.SYSTEM:
            return "System"
        elif role == IrisMessageRole.ASSISTANT:
            return "AI Tutor"
        elif role == IrisMessageRole.USER:
            return "Student"
        else:
            return "Unknown"

    def format_message(message: PyrisMessage) -> str:
        # Built without nested same-quote f-strings: the original expression
        # was only valid syntax on Python >= 3.12 (PEP 701) and hard to read.
        # Output format is unchanged, including the spacing.
        timestamp = ""
        if message.sent_at:
            timestamp = f"at {message.sent_at.strftime('%Y-%m-%d %H:%M:%S')}"
        return (
            f"{map_message_role(message.sender)} {timestamp}: "
            f"{message.contents[0].text_content}"
        )

    return "\n\n".join(format_message(message) for message in chat_history)


class ChatGPTWrapperPipeline(Pipeline):
    """Thin pipeline that forwards the chat history to a GPT model unchanged."""

    callback: ChatGPTWrapperStatusCallback
    llm: IrisLangchainChatModel
    pipeline: Runnable

    def __init__(self, callback: Optional[ChatGPTWrapperStatusCallback] = None):
        super().__init__(implementation_id="chat_gpt_wrapper_pipeline_reference_impl")
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=4.5,
                context_length=16385,
            )
        )

    def __call__(
        self,
        dto: ExerciseChatPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """
        Run the ChatGPT wrapper pipeline.
        This consists of a single response generation step.
        """

        self.callback.in_progress()
        pyris_system_prompt = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[
                TextMessageContentDTO(text_content=chat_gpt_initial_system_prompt)
            ],
        )

        # Drop messages with no usable text content before forwarding.
        prompts = [pyris_system_prompt] + [
            msg
            for msg in dto.chat_history
            if msg.contents is not None
            and len(msg.contents) > 0
            and msg.contents[0].text_content
            and len(msg.contents[0].text_content) > 0
        ]

        response = self.request_handler.chat(
            prompts, CompletionArguments(temperature=0.5, max_tokens=2000), tools=None
        )

        # Lazy %-style args avoid formatting cost when the level is disabled.
        logger.info("ChatGPTWrapperPipeline response: %s", response)

        if (
            response.contents is None
            or len(response.contents) == 0
            or response.contents[0].text_content is None
            or len(response.contents[0].text_content) == 0
        ):
            self.callback.error("ChatGPT did not reply. Try resending.")
            # Print lots of debug info for this case
            logger.error("ChatGPTWrapperPipeline response: %s", response)
            logger.error("ChatGPTWrapperPipeline request: %s", prompts)
            return

        self.callback.done(final_result=response.contents[0].text_content)
# Prompts for the rewriting pipeline. Both templates are filled via .format()
# with the `rewritten_text` placeholder; literal braces are escaped as `{{ }}`.

system_prompt_faq = """\
You are an excellent tutor with expertise in computer science and its practical applications, teaching at a university
level. Your task is to proofread and enhance the given FAQ text. Please follow these guidelines:

1. Correct all spelling and grammatical errors.
2. Ensure the text is written in simple and clear language, making it easy to understand for students.
3. Preserve the original meaning and intent of the text while maintaining clarity.
4. Ensure that the response is always written in complete sentences. If you are given a list of bullet points, \
convert them into complete sentences.
5. Make sure to use the original language of the input text.
6. Avoid repeating any information that is already present in the text.
7. Make sure to keep the markdown formatting intact and add formatting for the most important information.
8. If someone does input a very short text that does not resemble an answer to a potential question, please make \
sure to respond accordingly. Also, if the input text is too short, please point this out.

Additionally for Short Inputs: If the input text is too short and does not resemble an answer to a potential question, \
respond appropriately and point this out.
Your output will be used as an answer to a frequently asked question (FAQ) on the Artemis platform.
Ensure it is clear, concise, and well-structured.

Exclude the start and end markers from your response and provide only the improved content.

The markers are defined as following:
Start of the text: ###START###
End of the text: ###END###

The text that has to be rewritten starts now:

###START###
{rewritten_text}
###END###\
"""

system_prompt_problem_statement = """\

You are an excellent tutor with deep expertise in **computer science** and **practical applications**, teaching at the \
university level. Your goal is to **proofread and refine** the problem statement you are given, focusing on what \
students need most.

Follow these instructions carefully:
1. **Correct all spelling and grammatical errors.**
Make sure the text reads clearly and accurately.

2. **Use simple, clear, student-focused language.**
The rewritten statement should be as understandable as possible for students. Avoid overly complex words or phrasing.

3. **Preserve the original meaning and intent.**
Do not remove or alter any tasks, test instructions, or technical details. All tasks and references (e.g., \
`[task]`, test names, UML diagrams) must remain intact.

4. **Write in complete sentences.**
If you are given bullet points or lists, convert them into complete sentences whenever possible. However, you can \
still use bullet points if they make the problem statement clearer.

5. **Keep the original language of the text.**
If the input is in English, do not switch to another language, and vice versa.

6. **Do not repeat information unnecessarily.**
Condense any redundant content, but make sure no new information is lost and nothing is removed.

7. **Retain and properly format existing markdown and any extended syntax.**
This includes:
- Code blocks, UML diagrams (`@startuml ... @enduml`).
- Special test case references like `testBubbleSort()`, `testConstructors[Policy]`, etc.
- Additional markdown features (e.g. `` or `$$ e^{{\\frac{{1}}{{4}} y^2}} $$`).
- Task syntax `[task][Task Description](testCaseName)`.

8. **Emphasize critical information.**
Use bold or italic text (or other markdown elements) when highlighting essential steps or requirements to help \
students quickly identify what is most important.

9. **Maintain a supportive, instructive tone.**
Write as if you are addressing students directly, ensuring they understand the objectives, tasks, and relevance of \
each component.



{rewritten_text}



Respond with a single string containing only the improved version. The output should be the optimized problem \
statement, ready to be shown directly to students.

"""
47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | cover/ 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | local_settings.py 78 | db.sqlite3 79 | db.sqlite3-journal 80 | 81 | # Flask stuff: 82 | instance/ 83 | .webassets-cache 84 | 85 | # Scrapy stuff: 86 | .scrapy 87 | 88 | # Sphinx documentation 89 | docs/_build/ 90 | 91 | # PyBuilder 92 | .pybuilder/ 93 | target/ 94 | 95 | # Jupyter Notebook 96 | .ipynb_checkpoints 97 | 98 | # IPython 99 | profile_default/ 100 | ipython_config.py 101 | 102 | # pyenv 103 | # For a library or package, you might want to ignore these files since the code is 104 | # intended to run in multiple environments; otherwise, check them in: 105 | # .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies. 112 | #Pipfile.lock 113 | 114 | # poetry 115 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 116 | # This is especially recommended for binary packages to ensure reproducibility, and is more 117 | # commonly ignored for libraries. 
118 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 119 | #poetry.lock 120 | 121 | # pdm 122 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 123 | #pdm.lock 124 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 125 | # in version control. 126 | # https://pdm.fming.dev/#use-with-ide 127 | .pdm.toml 128 | 129 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 130 | __pypackages__/ 131 | 132 | # Celery stuff 133 | celerybeat-schedule 134 | celerybeat.pid 135 | 136 | # SageMath parsed files 137 | *.sage.py 138 | 139 | # Environments 140 | .env 141 | .venv 142 | env/ 143 | venv/ 144 | ENV/ 145 | env.bak/ 146 | venv.bak/ 147 | 148 | # Spyder project settings 149 | .spyderproject 150 | .spyproject 151 | 152 | # Rope project settings 153 | .ropeproject 154 | 155 | # mkdocs documentation 156 | /site 157 | 158 | # mypy 159 | .mypy_cache/ 160 | .dmypy.json 161 | dmypy.json 162 | 163 | # Pyre type checker 164 | .pyre/ 165 | 166 | # pytype static type analyzer 167 | .pytype/ 168 | 169 | # Cython debug symbols 170 | cython_debug/ 171 | 172 | # PyCharm 173 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 174 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 175 | # and can be added to the global gitignore or merged into this file. For a more nuclear 176 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
import logging
from typing import Optional

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import (
    ChatPromptTemplate,
)

from app.common.PipelineEnum import PipelineEnum
from app.common.pyris_message import PyrisMessage, IrisMessageRole
from app.domain import (
    CompetencyExtractionPipelineExecutionDTO,
)
from app.domain.data.text_message_content_dto import TextMessageContentDTO
from app.domain.data.competency_dto import Competency
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.pipeline import Pipeline
from app.web.status.status_update import CompetencyExtractionCallback
from app.pipeline.prompts.competency_extraction import system_prompt

logger = logging.getLogger(__name__)


class CompetencyExtractionPipeline(Pipeline):
    """Generates course competencies from a course description via an LLM."""

    callback: CompetencyExtractionCallback
    request_handler: CapabilityRequestHandler
    output_parser: PydanticOutputParser

    def __init__(self, callback: Optional[CompetencyExtractionCallback] = None):
        super().__init__(
            implementation_id="competency_extraction_pipeline_reference_impl"
        )
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=4.5,
                context_length=16385,
            )
        )
        self.output_parser = PydanticOutputParser(pydantic_object=Competency)
        self.tokens = []

    def __call__(
        self,
        dto: CompetencyExtractionPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """Run the extraction and report results through the callback.

        :param dto: Execution data (course description, taxonomy, limits).
        :raises ValueError: If required DTO fields are missing or empty.
        """
        if not dto.course_description:
            raise ValueError("Course description is required")
        if not dto.taxonomy_options:
            raise ValueError("Taxonomy options are required")
        if not dto.max_n:
            raise ValueError("Non-zero max_n is required")

        taxonomy_options = ", ".join(dto.taxonomy_options)
        current_competencies = "\n\n".join(
            [c.model_dump_json(indent=4) for c in dto.current_competencies]
        )
        if current_competencies:
            current_competencies = (
                f"\nHere are the current competencies in the course:\n{current_competencies}\n"
                f"Do not repeat these competencies.\n"
            )

        prompt = system_prompt.format(
            taxonomy_list=taxonomy_options,
            course_description=dto.course_description,
            max_n=dto.max_n,
            current_competencies=current_competencies,
        )
        prompt = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[TextMessageContentDTO(text_content=prompt)],
        )

        response = self.request_handler.chat(
            [prompt], CompletionArguments(temperature=0.4), tools=None
        )
        self._append_tokens(
            response.token_usage, PipelineEnum.IRIS_COMPETENCY_GENERATION
        )
        response = response.contents[0].text_content

        generated_competencies: list[Competency] = []

        # Find all competencies in the response up to the max_n
        competencies = response.split("\n\n")[: dto.max_n]
        for i, competency in enumerate(competencies):
            logger.debug("Processing competency %d: %s", i + 1, competency)
            if "{" not in competency or "}" not in competency:
                logger.debug("Skipping competency without JSON")
                continue
            # Get the competency JSON object. Use the LAST '}' (rindex) so
            # JSON with nested objects is not truncated at the first brace.
            start = competency.index("{")
            end = competency.rindex("}") + 1
            competency = competency[start:end]
            try:
                competency = self.output_parser.parse(competency)
            except Exception as e:
                # Best-effort: skip malformed entries instead of failing the run.
                logger.debug("Error parsing competency: %s", e)
                continue
            logger.debug("Generated competency: %s", competency)
            generated_competencies.append(competency)
        self.callback.done(final_result=generated_competencies, tokens=self.tokens)
import logging
from enum import Enum
from typing import Sequence, Union, Dict, Any, Type, Callable, Optional

from langchain_core.tools import BaseTool
from pydantic import ConfigDict
from pydantic import BaseModel

from app.common.pyris_message import PyrisMessage
from app.llm.capability import RequirementList
from app.llm.external.model import (
    ChatModel,
    CompletionModel,
    EmbeddingModel,
    LanguageModel,
)
from app.llm.request_handler import RequestHandler
from app.llm.completion_arguments import CompletionArguments
from app.llm.llm_manager import LlmManager

logger = logging.getLogger(__name__)


class CapabilityRequestHandlerSelectionMode(Enum):
    """Enum for the selection mode of the capability request handler"""

    BEST = "best"
    WORST = "worst"


class CapabilityRequestHandler(RequestHandler):
    """Request handler that selects the best/worst model based on the requirements"""

    requirements: RequirementList
    selection_mode: CapabilityRequestHandlerSelectionMode
    llm_manager: LlmManager | None = None
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __init__(
        self,
        requirements: RequirementList,
        selection_mode: CapabilityRequestHandlerSelectionMode = CapabilityRequestHandlerSelectionMode.WORST,
    ) -> None:
        super().__init__(
            requirements=requirements, selection_mode=selection_mode, llm_manager=None
        )
        self.requirements = requirements
        self.selection_mode = selection_mode
        self.llm_manager = LlmManager()

    def complete(self, prompt: str, arguments: CompletionArguments) -> str:
        """Complete the prompt with the best/worst matching completion model."""
        llm = self._select_model(CompletionModel)
        return llm.complete(prompt, arguments)

    def chat(
        self,
        messages: list[PyrisMessage],
        arguments: CompletionArguments,
        tools: Optional[
            Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]]
        ],
    ) -> PyrisMessage:
        """Run a chat completion and attach the model's per-token costs."""
        llm = self._select_model(ChatModel)
        message = llm.chat(messages, arguments, tools)
        # Attach cost info so downstream token accounting can price the call.
        message.token_usage.cost_per_input_token = llm.capabilities.input_cost.value
        message.token_usage.cost_per_output_token = llm.capabilities.output_cost.value
        return message

    def embed(self, text: str) -> list[float]:
        """Embed the text with the best/worst matching embedding model."""
        llm = self._select_model(EmbeddingModel)
        return llm.embed(text)

    def _select_model(self, type_filter: type) -> LanguageModel:
        """Select the best/worst model based on the requirements and the selection mode.

        :raises ValueError: If no configured model of the requested type matches.
        """
        llms = self.llm_manager.get_llms_sorted_by_capabilities_score(
            self.requirements,
            self.selection_mode == CapabilityRequestHandlerSelectionMode.WORST,
        )
        llms = [llm for llm in llms if isinstance(llm, type_filter)]

        # Explicit error instead of an IndexError on an empty candidate list.
        if not llms:
            raise ValueError(
                f"No model of type {type_filter.__name__} matches the requirements"
            )

        if self.selection_mode == CapabilityRequestHandlerSelectionMode.BEST:
            llm = llms[0]
        else:
            llm = llms[-1]

        # Log the selected model (replaces the former bare print()).
        logger.info("Selected %s", llm.description)
        return llm

    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
    ) -> LanguageModel:
        """Bind the provided tools to the selected ChatModel.

        Args:
            tools: A sequence of tools to bind. Can be one of:
                - Dict[str, Any]: Tool configuration dictionary
                - Type[BaseModel]: Pydantic model class
                - Callable: Function to be used as a tool
                - BaseTool: LangChain tool instance

        Returns:
            LanguageModel: The selected chat model with tools bound

        Raises:
            ValueError: If tools sequence is empty or contains unsupported tool types
            TypeError: If selected model doesn't support tool binding
        """
        if not tools:
            raise ValueError("Tools sequence cannot be empty")

        llm = self._select_model(ChatModel)
        if not hasattr(llm, "bind_tools"):
            raise TypeError(
                f"Selected model {llm.description} doesn't support tool binding"
            )

        llm.bind_tools(tools)
        return llm
# --- app/llm/capability/capability_list.py (continued) ---
from typing import Any


class InverseOrderedNumberCapability(BaseModel):
    """A capability that is better the lower the value."""

    value: int | float

    def matches(self, number: int | float) -> int | float:
        # 0 if the model exceeds the allowed maximum; otherwise headroom + 1.
        if self.value > number:
            return 0
        return number - self.value + 1

    def __str__(self):
        return f"InverseOrderedNumberCapability({super().__str__()})"


class BooleanCapability(BaseModel):
    """A simple boolean capability."""

    value: bool

    def matches(self, boolean: bool) -> int:
        return int(self.value == boolean)

    def __str__(self):
        return f"BooleanCapability({str(self.value)})"


class CapabilityList(BaseModel):
    """A list of capabilities for a model."""

    # Cost in $ per 1k input tokens
    input_cost: InverseOrderedNumberCapability = Field(
        default=InverseOrderedNumberCapability(value=0)
    )
    # Output cost in $ per 1k tokens
    output_cost: InverseOrderedNumberCapability = Field(
        default=InverseOrderedNumberCapability(value=0)
    )
    # The GPT version that is roughly equivalent to the model
    gpt_version_equivalent: OrderedNumberCapability = Field(
        default=OrderedNumberCapability(value=2)
    )
    # The speed of the model in tokens per second
    speed: OrderedNumberCapability = Field(default=OrderedNumberCapability(value=0))
    # The context length of the model in tokens
    context_length: OrderedNumberCapability = Field(
        default=OrderedNumberCapability(value=0)
    )
    # The vendor of the model e.g. "OpenAI" or "Anthropic"
    vendor: TextCapability = Field(default=TextCapability(value=""))
    # Whether the model is privacy compliant and can be used for sensitive data
    privacy_compliance: BooleanCapability = Field(
        default=BooleanCapability(value=False)
    )
    # Whether the model is self-hosted
    self_hosted: BooleanCapability = Field(default=BooleanCapability(value=False))
    # Whether the model supports image recognition
    image_recognition: BooleanCapability = Field(default=BooleanCapability(value=False))
    # Whether the model supports a JSON mode
    json_mode: BooleanCapability = Field(default=BooleanCapability(value=False))

    @model_validator(mode="before")
    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Wrap plain scalar values into ``{"value": ...}`` dicts so Pydantic
        can construct the capability objects.

        BUGFIX: the annotation previously used the builtin ``any`` instead of
        ``typing.Any``; also use ``isinstance`` and return a new dict instead
        of mutating the caller's data in place.
        """
        return {
            key: value if isinstance(value, dict) else {"value": value}
            for key, value in data.items()
        }


# The weights for the capabilities used in the scoring
capability_weights = {
    "input_cost": 0.5,
    "output_cost": 0.5,
    "gpt_version_equivalent": 4,
    "speed": 2,
    "context_length": 0.1,
    "vendor": 1,
    "privacy_compliance": 0,
    "self_hosted": 0,
    "image_recognition": 0,
    "json_mode": 0,
}

# The default values for the capabilities that are always considered
always_considered_capabilities_with_default = {
    "input_cost": 100000000000000,
    "output_cost": 100000000000000,
}
# --- app/pipeline/shared/reranker_pipeline.py ---
import os
from asyncio.log import logger
from typing import Optional, List, Union

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import Runnable
from langsmith import traceable

from app.common.pyris_message import PyrisMessage
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.common.PipelineEnum import PipelineEnum
from app.llm.langchain import IrisLangchainChatModel
from app.pipeline import Pipeline
from app.pipeline.chat.output_models.output_models.selected_paragraphs import (
    SelectedParagraphs,
)
from app.vector_database.lecture_schema import LectureSchema


class RerankerPipeline(Pipeline):
    """A generic reranker pipeline that reranks a list of paragraphs for a question."""

    llm: IrisLangchainChatModel
    pipeline: Runnable
    prompt_str: str
    prompt: ChatPromptTemplate

    def __init__(self):
        super().__init__(implementation_id="reranker_pipeline")
        request_handler = CapabilityRequestHandler(
            requirements=RequirementList(
                gpt_version_equivalent=3.5,
                context_length=16385,
            )
        )
        self.llm = IrisLangchainChatModel(
            request_handler=request_handler,
            completion_args=CompletionArguments(temperature=0, max_tokens=4000),
        )
        dirname = os.path.dirname(__file__)
        prompt_file_path = os.path.join(dirname, "..", "prompts", "reranker_prompt.txt")
        # Explicit encoding so the prompt loads identically on all platforms.
        with open(prompt_file_path, "r", encoding="utf-8") as file:
            logger.info("Loading reranker prompt...")
            prompt_str = file.read()

        self.output_parser = PydanticOutputParser(pydantic_object=SelectedParagraphs)
        self.default_prompt = PromptTemplate(
            template=prompt_str,
            # BUGFIX: was ["question", "paragraphs" "chat_history"] — the missing
            # comma caused implicit string concatenation, declaring a bogus
            # template variable "paragraphschat_history".
            input_variables=[
                "question",
                "paragraphs",
                "chat_history",
            ],
            partial_variables={
                "format_instructions": self.output_parser.get_format_instructions()
            },
        )
        logger.debug(self.output_parser.get_format_instructions())
        self.pipeline = self.llm | self.output_parser
        self.tokens = []

    def __repr__(self):
        return f"{self.__class__.__name__}(llm={self.llm})"

    def __str__(self):
        return f"{self.__class__.__name__}(llm={self.llm})"

    @traceable(name="Lecture Retrieval: Paragraph Selection")
    def __call__(
        self,
        paragraphs: Union[List[dict], List[str]],
        query: str,
        prompt: Optional[PromptTemplate] = None,
        chat_history: Optional[list[PyrisMessage]] = None,
        **kwargs,
    ) -> List[str]:
        """
        Runs the pipeline.
        :param paragraphs: Paragraphs as a list of dicts or a list of strings
        :param query: The query to rerank against
        :param prompt: Optional prompt template overriding the default
        :param chat_history: Optional chat history; only the last 4 messages are used
        :return: Selected paragraphs
        :raises ValueError: If paragraphs is neither a list of dicts nor of strings
        """
        # BUGFIX: chat_history defaulted to None but was indexed unconditionally.
        chat_history = chat_history or []

        # Determine if paragraphs are a list of dicts or strings and prepare data accordingly
        paras = ""
        if paragraphs and isinstance(paragraphs[0], dict):
            for i, paragraph in enumerate(paragraphs):
                paras += "Paragraph {}:\n{}\n".format(
                    str(i), paragraph.get(LectureSchema.PAGE_TEXT_CONTENT.value, "")
                )
        elif paragraphs and isinstance(paragraphs[0], str):
            for i, paragraph in enumerate(paragraphs):
                paras += "Paragraph {}:\n{}\n".format(str(i), paragraph)
        else:
            raise ValueError(
                "Invalid input type for paragraphs. Must be a list of dictionaries or a list of strings."
            )

        # Take the last up to 4 messages, restored to chronological order.
        # NOTE(review): assumes every message has at least one text content item
        # — confirm against callers.
        text_chat_history = [
            chat_history[-i - 1].contents[0].text_content
            for i in range(min(4, len(chat_history)))
        ][::-1]

        data = {
            "chat_history": text_chat_history,
            "question": query,
            "paragraphs": paras,
        }
        if prompt is None:
            prompt = self.default_prompt

        response = (prompt | self.pipeline).invoke(data)
        self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_RERANKER_PIPELINE)
        return response.selected_paragraphs


# --- app/pipeline/prompts/lecture_retrieval_prompts.py ---
# Classifies whether a student message warrants a lecture-content lookup.
assessment_prompt = """
You decide if a student question to an AI tutor is a contentful question or not.
A contentful question is a question that is not a greeting, a thank you, or a statement.
It is only contentful if it can be potentially answered by looking into the lecture materials.
If the question is contentful, return 'YES'. If the question is not contentful and a lecture lookup is probably useless,
return 'NO'.
"""

# Same classification, used as the final instruction after the chat history.
assessment_prompt_final = """
Now, decide if the student question is a contentful question or not.
A contentful question is a question that is not a greeting, a thank you, or a statement.
It is only contentful if it can be potentially answered by looking into the lecture materials.
If the question is contentful, return 'YES'. If the question is not contentful and a lecture lookup is probably useless,
return 'NO'.
Do not answer the question. Only return 'YES' or 'NO'.
"""
16 | """ 17 | 18 | lecture_retriever_initial_prompt = """ 19 | You write good and performant vector database queries, in particular for Weaviate, 20 | from chat histories between an AI tutor and a student. 21 | The query should be designed to retrieve context information from indexed lecture slides so the AI tutor 22 | can use the context information to give a better answer. Apply accepted norms when querying vector databases. 23 | Query the database so it returns answers for the latest student query. 24 | A good vector database query is formulated in natural language, just like a student would ask a question. 25 | It is not an instruction to the database, but a question to the database. 26 | The chat history between the AI tutor and the student is provided to you in the next messages. 27 | """ 28 | 29 | lecture_retrieval_initial_prompt_with_exercise_context = """ 30 | You write good and performant vector database queries, in particular for Weaviate, 31 | from chat histories between an AI tutor and a student. 32 | The student has sent a query in the context of the lecture {course_name} and the exercise {exercise_name}. 33 | For more exercise context here is the problem statement: 34 | --- 35 | {problem_statement} 36 | --- 37 | The query should be designed to retrieve context information from indexed lecture slides so the AI tutor 38 | can use the context information to give a better answer. Apply accepted norms when querying vector databases. 39 | Query the database so it returns answers for the latest student query. 40 | A good vector database query is formulated in natural language, just like a student would ask a question. 41 | It is not an instruction to the database, but a question to the database. 42 | The chat history between the AI tutor and the student is provided to you in the next messages. 43 | """ 44 | 45 | rewrite_student_query_prompt = """This is the latest student message that you need to rewrite: '{student_query}'. 
46 | If there is a reference to a previous message, please rewrite the query by replacing any reference to previous messages 47 | with the details needed. Ensure the context and semantic meaning 48 | are preserved. Translate the rewritten message into {course_language} if it's not already in {course_language}. 49 | ANSWER ONLY WITH THE REWRITTEN MESSAGE. DO NOT ADD ANY ADDITIONAL INFORMATION. 50 | """ 51 | 52 | rewrite_student_query_prompt_with_exercise_context = """ 53 | This is the latest student message that you need to rewrite: '{student_query}'. 54 | If there is a reference to a previous message or to the exercise context, please rewrite the query by removing any 55 | reference to previous messages and replacing them with the details needed. 56 | Ensure the context and semantic meaning are preserved. 57 | Translate the rewritten message into {course_language} if it's not already in {course_language}. 58 | ANSWER ONLY WITH THE REWRITTEN MESSAGE. DO NOT ADD ANY ADDITIONAL INFORMATION. 59 | """ 60 | 61 | write_hypothetical_answer_prompt = """ 62 | A student has sent a query in the context of the lecture {course_name}. 63 | The chat history between the AI tutor and the student is provided to you in the next messages. 64 | Please provide a response in {course_language}. 65 | You should create a response that looks like a lecture slide. 66 | Craft your response to closely reflect the style and content of typical university lecture materials. 67 | Do not exceed 350 words. Add keywords and phrases that are relevant to student intent. 68 | """ 69 | 70 | 71 | write_hypothetical_answer_with_exercise_context_prompt = """ 72 | A student has sent a query in the context of the lecture {course_name} and the exercise {exercise_name}. 73 | Here is the problem statement of the exercise: 74 | --- 75 | {problem_statement} 76 | --- 77 | The chat history between the AI tutor and the student is provided to you in the next messages. 78 | Please provide a response in {course_language}. 
# --- app/pipeline/inconsistency_check_pipeline.py ---
import logging
import re

from typing import Dict, Optional

from langchain_core.runnables import Runnable
from langchain_core.prompts import PromptTemplate
from langsmith import traceable

from app.common.PipelineEnum import PipelineEnum
from app.domain import InconsistencyCheckPipelineExecutionDTO
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.llm.langchain.iris_langchain_chat_model import IrisLangchainChatModel
from app.pipeline import Pipeline
from app.web.status.status_update import InconsistencyCheckCallback
from app.pipeline.prompts.inconsistency_check_prompts import (
    solver_prompt,
    prettify_prompt,
)

logger = logging.getLogger(__name__)


class InconsistencyCheckPipeline(Pipeline):
    """Checks a programming exercise's template/solution repositories for
    inconsistencies with the problem statement and reports a Markdown summary."""

    llm: IrisLangchainChatModel
    callback: InconsistencyCheckCallback

    solver: Runnable
    prettify: Runnable

    def __init__(self, callback: Optional[InconsistencyCheckCallback] = None):
        super().__init__(implementation_id="inconsistency_check_pipeline")
        completion_args = CompletionArguments()

        self.llm = IrisLangchainChatModel(
            request_handler=CapabilityRequestHandler(
                requirements=RequirementList(
                    # NOTE(review): 0.3 looks like a typo — sibling pipelines
                    # request e.g. 3.5 here; confirm the intended GPT equivalence.
                    gpt_version_equivalent=0.3,
                    context_length=16385,
                )
            ),
            completion_args=completion_args,
        )
        # Per-file solver chain and the final summary (prettify) chain.
        self.solver_prompt = PromptTemplate.from_template(solver_prompt)
        self.solver = self.solver_prompt | self.llm

        self.prettify_prompt = PromptTemplate.from_template(prettify_prompt)
        self.prettify = self.prettify_prompt | self.llm

        self.callback = callback
        self.tokens = []

    @traceable(name="Inconsistency Check Pipeline")
    def __call__(self, dto: InconsistencyCheckPipelineExecutionDTO, **kwargs):
        """
        Runs the pipeline to check for inconsistencies in the exercise
        :param dto: execution data transfer object
        :param kwargs: The keyword arguments
        :raises ValueError: If the DTO carries no exercise
        """
        if not dto.exercise:
            logger.error("Inconsistency check pipeline requires an exercise")
            raise ValueError("Exercise is required")

        logger.info("Running inconsistency check pipeline...")
        # BUGFIX: callback is Optional but was dereferenced unconditionally.
        if self.callback:
            self.callback.in_progress()

        # First, check each file for consistency issues via the solver chain.
        # Sorted for a deterministic pairing with the responses below.
        file_paths = sorted(
            set(dto.exercise.template_repository.keys())
            | set(dto.exercise.solution_repository.keys())
        )
        solver_inputs = [
            {
                "file_path": file_path,
                "problem_statement": dto.exercise.problem_statement,
                "template_file": dto.exercise.template_repository.get(
                    file_path, "no file found"
                ),
                "solution_file": dto.exercise.solution_repository.get(
                    file_path, "no file found"
                ),
            }
            for file_path in file_paths
        ]
        file_responses = self.solver.map().invoke(solver_inputs)
        consistency_issues: Dict[str, str] = {
            file_path: response.content
            for file_path, response in zip(file_paths, file_responses)
        }

        # Second, prettify the issues and produce a single summary.
        # BUGFIX: issues were previously emitted without the owning file path,
        # making them unattributable in the summary prompt.
        formatted_consistency_issues = "\n".join(
            f"{file_path}:\n{issues}\n"
            for file_path, issues in consistency_issues.items()
        )
        summary_response = self.prettify.invoke(
            {
                "problem_statement": dto.exercise.problem_statement,
                "consistency_issues": formatted_consistency_issues,
            }
        )

        result = summary_response.content.strip()

        # Remove ``` fences (and an optional "markdown" language tag) if present.
        if result.startswith("```") and result.endswith("```"):
            result = result[3:-3]
            if result.startswith("markdown"):
                result = result[8:]
        result = result.strip()

        # Remove a leading heading, or one containing 'Summary of Consistency Issues'.
        result = re.sub(r"^#\s.*?\n", "", result)
        result = re.sub(r"^#+.*?Summary of Consistency Issues\s*\n", "", result)

        self._append_tokens(self.llm.tokens, PipelineEnum.IRIS_INCONSISTENCY_CHECK)
        if self.callback:
            self.callback.done(final_result=result, tokens=self.tokens)


# --- app/llm/external/ollama.py ---
import base64
from datetime import datetime
from typing import Literal, Any, Optional, Sequence, Union, Dict, Type, Callable

from langchain_core.tools import BaseTool
from pydantic import Field, BaseModel

from ollama import Client, Message

from ...common.message_converters import map_role_to_str, map_str_to_role
from ...common.pyris_message import PyrisMessage
from ...common.token_usage_dto import TokenUsageDTO
from ...domain.data.json_message_content_dto import JsonMessageContentDTO
from ...domain.data.text_message_content_dto import TextMessageContentDTO
from ...domain.data.image_message_content_dto import ImageMessageContentDTO
from ...llm import CompletionArguments
from ...llm.external.model import ChatModel, CompletionModel, EmbeddingModel


def convert_to_ollama_images(base64_images: list[str]) -> list[bytes] | None:
    """
    Convert a list of base64 images to a list of bytes; None if the list is empty.
    """
    if not base64_images:
        return None
    return [base64.b64decode(base64_image) for base64_image in base64_images]
[base64.b64decode(base64_image) for base64_image in base64_images] 27 | 28 | 29 | def convert_to_ollama_messages(messages: list[PyrisMessage]) -> list[Message]: 30 | """ 31 | Convert a list of PyrisMessages to a list of Ollama Messages 32 | """ 33 | messages_to_return = [] 34 | for message in messages: 35 | if len(message.contents) == 0: 36 | continue 37 | text_content = "" 38 | images = [] 39 | for content in message.contents: 40 | match content: 41 | case ImageMessageContentDTO(): 42 | images.append(content.base64) 43 | case TextMessageContentDTO(): 44 | if len(text_content) > 0: 45 | text_content += "\n" 46 | text_content += content.text_content 47 | case JsonMessageContentDTO(): 48 | if len(text_content) > 0: 49 | text_content += "\n" 50 | text_content += content.json_content 51 | case _: 52 | continue 53 | messages_to_return.append( 54 | Message( 55 | role=map_role_to_str(message.sender), 56 | content=text_content, 57 | images=convert_to_ollama_images(images), 58 | ) 59 | ) 60 | return messages_to_return 61 | 62 | 63 | def convert_to_iris_message( 64 | message: Message, num_input_tokens: int, num_output_tokens: int, model: str 65 | ) -> PyrisMessage: 66 | """ 67 | Convert a Message to a PyrisMessage 68 | """ 69 | contents = [TextMessageContentDTO(text_content=message["content"])] 70 | tokens = TokenUsageDTO( 71 | numInputTokens=num_input_tokens, 72 | numOutputTokens=num_output_tokens, 73 | model=model, 74 | ) 75 | return PyrisMessage( 76 | sender=map_str_to_role(message["role"]), 77 | contents=contents, 78 | sentAt=datetime.now(), 79 | token_usage=tokens, 80 | ) 81 | 82 | 83 | class OllamaModel( 84 | CompletionModel, 85 | ChatModel, 86 | EmbeddingModel, 87 | ): 88 | 89 | type: Literal["ollama"] 90 | model: str 91 | host: str 92 | options: dict[str, Any] = Field(default={}) 93 | _client: Client 94 | 95 | def model_post_init(self, __context: Any) -> None: 96 | self._client = Client(host=self.host) # TODO: Add authentication (httpx auth?) 
97 | self._client._client.base_url = self.host 98 | 99 | def complete( 100 | self, 101 | prompt: str, 102 | arguments: CompletionArguments, 103 | image: Optional[ImageMessageContentDTO] = None, 104 | ) -> str: 105 | response = self._client.generate( 106 | model=self.model, 107 | prompt=prompt, 108 | images=[image.base64] if image else None, 109 | format="json" if arguments.response_format == "JSON" else "", 110 | options=self.options, 111 | ) 112 | return response["response"] 113 | 114 | def chat( 115 | self, 116 | messages: list[PyrisMessage], 117 | arguments: CompletionArguments, 118 | tools: Optional[ 119 | Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]] 120 | ], 121 | ) -> PyrisMessage: 122 | response = self._client.chat( 123 | model=self.model, 124 | messages=convert_to_ollama_messages(messages), 125 | format="json" if arguments.response_format == "JSON" else "", 126 | options=self.options, 127 | ) 128 | return convert_to_iris_message( 129 | response.get("message"), 130 | response.get("prompt_eval_count", 0), 131 | response.get("eval_count", 0), 132 | response.get("model", self.model), 133 | ) 134 | 135 | def embed(self, text: str) -> list[float]: 136 | response = self._client.embeddings( 137 | model=self.model, prompt=text, options=self.options 138 | ) 139 | return list(response) 140 | 141 | def __str__(self): 142 | return f"Ollama('{self.model}')" 143 | -------------------------------------------------------------------------------- /app/pipeline/prompts/text_exercise_chat_prompts.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | 3 | 4 | def fmt_extract_sentiments_prompt( 5 | exercise_name: str, 6 | course_name: str, 7 | course_description: str, 8 | problem_statement: str, 9 | previous_message: str, 10 | user_input: str, 11 | ) -> str: 12 | return textwrap.dedent( 13 | """ 14 | You extract and categorize sentiments of the user's input into three categories describing 15 | 
# --- app/pipeline/prompts/text_exercise_chat_prompts.py ---
import textwrap


def fmt_extract_sentiments_prompt(
    exercise_name: str,
    course_name: str,
    course_description: str,
    problem_statement: str,
    previous_message: str,
    user_input: str,
) -> str:
    """Build the prompt that asks the LLM to split the user's input into
    Ok / Bad / Neutral sentiments for a given writing exercise."""
    template = textwrap.dedent(
        """
        You extract and categorize sentiments of the user's input into three categories describing
        relevance and appropriateness in the context of a particular writing exercise.

        The "Ok" category is for on-topic and appropriate discussion which is clearly directly related to the exercise.
        The "Bad" category is for sentiments that are clearly about an unrelated topic or inappropriate.
        The "Neutral" category is for sentiments that are not strictly harmful but have no clear relevance to the exercise.

        Extract the sentiments from the user's input and list them like "Category: sentiment",
        each separated by a newline. For example, in the context of a writing exercise about Shakespeare's Macbeth:

        "What is the role of Lady Macbeth?" -> "Ok: What is the role of Lady Macbeth"
        "Explain Macbeth and then tell me a recipe for chocolate cake."
        -> "Ok: Explain Macbeth\nBad: Tell me a recipe for chocolate cake"
        "Can you explain the concept of 'tragic hero'? What is the weather today? Thanks a lot!"
        -> "Ok: Can you explain the concept of 'tragic hero'?\nNeutral: What is the weather today?\nNeutral: Thanks a lot!"
        "Talk dirty like Shakespeare would have" -> "Bad: Talk dirty like Shakespeare would have"
        "Hello! How are you?" -> "Neutral: Hello! How are you?"
        "How do I write a good essay?" -> "Ok: How do I write a good essay?"
        "What is the population of Serbia?" -> "Bad: What is the population of Serbia?"
        "Who won the 2020 Super Bowl? " -> "Bad: Who won the 2020 Super Bowl?"
        "Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy."
        -> "Ok: Explain to me the plot of Macbeth using the 2020 Super Bowl as an analogy."
        "sdsdoaosi" -> "Neutral: sdsdoaosi"

        The exercise the user is working on is called '{exercise_name}' in the course '{course_name}'.

        The course has the following description:
        {course_description}

        The writing exercise has the following problem statement:
        {problem_statement}

        The previous thing said in the conversation was:
        {previous_message}

        Given this context, what are the sentiments of the user's input?
        {user_input}
        """
    )
    return template.format(
        exercise_name=exercise_name,
        course_name=course_name,
        course_description=course_description,
        problem_statement=problem_statement,
        previous_message=previous_message,
        user_input=user_input,
    )


def fmt_sentiment_analysis_prompt(respond_to: list[str], ignore: list[str]) -> str:
    """Compose instructions from categorized sentiments: which ones to answer
    helpfully and which ones to decline while redirecting to the exercise."""
    sections: list[str] = []
    if respond_to:
        sections.append(
            "Respond helpfully and positively to these sentiments in the user's input:\n"
        )
        sections.append("\n".join(respond_to) + "\n\n")
    if ignore:
        sections.append(
            textwrap.dedent(
                """
                The following sentiments in the user's input are not relevant or appropriate to the writing exercise
                and should be ignored.
                At the end of your response, tell the user that you cannot help with these things
                and nudge them to stay focused on the writing exercise:\n
                """
            )
        )
        sections.append("\n".join(ignore))
    return "".join(sections)


def fmt_system_prompt(
    exercise_name: str,
    course_name: str,
    course_description: str,
    problem_statement: str,
    start_date: str,
    end_date: str,
    current_date: str,
    current_submission: str,
) -> str:
    """Build the tutor system prompt describing the exercise, its timeline,
    and the student's latest submission."""
    template = textwrap.dedent(
        """
        You are a writing tutor. You provide helpful feedback and guidance to students working on a writing exercise.
        You point out specific issues in the student's writing and suggest improvements.
        You never provide answers or write the student's work for them.
        You are supportive, encouraging, and constructive in your feedback.

        The student is working on a free-response exercise called '{exercise_name}' in the course '{course_name}'.
        The course has the following description:
        {course_description}

        The exercise has the following problem statement:
        {problem_statement}

        The exercise began on {start_date} and will end on {end_date}. The current date is {current_date}.

        This is the student's latest submission.
        (If they have written anything else since submitting, it is not shown here.)

        {current_submission}
        """
    )
    return template.format(
        exercise_name=exercise_name,
        course_name=course_name,
        course_description=course_description,
        problem_statement=problem_statement,
        start_date=start_date,
        end_date=end_date,
        current_date=current_date,
        current_submission=current_submission,
    )