├── REST-Service ├── app │ ├── __init__.py │ ├── src │ │ ├── __init__.py │ │ ├── models │ │ │ ├── Experiment.py │ │ │ ├── RequestHistory.py │ │ │ ├── LLMInput.py │ │ │ ├── MultiTurnInput.py │ │ │ └── SingleTurnInput.py │ │ ├── config │ │ │ └── TimeoutMiddleware.py │ │ ├── services │ │ │ ├── MongoService.py │ │ │ ├── WatsonXService.py │ │ │ ├── answer_similarity.py │ │ │ ├── LLMJudgeService.py │ │ │ ├── answer_rating.py │ │ │ └── ManagementService.py │ │ └── utils │ │ │ └── Helper.py │ └── route │ │ └── root │ │ └── routes.py ├── deployment │ ├── base │ │ ├── redis │ │ │ ├── kustomization.yaml │ │ │ ├── service.yaml │ │ │ └── deployment.yaml │ │ ├── celery-worker │ │ │ ├── kustomization.yaml │ │ │ ├── service.yaml │ │ │ └── deployment.yaml │ │ ├── flower │ │ │ ├── kustomization.yaml │ │ │ ├── route.yaml │ │ │ ├── service.yaml │ │ │ └── deployment.yaml │ │ ├── rest-app │ │ │ ├── kustomization.yaml │ │ │ ├── route.yaml │ │ │ ├── service.yaml │ │ │ ├── deployment.yaml │ │ │ └── secret.yaml │ │ └── kustomization.yaml │ └── readme.md ├── chart │ ├── Chart.yaml │ ├── values.yaml │ └── templates │ │ ├── service.yaml │ │ └── deployment.yaml ├── Dockerfile ├── requirements.txt ├── cert │ └── mongo.crt ├── main.py └── docker-compose.yml ├── JudgeIt-App ├── .eslintrc.json ├── jsconfig.json ├── next.config.js ├── app │ ├── favicon.ico │ ├── (auth) │ │ └── signin │ │ │ └── page.js │ ├── api │ │ └── auth │ │ │ ├── [...nextauth] │ │ │ └── route.js │ │ │ └── logout │ │ │ └── route.js │ ├── layout.js │ ├── pages │ │ ├── help │ │ │ └── page.js │ │ └── single │ │ │ └── doc │ │ │ └── [doc_id] │ │ │ └── page.js │ ├── globals.css │ └── page.module.css ├── env ├── utils │ ├── setDynamicRoute.js │ ├── sessionProviderWrapper.js │ ├── encryption.js │ ├── sessionTokenAccessor.js │ └── Helper.js ├── components │ ├── globals │ │ ├── PageTitle.jsx │ │ ├── DataGridToolbar.jsx │ │ ├── LinearProgressWithLabel.jsx │ │ ├── Footer.jsx │ │ ├── icons │ │ │ ├── IBMIconTop.jsx │ │ │ └── IBMIcon.jsx │ │ ├── SignIn.jsx │ │ ├── DeleteConfirmationDialog.jsx │ │ ├── BarChart.jsx │ │ ├── DrawerMenu.jsx │ │ ├── LeftNavigation.jsx │ │ ├── SingleInstructions.jsx │ │ ├── BatchInstructions.jsx │ │ └── Topbar.jsx │ └── judge │ │ ├── EvaluationTypeLabel.jsx │ │ ├── MultiTurnWithConversationForm.jsx │ │ ├── SoloResult.jsx │ │ ├── RatingSimilarityDataGrid.jsx │ │ ├── DataGridMultiTurnConversation.jsx │ │ ├── DataGridMultiTurnSummary.jsx │ │ ├── RatingSimilarityDataGridSummary.jsx │ │ ├── DataGridSingleTurn.jsx │ │ ├── DataGridSingleTurnSummary.jsx │ │ ├── DisplayRequestHistoryMultiTurn.jsx │ │ ├── RatingSimilarityForm.jsx │ │ ├── DisplayRequestHistorySingleTurn.jsx │ │ ├── SingleTurnForm.jsx │ │ ├── DisplayRequestHistoryRatingSimilarity.jsx │ │ ├── EvaluationTypeComponent.jsx │ │ └── ExperimentForm.jsx ├── deployment │ ├── readme.md │ └── deployment.yaml ├── .gitignore ├── Dockerfile ├── package.json ├── styles │ └── globals.css ├── public │ ├── next.svg │ └── vercel.svg ├── README.md └── services │ ├── ManagemenBackendAPI.js │ ├── Config.js │ └── JudgeBackendAPISolo.js ├── .gitignore ├── images ├── features.png ├── swagger-ui.png ├── LLM-Judge-App.png ├── flow-diagram.png ├── llm-judge-app-saas.png ├── LLM-Judge_framework.png ├── multiturn-app-batch.gif ├── multiturn-framework.gif ├── LLM-Judge-app-onpremise.png ├── RAG-reliability-testing.png ├── multi-turn-evaluation.png ├── LLM-judge-framework-saas.png ├── rest-service-architecture.png ├── rag-evaluation-reliability.png ├── LLM-Judge-Architecture-Backend.png ├── 
llm-judge-framework-onpremise.png └── multi-turn-evaluation-reliability.png ├── Framework ├── data │ ├── input │ │ ├── sample_multi_turn_input.xlsx │ │ ├── sample_rag_answer_rating_input.xlsx │ │ └── sample_rag_answer_similarity_input.xlsx │ └── output │ │ ├── sample_multi_turn_output.xlsx │ │ ├── sample_rag_answer_rating_output.xlsx │ │ └── sample_rag_answer_similarity_output.xlsx ├── config.ini ├── sample_config.ini ├── wml_setup.py ├── main.py └── answer_similarity.py ├── requirements.txt └── evaluationapp-readme.md /REST-Service/app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /REST-Service/app/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /JudgeIt-App/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .DS_Store 3 | __pycache__ 4 | config.ini 5 | virtual-env 6 | -------------------------------------------------------------------------------- /images/features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/features.png -------------------------------------------------------------------------------- /images/swagger-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/swagger-ui.png -------------------------------------------------------------------------------- /JudgeIt-App/jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "paths": { 4 | "@/*": ["./*"] 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /images/LLM-Judge-App.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/LLM-Judge-App.png -------------------------------------------------------------------------------- /images/flow-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/flow-diagram.png -------------------------------------------------------------------------------- /JudgeIt-App/next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = {} 3 | 4 | module.exports = nextConfig -------------------------------------------------------------------------------- /JudgeIt-App/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/JudgeIt-App/app/favicon.ico -------------------------------------------------------------------------------- /REST-Service/deployment/base/redis/kustomization.yaml: 
-------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | resources: 3 | - deployment.yaml 4 | - service.yaml 5 | -------------------------------------------------------------------------------- /images/llm-judge-app-saas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/llm-judge-app-saas.png -------------------------------------------------------------------------------- /images/LLM-Judge_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/LLM-Judge_framework.png -------------------------------------------------------------------------------- /images/multiturn-app-batch.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/multiturn-app-batch.gif -------------------------------------------------------------------------------- /images/multiturn-framework.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/multiturn-framework.gif -------------------------------------------------------------------------------- /REST-Service/deployment/base/celery-worker/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | resources: 3 | - deployment.yaml 4 | - service.yaml 5 | -------------------------------------------------------------------------------- /images/LLM-Judge-app-onpremise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/LLM-Judge-app-onpremise.png -------------------------------------------------------------------------------- /images/RAG-reliability-testing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/RAG-reliability-testing.png -------------------------------------------------------------------------------- /images/multi-turn-evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/multi-turn-evaluation.png -------------------------------------------------------------------------------- /REST-Service/chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: mychart 3 | description: A Helm chart for Kubernetes 4 | version: 0.1.0 5 | appVersion: "1.0" 6 | -------------------------------------------------------------------------------- /images/LLM-judge-framework-saas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/LLM-judge-framework-saas.png -------------------------------------------------------------------------------- /images/rest-service-architecture.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/rest-service-architecture.png -------------------------------------------------------------------------------- /REST-Service/deployment/base/flower/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | resources: 3 | - deployment.yaml 4 | - service.yaml 5 | - route.yaml 6 | -------------------------------------------------------------------------------- /images/rag-evaluation-reliability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/rag-evaluation-reliability.png -------------------------------------------------------------------------------- /REST-Service/deployment/base/rest-app/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | resources: 3 | - deployment.yaml 4 | - service.yaml 5 | - route.yaml 6 | -------------------------------------------------------------------------------- /images/LLM-Judge-Architecture-Backend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/LLM-Judge-Architecture-Backend.png -------------------------------------------------------------------------------- /images/llm-judge-framework-onpremise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/llm-judge-framework-onpremise.png -------------------------------------------------------------------------------- /JudgeIt-App/app/(auth)/signin/page.js: -------------------------------------------------------------------------------- 1 | import SignIn from '@/components/globals/SignIn' 2 | 3 | export default function SignInPage() { 4 | return ; 5 | } -------------------------------------------------------------------------------- /images/multi-turn-evaluation-reliability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/images/multi-turn-evaluation-reliability.png -------------------------------------------------------------------------------- /Framework/data/input/sample_multi_turn_input.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/Framework/data/input/sample_multi_turn_input.xlsx -------------------------------------------------------------------------------- /Framework/data/output/sample_multi_turn_output.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/Framework/data/output/sample_multi_turn_output.xlsx -------------------------------------------------------------------------------- /Framework/data/input/sample_rag_answer_rating_input.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/Framework/data/input/sample_rag_answer_rating_input.xlsx -------------------------------------------------------------------------------- 
/Framework/data/output/sample_rag_answer_rating_output.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/Framework/data/output/sample_rag_answer_rating_output.xlsx -------------------------------------------------------------------------------- /Framework/data/input/sample_rag_answer_similarity_input.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/Framework/data/input/sample_rag_answer_similarity_input.xlsx -------------------------------------------------------------------------------- /REST-Service/app/src/models/Experiment.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from pydantic import BaseModel, Field 3 | 4 | class Experiment(BaseModel): 5 | name: str 6 | user_id: str 7 | type: str -------------------------------------------------------------------------------- /Framework/data/output/sample_rag_answer_similarity_output.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibm-self-serve-assets/JudgeIt-LLM-as-a-Judge/HEAD/Framework/data/output/sample_rag_answer_similarity_output.xlsx -------------------------------------------------------------------------------- /JudgeIt-App/env: -------------------------------------------------------------------------------- 1 | NEXT_PUBLIC_JUDGE_BACKEND_URL= 2 | NEXT_PUBLIC_LLM_JUDGE_API_KEY= 3 | OAUTH_ISSUER_URL= 4 | OAUTH_CLIENT_ID= 5 | OAUTH_CLIENT_SECRET= 6 | NEXTAUTH_URL= 7 | NEXTAUTH_SECRET= 8 | -------------------------------------------------------------------------------- /REST-Service/app/src/models/RequestHistory.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from pydantic import BaseModel, Field 3 | 4 | class RequestHistory(BaseModel): 5 | name: str 6 | user_id: str 7 | experiment_name: str 8 | content: Any 9 | type: str 10 | eval_type: str -------------------------------------------------------------------------------- /REST-Service/app/src/models/LLMInput.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from pydantic import BaseModel, Field 3 | 4 | class LLMInput(BaseModel): 5 | question: str 6 | golden_text: str 7 | generated_text: str 8 | model: str = "meta-llama/llama-3-70b-instruct" 9 | 10 | 11 | -------------------------------------------------------------------------------- /REST-Service/app/src/models/MultiTurnInput.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from pydantic import BaseModel 3 | 4 | class MultiTurnInput(BaseModel): 5 | conversation_history: str 6 | follow_up_query: str 7 | golden_query: str 8 | rewritten_query: str 9 | model: str = "meta-llama/llama-3-70b-instruct" -------------------------------------------------------------------------------- /JudgeIt-App/utils/setDynamicRoute.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useEffect } from 'react'; 4 | import { useRouter } from 'next/navigation'; 5 | 6 | export function SetDynamicRoute() { 7 | const router = useRouter(); 8 | 9 | useEffect(() => { 10 | router.refresh(); 11 | }, [router]); 12 | 13 | return 
<>; 14 | } -------------------------------------------------------------------------------- /JudgeIt-App/utils/sessionProviderWrapper.js: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | import React from 'react' 3 | 4 | import { SessionProvider } from 'next-auth/react'; 5 | 6 | const SessionProviderWrapper = ({children}) => { 7 | return ( 8 | {children} 9 | ) 10 | } 11 | 12 | export default SessionProviderWrapper -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/PageTitle.jsx: -------------------------------------------------------------------------------- 1 | import {Box, Typography} from '@mui/material' 2 | 3 | const PageTitle = ({ title }) => { 4 | return ( 5 | 6 | {title} 7 | 8 | ); 9 | }; 10 | 11 | export default PageTitle; 12 | -------------------------------------------------------------------------------- /REST-Service/app/src/models/SingleTurnInput.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from pydantic import BaseModel 3 | 4 | class SingleTurnInput(BaseModel): 5 | previous_question: str 6 | previous_answer: str 7 | current_question: str 8 | golden_rewritten_question: str 9 | rewritten_question: str 10 | model: str = "meta-llama/llama-3-70b-instruct" -------------------------------------------------------------------------------- /Framework/config.ini: -------------------------------------------------------------------------------- 1 | [Default] 2 | home_dir = path_to_JudgeIt_repository 3 | model_id = llm_model_id 4 | input_file_name = input_path_and_filename 5 | output_file_name = input_path_and_filename 6 | judge_type = judge_type 7 | 8 | [WML_CRED] 9 | wml_platform = saas 10 | wml_user = watsonx.user 11 | wml_url = watsonx.ai_url 12 | api_key = ibm_cloud_api_key 13 | project_id = watsonx.ai_project_id -------------------------------------------------------------------------------- /REST-Service/deployment/base/redis/service.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: redis 5 | labels: 6 | app: redis 7 | spec: 8 | ports: 9 | - name: 6379-tcp 10 | protocol: TCP 11 | port: 6379 12 | targetPort: 6379 13 | internalTrafficPolicy: Cluster 14 | type: ClusterIP 15 | selector: 16 | app: redis 17 | deployment: redis 18 | 19 | -------------------------------------------------------------------------------- /REST-Service/deployment/base/flower/route.yaml: -------------------------------------------------------------------------------- 1 | kind: Route 2 | apiVersion: route.openshift.io/v1 3 | metadata: 4 | name: flower-app 5 | labels: 6 | app: flower-app 7 | spec: 8 | to: 9 | kind: Service 10 | name: flower-app 11 | weight: 100 12 | port: 13 | targetPort: 5555-tcp 14 | tls: 15 | termination: edge 16 | insecureEdgeTerminationPolicy: Redirect 17 | wildcardPolicy: None 18 | -------------------------------------------------------------------------------- /REST-Service/deployment/base/rest-app/route.yaml: -------------------------------------------------------------------------------- 1 | kind: Route 2 | apiVersion: route.openshift.io/v1 3 | metadata: 4 | name: llm-judge-backend 5 | labels: 6 | app: llm-judge-backend 7 | spec: 8 | to: 9 | kind: Service 10 | name: llm-judge-backend 11 | weight: 100 12 | port: 13 | targetPort: 3001-tcp 14 | tls: 15 | termination: edge 16 | insecureEdgeTerminationPolicy: 
Redirect 17 | wildcardPolicy: None 18 | -------------------------------------------------------------------------------- /REST-Service/deployment/base/celery-worker/service.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: celery-worker 5 | labels: 6 | app: celery-worker 7 | spec: 8 | ports: 9 | - name: 3001-tcp 10 | protocol: TCP 11 | port: 3001 12 | targetPort: 3001 13 | - name: 8080-tcp 14 | protocol: TCP 15 | port: 8080 16 | targetPort: 8080 17 | selector: 18 | app: celery-worker 19 | deployment: celery-worker -------------------------------------------------------------------------------- /REST-Service/deployment/base/flower/service.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: flower-app 5 | labels: 6 | app: flower-app 7 | spec: 8 | ports: 9 | - name: 3001-tcp 10 | protocol: TCP 11 | port: 3001 12 | targetPort: 3001 13 | - name: 5555-tcp 14 | protocol: TCP 15 | port: 5555 16 | targetPort: 5555 17 | type: ClusterIP 18 | selector: 19 | app: flower-app 20 | deployment: flower-app 21 | -------------------------------------------------------------------------------- /JudgeIt-App/deployment/readme.md: -------------------------------------------------------------------------------- 1 | # LLM-Judge frontend 2 | 3 | Make sure you deploy the Backend REST service first. 4 | 5 | ```yaml 6 | oc create secret generic llmjudge-frontend-secret \ 7 | --from-literal=JUDGE_BACKEND_URL='' \ 8 | --from-literal=LLM_JUDGE_API_KEY='' \ 9 | --from-literal=NEXTAUTH_SECRET='' \ 10 | --from-literal=NEXTAUTH_URL='' \ 11 | --from-literal=OAUTH_CLIENT_ID='' \ 12 | --from-literal=OAUTH_ISSUER_URL='' 13 | ``` 14 | 15 | ```sh 16 | oc apply -f deployment.yaml 17 | ``` -------------------------------------------------------------------------------- /REST-Service/deployment/base/rest-app/service.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: llm-judge-backend 5 | labels: 6 | app: llm-judge-backend 7 | spec: 8 | ports: 9 | - name: 3001-tcp 10 | protocol: TCP 11 | port: 3001 12 | targetPort: 3001 13 | - name: 8080-tcp 14 | protocol: TCP 15 | port: 8080 16 | targetPort: 8080 17 | type: ClusterIP 18 | selector: 19 | app: llm-judge-backend 20 | deployment: llm-judge-backend 21 | -------------------------------------------------------------------------------- /Framework/sample_config.ini: -------------------------------------------------------------------------------- 1 | [Default] 2 | home_dir = //JudgeIt-LLM-as-a-Judge/ 3 | model_id = meta-llama/llama-3-1-70b-instruct 4 | input_file_name = Framework/data/input/sample_rag_answer_similarity_input.xlsx 5 | output_file_name = Framework/data/output/sample_rag_answer_similarity_output.xlsx 6 | judge_type = rag_eval_answer_similarity 7 | 8 | [WML_CRED] 9 | wml_platform = saas 10 | wml_user = '' 11 | wml_url = https://us-south.ml.cloud.ibm.com 12 | api_key = ibm_cloud_api_key 13 | project_id = watsonx.ai_project_id -------------------------------------------------------------------------------- /JudgeIt-App/utils/encryption.js: -------------------------------------------------------------------------------- 1 | import Cryptr from "cryptr"; 2 | 3 | export function encrypt(text) { 4 | const secretKey = process.env.NEXTAUTH_SECRET; 5 | const cryptr = new Cryptr(secretKey); 6 | 7 | const 
encryptedString = cryptr.encrypt(text); 8 | return encryptedString; 9 | } 10 | 11 | export function decrypt(encryptedString) { 12 | const secretKey = process.env.NEXTAUTH_SECRET; 13 | const cryptr = new Cryptr(secretKey); 14 | 15 | const text = cryptr.decrypt(encryptedString); 16 | return text; 17 | } -------------------------------------------------------------------------------- /JudgeIt-App/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | .yarn/install-state.gz 8 | 9 | # testing 10 | /coverage 11 | 12 | # next.js 13 | /.next/ 14 | /out/ 15 | 16 | # production 17 | /build 18 | 19 | # misc 20 | .DS_Store 21 | *.pem 22 | 23 | # debug 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # vercel 32 | .vercel 33 | 34 | # typescript 35 | *.tsbuildinfo 36 | next-env.d.ts 37 | -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/DataGridToolbar.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { 3 | GridToolbarContainer, 4 | GridToolbarColumnsButton, 5 | GridToolbarFilterButton, 6 | GridToolbarDensitySelector, 7 | GridToolbarExport 8 | } from "@mui/x-data-grid"; 9 | 10 | const DataGridToolbar = () => { 11 | 12 | return ( 13 | 14 | 15 | 16 | 17 | 18 | 19 | ); 20 | }; 21 | export default DataGridToolbar; -------------------------------------------------------------------------------- /REST-Service/chart/values.yaml: -------------------------------------------------------------------------------- 1 | replicaCount: 1 2 | 3 | image: 4 | repository: 5 | tag: "latest" 6 | pullPolicy: IfNotPresent 7 | 8 | service: 9 | fastapi: 10 | type: ClusterIP 11 | port: 3001 12 | redis: 13 | type: ClusterIP 14 | port: 6379 15 | flower: 16 | type: ClusterIP 17 | port: 5555 18 | 19 | env: 20 | WATSONX_URL: "https://us-south.ml.cloud.ibm.com" 21 | WX_PROJECT_ID: "" 22 | IBM_CLOUD_API_KEY: "" 23 | CELERY_BROKER_URL: "redis://redis:6379/0" 24 | CELERY_RESULT_BACKEND: "redis://redis:6379/0" 25 | 26 | resources: {} 27 | -------------------------------------------------------------------------------- /JudgeIt-App/app/api/auth/[...nextauth]/route.js: -------------------------------------------------------------------------------- 1 | import NextAuth from "next-auth"; 2 | import Auth0Provider from "next-auth/providers/auth0"; 3 | 4 | export const authOptions = { 5 | providers: [ 6 | Auth0Provider({ 7 | issuer: `${process.env.OAUTH_ISSUER_URL}`, 8 | clientId:`${process.env.OAUTH_CLIENT_ID}`, 9 | clientSecret: `${process.env.OAUTH_CLIENT_SECRET}`, 10 | id: 'IBMid', 11 | name: 'IBMid', 12 | }), 13 | ], 14 | pages: { 15 | signIn: "/signin" 16 | } 17 | } 18 | 19 | const handler = NextAuth(authOptions); 20 | 21 | export { handler as GET, handler as POST }; -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/EvaluationTypeLabel.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { Tooltip } from "@mui/material"; 3 | import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined"; 4 | 5 | const EvaluationTypeLabel = ({ label, tooltip }) => { 6 | return ( 7 |
8 | {label} 9 | 16 | 17 | 18 |
19 | ); 20 | }; 21 | 22 | export default EvaluationTypeLabel; 23 | -------------------------------------------------------------------------------- /REST-Service/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Selenium Standalone Chrome image as the base image 2 | FROM registry.access.redhat.com/ubi8/python-311:latest 3 | 4 | # Set the working directory inside the container 5 | WORKDIR /app/backend 6 | 7 | # Copy the requirements file to the container and install dependencies 8 | COPY requirements.txt requirements.txt 9 | RUN pip3 install -r requirements.txt 10 | RUN pip3 install bson 11 | RUN pip3 install pymongo 12 | 13 | # Copy your FastAPI Python script to the container 14 | COPY main.py main.py 15 | COPY app/ app/ 16 | COPY cert/ cert/ 17 | 18 | EXPOSE 3001 19 | 20 | # Set the command to run your Python script 21 | CMD ["python3", "main.py"] -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/LinearProgressWithLabel.jsx: -------------------------------------------------------------------------------- 1 | import LinearProgress from '@mui/material/LinearProgress'; 2 | import Typography from '@mui/material/Typography'; 3 | import Box from '@mui/material/Box'; 4 | 5 | export default function LinearProgressWithLabel({ value, width }) { 6 | return ( 7 | 8 | 9 | 10 | 11 | 12 | {`${Math.round( 13 | value, 14 | )}%`} 15 | 16 | 17 | ); 18 | } -------------------------------------------------------------------------------- /JudgeIt-App/utils/sessionTokenAccessor.js: -------------------------------------------------------------------------------- 1 | import { getServerSession } from "next-auth"; 2 | import { authOptions } from "../app/api/auth/[...nextauth]/route"; 3 | import { decrypt } from "./encryption"; 4 | 5 | export async function getAccessToken() { 6 | 7 | const session = await getServerSession(authOptions); 8 | if(session){ 9 | const accessTokenDecrypted = decrypt(session.access_token) 10 | return accessTokenDecrypted; 11 | } 12 | return null; 13 | } 14 | 15 | export async function getIdToken() { 16 | 17 | const session = await getServerSession(authOptions); 18 | if(session){ 19 | const idTokenDecrypted = decrypt(session.id_token) 20 | return idTokenDecrypted; 21 | } 22 | return null; 23 | } -------------------------------------------------------------------------------- /REST-Service/app/route/root/routes.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, requests as request 2 | from fastapi.responses import HTMLResponse 3 | 4 | root_api_route = APIRouter() 5 | 6 | API_PREFIX = "/" 7 | ## This routes returns the text to SQL from a given context and a sql query 8 | @root_api_route.get(API_PREFIX) 9 | def root_api(): 10 | return HTMLResponse( 11 | """ 12 | 13 | 14 | LLM Judge service 15 | 16 | 17 |

<h1>LLM Judge service!</h1> 18 | <p>For complete API visit <a href="/docs">open API docs</a></p>
19 | 20 | 21 | """ 22 | ) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2024.07.04 2 | chardet==5.2.0 3 | charset-normalizer==3.3.2 4 | click==8.1.7 5 | ibm-cos-sdk==2.13.5 6 | ibm-cos-sdk-core==2.13.5 7 | ibm-cos-sdk-s3transfer==2.13.5 8 | ibm_watsonx_ai==1.0.10 9 | idna==3.7 10 | importlib_metadata==8.0.0 11 | jmespath==1.0.1 12 | joblib==1.4.2 13 | langchain-ibm==0.1.12 14 | lomond==0.3.3 15 | nltk==3.8.1 16 | numpy==1.26.4 17 | openpyxl==3.1.5 18 | packaging==24.1 19 | pandas==2.1.4 20 | python-dateutil==2.9.0.post0 21 | pytz==2024.1 22 | regex==2024.5.15 23 | requests==2.32.3 24 | rouge==1.0.1 25 | scikit-learn==1.5.0 26 | scipy==1.14.0 27 | six==1.16.0 28 | tabulate==0.9.0 29 | threadpoolctl==3.5.0 30 | tqdm==4.66.4 31 | tzdata==2024.1 32 | urllib3==2.1.0 33 | XlsxWriter==3.2.0 34 | zipp==3.19.2 35 | -------------------------------------------------------------------------------- /REST-Service/deployment/base/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | images: 3 | - name: backend-image-name 4 | newName: image-registry.openshift-image-registry.svc:5000/llm-judge-dev/backend 5 | newTag: v1.0 6 | secretGenerator: 7 | - name: llm-judge-secret 8 | literals: 9 | - WATSONX_URL= 10 | - WX_PROJECT_ID= 11 | - IBM_CLOUD_API_KEY= 12 | - LLM_JUDGE_API_KEY=JudgeIt-Secret-Api-Key 13 | - WX_PLATFORM=saas 14 | - WX_USER= 15 | - CELERY_BROKER_URL=redis://redis:6379/0 16 | - CELERY_RESULT_BACKEND=redis://redis:6379/0 17 | - SERVER_URL= 18 | - MONGO_URL= 19 | - MONGO_USER= 20 | - MONGO_PASS= 21 | - MONGO_DB="judgeit_app" 22 | resources: 23 | - redis/ 24 | - celery-worker/ 25 | - flower/ 26 | - rest-app/ -------------------------------------------------------------------------------- /JudgeIt-App/app/api/auth/logout/route.js: -------------------------------------------------------------------------------- 1 | import { authOptions } from "../[...nextauth]/route"; 2 | import { getServerSession } from "next-auth" 3 | import { getIdToken } from "@/utils/sessionTokenAccessor"; 4 | 5 | export async function GET() { 6 | const session = await getServerSession(authOptions); 7 | 8 | if (session) { 9 | 10 | const idToken = await getIdToken(); 11 | 12 | // this will log out the user on Keycloak side 13 | var url = `${process.env.END_SESSION_URL}?id_token_hint=${idToken}&post_logout_redirect_uri=${encodeURIComponent(process.env.NEXTAUTH_URL)}`; 14 | 15 | try { 16 | const resp = await fetch(url, { method: "GET" }); 17 | } catch (err) { 18 | console.error(err); 19 | return new Response({ status: 500 }); 20 | } 21 | } 22 | return new Response({ status: 200 }); 23 | } -------------------------------------------------------------------------------- /JudgeIt-App/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:22.4.1-alpine AS deps 2 | #RUN apk add --no-cache libc6-compat=1.2.4-r2 3 | WORKDIR /app 4 | 5 | COPY package.json ./ 6 | COPY package-lock.json ./ 7 | RUN npm install 8 | 9 | FROM node:22.4.1-alpine AS builder 10 | WORKDIR /app 11 | COPY --from=deps /app/node_modules ./node_modules 12 | COPY . . 
13 | 14 | RUN npm run build 15 | 16 | FROM node:22.4.1-alpine AS runner 17 | WORKDIR /app 18 | 19 | ENV NODE_ENV production 20 | ENV NEXT_TELEMETRY_DISABLED 1 21 | 22 | RUN addgroup --system --gid 1001 nodejs 23 | RUN adduser --system --uid 1001 nextjs 24 | 25 | COPY --from=builder --chown=nextjs:nodejs /app/.next ./.next 26 | COPY --from=builder /app/node_modules ./node_modules 27 | COPY --from=builder /app/package.json ./package.json 28 | 29 | USER nextjs 30 | 31 | EXPOSE 3000 32 | 33 | ENV PORT 3000 34 | 35 | CMD ["npm", "start"] -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/Footer.jsx: -------------------------------------------------------------------------------- 1 | function Footer() { 2 | return ( 3 |
12 |
13 |

14 | Disclaimer - Please note that this content is made available to foster 15 | Embedded AI technology adoption. The content may include systems & 16 | methods pending patent with the USPTO and protected under US Patent 17 | Laws. Copyright - 2024 IBM Corporation. In case of any questions or 18 | support, please reach out to{" "} 19 | kunal@ibm.com 20 |

21 |
22 |
23 | ); 24 | } 25 | 26 | export default Footer; 27 | -------------------------------------------------------------------------------- /JudgeIt-App/deployment/deployment.yaml: -------------------------------------------------------------------------------- 1 | kind: Deployment 2 | apiVersion: apps/v1 3 | metadata: 4 | resourceVersion: '108957306' 5 | name: llm-judge-frontend 6 | labels: 7 | app: llm-judge-frontend 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: llm-judge-frontend 13 | template: 14 | metadata: 15 | labels: 16 | app: llm-judge-frontend 17 | deployment: llm-judge-frontend 18 | annotations: 19 | openshift.io/generated-by: OpenShiftWebConsole 20 | spec: 21 | containers: 22 | - name: llm-judge-frontend 23 | image: 'image-registry.openshift-image-registry.svc:5000/llm-judge/llm-judge-frontend@sha256:5ac9b1aa09123b4d09a7e0f297e542c895350f7a700779b36df77b0897f45f46' 24 | ports: 25 | - containerPort: 3000 26 | protocol: TCP 27 | envFrom: 28 | - secretRef: 29 | name: llmjudge-frontend-secret 30 | resources: {} 31 | -------------------------------------------------------------------------------- /REST-Service/deployment/base/redis/deployment.yaml: -------------------------------------------------------------------------------- 1 | kind: Deployment 2 | apiVersion: apps/v1 3 | metadata: 4 | name: redis 5 | labels: 6 | app: redis 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: redis 12 | template: 13 | metadata: 14 | labels: 15 | app: redis 16 | deployment: redis 17 | annotations: 18 | openshift.io/generated-by: OpenShiftWebConsole 19 | spec: 20 | volumes: 21 | - name: redis-1 22 | emptyDir: {} 23 | containers: 24 | - name: redis 25 | image: redis:7.2.5-alpine 26 | ports: 27 | - containerPort: 6379 28 | protocol: TCP 29 | resources: {} 30 | volumeMounts: 31 | - name: redis-1 32 | mountPath: /data 33 | terminationMessagePath: /dev/termination-log 34 | terminationMessagePolicy: File 35 | imagePullPolicy: IfNotPresent 36 | restartPolicy: Always 37 | -------------------------------------------------------------------------------- /JudgeIt-App/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "judge-app", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@emotion/react": "^11.11.4", 13 | "@emotion/styled": "^11.11.5", 14 | "@mui/icons-material": "^5.16.0", 15 | "@mui/material": "^5.16.0", 16 | "@mui/x-data-grid": "^7.16.0", 17 | "axios": "^1.7.2", 18 | "chart.js": "^4.4.4", 19 | "chartjs-plugin-datalabels": "^2.2.0", 20 | "cryptr": "^6.3.0", 21 | "formik": "^2.4.6", 22 | "next": "14.2.5", 23 | "next-auth": "^4.24.7", 24 | "react": "^18", 25 | "react-chartjs-2": "^5.2.0", 26 | "react-dom": "^18", 27 | "react-dropzone": "^14.2.3", 28 | "react-pro-sidebar": "^1.1.0", 29 | "uuid": "^10.0.0", 30 | "yup": "^1.4.0" 31 | }, 32 | "devDependencies": { 33 | "eslint": "^8", 34 | "eslint-config-next": "14.2.5" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /REST-Service/app/src/config/TimeoutMiddleware.py: -------------------------------------------------------------------------------- 1 | from fastapi.responses import JSONResponse 2 | from starlette.middleware.base import BaseHTTPMiddleware 3 | from fastapi import FastAPI, Request, HTTPException 4 | import time 5 | 6 | class 
TimeoutMiddleware(BaseHTTPMiddleware): 7 | def __init__(self, app, timeout: int): 8 | super().__init__(app) 9 | self.timeout = timeout 10 | 11 | async def dispatch(self, request: Request, call_next): 12 | start_time = time.time() 13 | try: 14 | response = await call_next(request) 15 | process_time = time.time() - start_time 16 | if process_time > self.timeout: 17 | raise HTTPException(status_code=408, detail="Request Timeout") 18 | return response 19 | except Exception as e: 20 | process_time = time.time() - start_time 21 | if process_time > self.timeout: 22 | return JSONResponse(content={"detail": "Request Timeout"}, status_code=408) 23 | raise e -------------------------------------------------------------------------------- /JudgeIt-App/app/layout.js: -------------------------------------------------------------------------------- 1 | import "../styles/globals.css"; 2 | import Footer from "@/components/globals/Footer"; 3 | import Topbar from "@/components/globals/Topbar"; 4 | import { Grid, Box, AppBar } from "@mui/material"; 5 | import SessionProviderWrapper from "@/utils/sessionProviderWrapper"; 6 | 7 | export const metadata = { 8 | title: "LLM Judge Application", 9 | description: "LLM Judge Application to evaluate LLM response.", 10 | }; 11 | 12 | export default function RootLayout({ children }) { 13 | return ( 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | {children} 22 | 23 | 24 |
25 | 26 | 27 |
28 | ); 29 | } 30 | -------------------------------------------------------------------------------- /REST-Service/chart/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: fastapi-app 5 | labels: 6 | app: fastapi-app 7 | spec: 8 | type: {{ .Values.service.fastapi.type }} 9 | ports: 10 | - port: {{ .Values.service.fastapi.port }} 11 | targetPort: {{ .Values.service.fastapi.port }} 12 | selector: 13 | app: fastapi-app 14 | 15 | --- 16 | 17 | apiVersion: v1 18 | kind: Service 19 | metadata: 20 | name: redis 21 | labels: 22 | app: redis 23 | spec: 24 | type: {{ .Values.service.redis.type }} 25 | ports: 26 | - port: {{ .Values.service.redis.port }} 27 | targetPort: {{ .Values.service.redis.port }} 28 | selector: 29 | app: redis 30 | 31 | --- 32 | 33 | apiVersion: v1 34 | kind: Service 35 | metadata: 36 | name: flower 37 | labels: 38 | app: flower 39 | spec: 40 | type: {{ .Values.service.flower.type }} 41 | ports: 42 | - port: {{ .Values.service.flower.port }} 43 | targetPort: {{ .Values.service.flower.port }} 44 | selector: 45 | app: flower 46 | -------------------------------------------------------------------------------- /REST-Service/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | certifi==2024.6.2 4 | charset-normalizer==3.3.2 5 | click==8.1.7 6 | ibm-cos-sdk==2.13.5 7 | ibm-cos-sdk-core==2.13.5 8 | ibm-cos-sdk-s3transfer==2.13.5 9 | ibm_watson_machine_learning==1.0.359 10 | ibm_watsonx_ai==1.0.10 11 | idna==3.7 12 | importlib_metadata==8.0.0 13 | jmespath==1.0.1 14 | joblib==1.4.2 15 | lomond==0.3.3 16 | nltk==3.8.1 17 | numpy==1.26.4 18 | packaging==24.1 19 | pandas==2.1.4 20 | python-dateutil==2.9.0.post0 21 | pytz==2024.1 22 | regex==2024.5.15 23 | requests==2.32.4 24 | rouge==1.0.1 25 | scikit-learn==1.5.0 26 | scipy==1.14.0 27 | six==1.16.0 28 | tabulate==0.9.0 29 | threadpoolctl==3.5.0 30 | tqdm==4.66.4 31 | tzdata==2024.1 32 | urllib3==2.1.0 33 | zipp==3.19.2 34 | openpyxl==3.1.5 35 | langchain-ibm==0.1.10 36 | celery==5.4.0 37 | redis==5.0.7 38 | flower==2.0.1 39 | asyncio==3.4.3 40 | python-dotenv 41 | python-multipart 42 | fuzzywuzzy==0.18.0 43 | python-Levenshtein==0.27.1 44 | ibm-watsonx-gov==1.2.2 45 | Jinja2==3.1.2 46 | jsonschema==4.25.1 47 | unitxt==1.26.6 48 | textstat==0.7.10 -------------------------------------------------------------------------------- /JudgeIt-App/styles/globals.css: -------------------------------------------------------------------------------- 1 | @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@300;400;500&family=Source+Sans+Pro:ital,wght@0,400;0,600;1,600&display=swap'); 2 | 3 | html, 4 | body, 5 | #root, 6 | .app, 7 | .content { 8 | margin: 0; 9 | height: 100%; 10 | width: 100%; 11 | font-family: 'IBM Plex Sans'; 12 | overflow: hidden; 13 | } 14 | 15 | .app { 16 | display: flex; 17 | position: relative; 18 | } 19 | 20 | ::-webkit-scrollbar { 21 | width: 10px; 22 | } 23 | 24 | /* Track */ 25 | 26 | ::-webkit-scrollbar-track { 27 | background: #e0e0e0; 28 | } 29 | 30 | /* handle */ 31 | 32 | ::-webkit-scrollbar-thumb { 33 | background: #888; 34 | } 35 | 36 | /* handle on Hover */ 37 | 38 | ::-webkit-scrollbar-track:hover { 39 | background: #555; 40 | } 41 | 42 | .drag-and-drop { 43 | width: 100%; 44 | height: 200px; 45 | border: 2px dashed #ccc; 46 | border-radius: 5px; 47 | display: flex; 48 | justify-content: center; 49 | 
align-items: center; 50 | cursor: pointer; 51 | } 52 | 53 | .dragging { 54 | background-color: #f1f1f1; 55 | } -------------------------------------------------------------------------------- /REST-Service/deployment/base/flower/deployment.yaml: -------------------------------------------------------------------------------- 1 | kind: Deployment 2 | apiVersion: apps/v1 3 | metadata: 4 | name: flower-app 5 | labels: 6 | app: flower-app 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: flower-app 12 | template: 13 | metadata: 14 | labels: 15 | app: flower-app 16 | deployment: flower-app 17 | spec: 18 | containers: 19 | - resources: {} 20 | terminationMessagePath: /dev/termination-log 21 | name: flower-app 22 | command: 23 | - celery 24 | - '--broker=redis://redis:6379/0' 25 | - flower 26 | - '--port=5555' 27 | ports: 28 | - containerPort: 5555 29 | protocol: TCP 30 | - containerPort: 8080 31 | protocol: TCP 32 | imagePullPolicy: IfNotPresent 33 | terminationMessagePolicy: File 34 | envFrom: 35 | - secretRef: 36 | name: llm-judge-secret 37 | image: backend-image-name:latest 38 | -------------------------------------------------------------------------------- /REST-Service/cert/mongo.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDDzCCAfegAwIBAgIJANEH58y2/kzHMA0GCSqGSIb3DQEBCwUAMB4xHDAaBgNV 3 | BAMME0lCTSBDbG91ZCBEYXRhYmFzZXMwHhcNMTgwNjI1MTQyOTAwWhcNMjgwNjIy 4 | MTQyOTAwWjAeMRwwGgYDVQQDDBNJQk0gQ2xvdWQgRGF0YWJhc2VzMIIBIjANBgkq 5 | hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA8lpaQGzcFdGqeMlmqjffMPpIQhqpd8qJ 6 | Pr3bIkrXJbTcJJ9uIckSUcCjw4Z/rSg8nnT13SCcOl+1to+7kdMiU8qOWKiceYZ5 7 | y+yZYfCkGaiZVfazQBm45zBtFWv+AB/8hfCTdNF7VY4spaA3oBE2aS7OANNSRZSK 8 | pwy24IUgUcILJW+mcvW80Vx+GXRfD9Ytt6PRJgBhYuUBpgzvngmCMGBn+l2KNiSf 9 | weovYDCD6Vngl2+6W9QFAFtWXWgF3iDQD5nl/n4mripMSX6UG/n6657u7TDdgkvA 10 | 1eKI2FLzYKpoKBe5rcnrM7nHgNc/nCdEs5JecHb1dHv1QfPm6pzIxwIDAQABo1Aw 11 | TjAdBgNVHQ4EFgQUK3+XZo1wyKs+DEoYXbHruwSpXjgwHwYDVR0jBBgwFoAUK3+X 12 | Zo1wyKs+DEoYXbHruwSpXjgwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC 13 | AQEAJf5dvlzUpqaix26qJEuqFG0IP57QQI5TCRJ6Xt/supRHo63eDvKw8zR7tlWQ 14 | lV5P0N2xwuSl9ZqAJt7/k/3ZeB+nYwPoyO3KvKvATunRvlPBn4FWVXeaPsG+7fhS 15 | qsejmkyonYw77HRzGOzJH4Zg8UN6mfpbaWSsyaExvqknCp9SoTQP3D67AzWqb1zY 16 | doqqgGIZ2nxCkp5/FXxF/TMb55vteTQwfgBy60jVVkbF7eVOWCv0KaNHPF5hrqbN 17 | i+3XjJ7/peF3xMvTMoy35DcT3E2ZeSVjouZs15O90kI3k2daS2OHJABW0vSj4nLz 18 | +PQzp/B9cQmOO8dCe049Q3oaUA== 19 | -----END CERTIFICATE----- 20 | 21 | -------------------------------------------------------------------------------- /REST-Service/deployment/base/celery-worker/deployment.yaml: -------------------------------------------------------------------------------- 1 | kind: Deployment 2 | apiVersion: apps/v1 3 | metadata: 4 | name: celery-worker 5 | labels: 6 | app: celery-worker 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: celery-worker 12 | template: 13 | metadata: 14 | labels: 15 | app: celery-worker 16 | deployment: celery-worker 17 | annotations: 18 | openshift.io/generated-by: OpenShiftWebConsole 19 | spec: 20 | containers: 21 | - resources: {} 22 | terminationMessagePath: /dev/termination-log 23 | name: celery-worker 24 | command: 25 | - celery 26 | - '-A' 27 | - app.celery.celery_worker.celery 28 | - worker 29 | - '--loglevel=info' 30 | ports: 31 | - containerPort: 3001 32 | protocol: TCP 33 | - containerPort: 8080 34 | protocol: TCP 35 | imagePullPolicy: IfNotPresent 36 | terminationMessagePolicy: File 37 | envFrom: 38 | - 
secretRef: 39 | name: llm-judge-secret 40 | image: backend-image-name:latest 41 | -------------------------------------------------------------------------------- /REST-Service/deployment/base/rest-app/deployment.yaml: -------------------------------------------------------------------------------- 1 | kind: Deployment 2 | apiVersion: apps/v1 3 | metadata: 4 | name: llm-judge-backend 5 | labels: 6 | app: llm-judge-backend 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: llm-judge-backend 12 | template: 13 | metadata: 14 | labels: 15 | app: llm-judge-backend 16 | deployment: llm-judge-backend 17 | spec: 18 | containers: 19 | - resources: {} 20 | terminationMessagePath: /dev/termination-log 21 | name: llm-judge-backend 22 | ports: 23 | - containerPort: 3001 24 | protocol: TCP 25 | - containerPort: 8080 26 | protocol: TCP 27 | imagePullPolicy: IfNotPresent 28 | envFrom: 29 | - secretRef: 30 | name: llm-judge-secret 31 | image: backend-image-name:latest 32 | volumeMounts: 33 | - name: mongodb-cert-volume 34 | readOnly: true 35 | mountPath: /app/backend/cert 36 | volumes: 37 | - name: mongodb-cert-volume 38 | secret: 39 | secretName: mongodb-cert-secret 40 | defaultMode: 420 -------------------------------------------------------------------------------- /JudgeIt-App/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/icons/IBMIconTop.jsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import SvgIcon from '@mui/material/SvgIcon'; 3 | 4 | export default function IBMIcon() { 5 | 6 | return ( 7 | 8 | 9 | 10 | ); 11 | } -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/SignIn.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { signIn } from "next-auth/react"; 4 | import { useSearchParams } from "next/navigation"; 5 | import IBMIcon from "./icons/IBMIcon"; 6 | import { LineWeight } from "@mui/icons-material"; 7 | import { Grid } from "@mui/material"; 8 | import React, { Suspense } from "react"; 9 | 10 | function SignInWithIBMIdContent() { 11 | const searchParams = useSearchParams(); 12 | const callbackUrl = searchParams.get("callbackUrl") || "/"; 13 | 14 | return ( 15 | 16 | 17 |
25 | 38 |
39 |
40 |
41 | ); 42 | } 43 | 44 | export default function SignInWithIBMId() { 45 | return ( 46 | Loading...}> 47 | 48 | 49 | ); 50 | } 51 | -------------------------------------------------------------------------------- /REST-Service/app/src/services/MongoService.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | from pymongo import MongoClient 4 | from pymongo.errors import ConnectionFailure 5 | from bson.objectid import ObjectId 6 | 7 | load_dotenv() 8 | 9 | class MongoService: 10 | 11 | def __init__(self): 12 | # MongoDB backend 13 | MONGO_URL=os.getenv('MONGO_URL') 14 | MONGO_USER=os.getenv('MONGO_USER') 15 | MONGO_PASS=os.getenv('MONGO_PASS') 16 | 17 | self.MONGO_DB=os.getenv('MONGO_DB') 18 | 19 | ##f"mongodb://{MONGO_USER}:{MONGO_PASS}@{MONGO_URL}" 20 | 21 | client = MongoClient( 22 | 23 | f"mongodb://{MONGO_USER}:{MONGO_PASS}@{MONGO_URL}/{self.MONGO_DB}?authSource={self.MONGO_DB}", 24 | ssl=True, 25 | tlsCAFile="cert/mongo.crt" 26 | ) 27 | self.client = client 28 | print(f"mongo client:{client}yyyy") 29 | 30 | def get_db(self): 31 | db = self.client[self.MONGO_DB] 32 | return db 33 | 34 | def get_collection(self, collection_name): 35 | collection = self.get_db()[collection_name] 36 | return collection 37 | 38 | def get_request_history_collection(self): 39 | return self.get_collection('request_histories') 40 | 41 | def get_experiment_collection(self): 42 | return self.get_collection('experiments') 43 | 44 | def find_one(self, collection, id): 45 | one = collection.find_one({'_id': ObjectId(id)}) 46 | return one 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /JudgeIt-App/public/vercel.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | -------------------------------------------------------------------------------- /REST-Service/deployment/base/rest-app/secret.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: mongodb-cert-secret 5 | data: 6 | mongo.crt: 
LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUREekNDQWZlZ0F3SUJBZ0lKQU5FSDU4eTIva3pITUEwR0NTcUdTSWIzRFFFQkN3VUFNQjR4SERBYUJnTlYKQkFNTUUwbENUU0JEYkc5MVpDQkVZWFJoWW1GelpYTXdIaGNOTVRnd05qSTFNVFF5T1RBd1doY05Namd3TmpJeQpNVFF5T1RBd1dqQWVNUnd3R2dZRFZRUUREQk5KUWswZ1EyeHZkV1FnUkdGMFlXSmhjMlZ6TUlJQklqQU5CZ2txCmhraUc5dzBCQVFFRkFBT0NBUThBTUlJQkNnS0NBUUVBOGxwYVFHemNGZEdxZU1sbXFqZmZNUHBJUWhxcGQ4cUoKUHIzYklrclhKYlRjSko5dUlja1NVY0NqdzRaL3JTZzhublQxM1NDY09sKzF0bys3a2RNaVU4cU9XS2ljZVlaNQp5K3laWWZDa0dhaVpWZmF6UUJtNDV6QnRGV3YrQUIvOGhmQ1RkTkY3Vlk0c3BhQTNvQkUyYVM3T0FOTlNSWlNLCnB3eTI0SVVnVWNJTEpXK21jdlc4MFZ4K0dYUmZEOVl0dDZQUkpnQmhZdVVCcGd6dm5nbUNNR0JuK2wyS05pU2YKd2VvdllEQ0Q2Vm5nbDIrNlc5UUZBRnRXWFdnRjNpRFFENW5sL240bXJpcE1TWDZVRy9uNjY1N3U3VERkZ2t2QQoxZUtJMkZMellLcG9LQmU1cmNuck03bkhnTmMvbkNkRXM1SmVjSGIxZEh2MVFmUG02cHpJeHdJREFRQUJvMUF3ClRqQWRCZ05WSFE0RUZnUVVLMytYWm8xd3lLcytERW9ZWGJIcnV3U3BYamd3SHdZRFZSMGpCQmd3Rm9BVUszK1gKWm8xd3lLcytERW9ZWGJIcnV3U3BYamd3REFZRFZSMFRCQVV3QXdFQi96QU5CZ2txaGtpRzl3MEJBUXNGQUFPQwpBUUVBSmY1ZHZselVwcWFpeDI2cUpFdXFGRzBJUDU3UVFJNVRDUko2WHQvc3VwUkhvNjNlRHZLdzh6Ujd0bFdRCmxWNVAwTjJ4d3VTbDlacUFKdDcvay8zWmVCK25Zd1BveU8zS3ZLdkFUdW5SdmxQQm40RldWWGVhUHNHKzdmaFMKcXNlam1reW9uWXc3N0hSekdPekpINFpnOFVONm1mcGJhV1NzeWFFeHZxa25DcDlTb1RRUDNENjdBeldxYjF6WQpkb3FxZ0dJWjJueENrcDUvRlh4Ri9UTWI1NXZ0ZVRRd2ZnQnk2MGpWVmtiRjdlVk9XQ3YwS2FOSFBGNWhycWJOCmkrM1hqSjcvcGVGM3hNdlRNb3kzNURjVDNFMlplU1Zqb3VaczE1Tzkwa0kzazJkYVMyT0hKQUJXMHZTajRuTHoKK1BRenAvQjljUW1PTzhkQ2UwNDlRM29hVUE9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCgo= 7 | type: Opaque 8 | -------------------------------------------------------------------------------- /REST-Service/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, Request, HTTPException 2 | from fastapi.middleware.trustedhost import TrustedHostMiddleware 3 | import uvicorn 4 | import logging 5 | from dotenv import load_dotenv 6 | from app.route.root import routes as root_api 7 | from app.route.llm_judge import routes as llm_judge_api 8 | from app.route.llm_manage import routes as judge_management_api 9 | from fastapi.middleware.cors import CORSMiddleware 10 | import os 11 | from app.src.config.TimeoutMiddleware import TimeoutMiddleware 12 | 13 | load_dotenv() 14 | platform = os.environ.get("PLATFORM") 15 | server_url = os.environ.get("SERVER_URL", default="http://localhost:3001") 16 | 17 | app = FastAPI( 18 | title="LLM JUDGE API", 19 | description="This api will be used to judge llm response and get ratings and feedback", 20 | version="1.0.1-fastapi", 21 | servers=[ 22 | { 23 | "url": server_url 24 | } 25 | ], 26 | ) 27 | 28 | logging.basicConfig(level=logging.INFO) 29 | logger = logging.getLogger('api-service') 30 | 31 | # Register blueprints 32 | app.include_router(root_api.root_api_route) 33 | app.include_router(llm_judge_api.judge_api_route) 34 | app.include_router(judge_management_api.judge_management_api_route) 35 | 36 | origins = [ "*"] 37 | 38 | app.add_middleware( 39 | CORSMiddleware, 40 | allow_origins=origins, 41 | allow_credentials=False, 42 | allow_methods=["*"], 43 | allow_headers=["*"], 44 | ) 45 | 46 | app.add_middleware(TimeoutMiddleware, timeout=600) # Timeout set to 600 seconds (10 minutes) 47 | 48 | if __name__ == '__main__': 49 | uvicorn.run("main:app", host='0.0.0.0', port=3001) -------------------------------------------------------------------------------- /JudgeIt-App/app/pages/help/page.js: -------------------------------------------------------------------------------- 1 | import BatchInstructions from 
"@/components/globals/BatchInstructions"; 2 | import Footer from "@/components/globals/Footer"; 3 | import SingleInstructions from "@/components/globals/SingleInstructions"; 4 | import { Box, Grid, Paper, Typography } from "@mui/material"; 5 | import React from "react"; 6 | 7 | const HelperPage = () => { 8 | return ( 9 |
18 | 19 | 20 | 27 | Documentation 28 | 29 | 30 | 31 | 32 | 37 | 38 | 39 | 40 | 41 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
53 | 54 |
55 | ); 56 | }; 57 | 58 | export default HelperPage; 59 | -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/icons/IBMIcon.jsx: -------------------------------------------------------------------------------- 1 | import * as React from "react"; 2 | import SvgIcon from "@mui/material/SvgIcon"; 3 | 4 | export default function IBMIcon() { 5 | return ( 6 | 7 | 12 | 13 | ); 14 | } 15 | -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/DeleteConfirmationDialog.jsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import Button from '@mui/material/Button'; 3 | import Dialog from '@mui/material/Dialog'; 4 | import DialogActions from '@mui/material/DialogActions'; 5 | import DialogContent from '@mui/material/DialogContent'; 6 | import DialogContentText from '@mui/material/DialogContentText'; 7 | import DialogTitle from '@mui/material/DialogTitle'; 8 | 9 | const DeleteConfirmationDialog = ({ itemName, onDelete }) => { 10 | const [open, setOpen] = useState(false); 11 | 12 | const handleClickOpen = () => { 13 | setOpen(true); 14 | }; 15 | 16 | const handleClose = () => { 17 | setOpen(false); 18 | }; 19 | 20 | const handleConfirmDelete = () => { 21 | onDelete(); // Call the delete action 22 | handleClose(); // Close the dialog 23 | }; 24 | 25 | return ( 26 |
27 | 30 | 36 | 37 | {"Confirm Delete"} 38 | 39 | 40 | 41 | Are you sure you want to delete {itemName}? This action cannot be undone. 42 | 43 | 44 | 45 | 48 | 51 | 52 | 53 |
54 | ); 55 | }; 56 | 57 | export default DeleteConfirmationDialog; 58 | -------------------------------------------------------------------------------- /REST-Service/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | fastapi_app: 3 | container_name: fastapi_app 4 | platform: linux/amd64 5 | image: fastapi_app_image 6 | #volumes: 7 | # - ./app:/app 8 | ports: 9 | - 3001:3001 10 | environment: 11 | - WATSONX_URL=https://us-south.ml.cloud.ibm.com 12 | - WX_PROJECT_ID=*** 13 | - IBM_CLOUD_API_KEY=*** 14 | - LLM_JUDGE_API_KEY=JudgeIt-Secret-Api-Key 15 | - WX_PLATFORM=saas 16 | - WX_USER='' 17 | - WX_GOV_REGION=eu-de 18 | - CELERY_BROKER_URL=redis://redis:6379/0 19 | - CELERY_RESULT_BACKEND=redis://redis:6379/0 20 | - SERVER_URL=http://localhost:3001 21 | - MONGO_URL=*** 22 | - MONGO_USER=*** 23 | - MONGO_PASS=*** 24 | - MONGO_DB=judge_it_dev 25 | - WX_NEG_TEST_MODEL=mistralai/mistral-medium-2505 26 | - WX_GOV_INSTANCE= 27 | restart: always 28 | redis: 29 | container_name: redis 30 | image: redis:7.2.5-alpine 31 | restart: always 32 | celery_worker: 33 | container_name: celery_worker 34 | build: . 35 | #volumes: 36 | # - ./app:/app 37 | command: celery -A app.celery.celery_worker.celery worker --loglevel=info 38 | environment: 39 | - WATSONX_URL=https://us-south.ml.cloud.ibm.com 40 | - WX_PROJECT_ID=*** 41 | - WX_PLATFORM=saas 42 | - WX_USER='' 43 | - WX_GOV_REGION=eu-de 44 | - IBM_CLOUD_API_KEY=*** 45 | - CELERY_BROKER_URL=redis://redis:6379/0 46 | - CELERY_RESULT_BACKEND=redis://redis:6379/0 47 | - WX_NEG_TEST_MODEL=mistralai/mistral-medium-2505 48 | - WX_GOV_INSTANCE= 49 | depends_on: 50 | - fastapi_app 51 | - redis 52 | restart: always 53 | flower: 54 | container_name: flower 55 | build: . 56 | command: celery --broker=redis://redis:6379/0 flower --port=5555 57 | ports: 58 | - 5556:5555 59 | environment: 60 | - CELERY_BROKER_URL=redis://redis:6379/0 61 | - CELERY_RESULT_BACKEND=redis://redis:6379/0 62 | depends_on: 63 | - fastapi_app 64 | - redis 65 | - celery_worker 66 | restart: always -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/MultiTurnWithConversationForm.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { TextField, Box } from "@mui/material"; 4 | 5 | const MultiTurnWithConversationForm = ({ 6 | values, 7 | handleChange, 8 | handleBlur, 9 | errors, 10 | touched, 11 | }) => { 12 | return ( 13 |
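{/* A minimal wiring sketch: this component is presentational and expects Formik-style
    props from its parent. Formik itself is an assumption here, and the field names are
    inferred from the multi-turn data grids elsewhere in this repo, so treat this as
    illustrative rather than the app's actual parent component:

    <Formik
      initialValues={{
        conversation_history: "",
        follow_up_query: "",
        golden_query: "",
        rewritten_query: "",
      }}
      onSubmit={submitToJudge}  // submitToJudge is illustrative
    >
      {({ values, handleChange, handleBlur, errors, touched }) => (
        <MultiTurnWithConversationForm
          values={values}
          handleChange={handleChange}
          handleBlur={handleBlur}
          errors={errors}
          touched={touched}
        />
      )}
    </Formik>
*/}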
14 | 15 | 27 | 28 | 29 | 39 | 40 | 41 | 51 | 52 | 53 | 63 | 64 |
65 | ); 66 | }; 67 | 68 | export default MultiTurnWithConversationForm; 69 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/SoloResult.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { 3 | Alert, 4 | Table, 5 | TableHead, 6 | TableRow, 7 | TableCell, 8 | TableBody, 9 | Paper, 10 | } from "@mui/material"; 11 | import { 12 | API_TYPE_MULTITURN, 13 | API_TYPE_SINGLETURN, 14 | API_TYPE_RATING, 15 | API_TYPE_SIMILARITY, 16 | } from "@/services/Config"; 17 | 18 | import { grade_map_rating, grade_map_similarity, grade_map_multiturn } from "@/services/Config"; 19 | 20 | const grade_col_name = "JudgeIt Score" 21 | const explanation_col_name = "JudgeIt Reasoning" 22 | 23 | const SoloResult = ({ data, api_type }) => { 24 | return ( 25 | 29 | 30 | 31 | {api_type === API_TYPE_RATING && ( 32 | 33 | {grade_col_name} 34 | {explanation_col_name} 35 | 36 | )} 37 | {api_type === API_TYPE_SIMILARITY && ( 38 | 39 | {grade_col_name} 40 | {explanation_col_name} 41 | 42 | )} 43 | {(api_type === API_TYPE_MULTITURN || api_type === API_TYPE_SINGLETURN) && ( 44 | 45 | {grade_col_name} 46 | 47 | )} 48 | 49 | 50 | {api_type === API_TYPE_RATING && ( 51 | 52 | {grade_map_rating[data.Grade]} 53 | {data.Explanation} 54 | 55 | )} 56 | {api_type === API_TYPE_SIMILARITY && ( 57 | 58 | {grade_map_similarity[data.Grade]} 59 | {data.Explanation} 60 | 61 | )} 62 | {(api_type === API_TYPE_MULTITURN || api_type === API_TYPE_SINGLETURN) && ( 63 | 64 | {grade_map_multiturn[data.Grade]} 65 | 66 | )} 67 | 68 |
69 |
70 | ); 71 | }; 72 | 73 | export default SoloResult; 74 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/RatingSimilarityDataGrid.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { DataGrid } from "@mui/x-data-grid"; 4 | import DataGridToolbar from "@/components/globals/DataGridToolbar"; 5 | import { API_TYPE_RATING, grade_map_rating, grade_map_similarity } from "@/services/Config"; 6 | 7 | const RatingSimilarityDataGrid = ({ serverData }) => { 8 | const columns = [ 9 | { 10 | field: "id", 11 | headerName: "Id", 12 | hide: true, 13 | }, 14 | { 15 | field: "name", 16 | headerName: "Name", 17 | width: "250", 18 | }, 19 | { 20 | field: "eval_type", 21 | headerName: "Eval Type", 22 | }, 23 | { 24 | field: "model", 25 | headerName: "Model", 26 | width: "250", 27 | }, 28 | { 29 | field: "golden_text", 30 | headerName: "Golden Text", 31 | width: "400", 32 | }, 33 | { 34 | field: "generated_text", 35 | headerName: "Generated Text", 36 | width: "400", 37 | }, 38 | { 39 | field: "Grade", 40 | headerName: "JudgeIt Score", 41 | width: 100, 42 | }, 43 | { 44 | field: "Explanation", 45 | headerName: "JudgeIt Reasoning", 46 | width: "400", 47 | }, 48 | ]; 49 | 50 | return ( 51 |
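{/* Shape expected for each serverData entry, based on the row mapping below. The values
    are illustrative; only the nesting and field names matter:

    {
      _id: "66f1...",                  // illustrative document id
      name: "request-abc",
      eval_type: API_TYPE_RATING,      // one of the API_TYPE_* constants from "@/services/Config"
      content: {
        query:  { model: "...", golden_text: "...", generated_text: "..." },
        result: { Grade: "1", Explanation: "..." }
      }
    }
*/}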
52 | {" "} 53 | { 57 | return { 58 | id: item._id, 59 | name: item.name, 60 | eval_type: item.eval_type, 61 | model: item.content.query.model, 62 | golden_text: item.content.query.golden_text, 63 | generated_text: item.content.query.generated_text, 64 | Grade: (item.eval_type === API_TYPE_RATING) ? grade_map_rating[item.content.result.Grade] : grade_map_similarity[item.content.result.Grade], 65 | Explanation: item.content.result.Explanation, 66 | }; 67 | }), 68 | }} 69 | density="compact" 70 | getRowHeight={() => "auto"} 71 | autoHeight={true} 72 | initialState={{ 73 | ...{ 74 | columns: columns, 75 | rows: [], 76 | }.initialState, 77 | pagination: { paginationModel: { pageSize: 10 } }, 78 | }} 79 | pageSizeOptions={[5, 10, 25]} 80 | slots={{ toolbar: DataGridToolbar }} 81 | /> 82 |
83 | ); 84 | }; 85 | 86 | export default RatingSimilarityDataGrid; 87 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/DataGridMultiTurnConversation.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { DataGrid } from "@mui/x-data-grid"; 4 | import DataGridToolbar from "@/components/globals/DataGridToolbar"; 5 | 6 | const DataGridMultiTurnConversation = ({ serverData }) => { 7 | const columns = [ 8 | { 9 | field: "id", 10 | headerName: "Id", 11 | hide: true, 12 | }, 13 | { 14 | field: "name", 15 | headerName: "Name", 16 | width: "250", 17 | }, 18 | { 19 | field: "eval_type", 20 | headerName: "Eval Type", 21 | }, 22 | { 23 | field: "model", 24 | headerName: "Model", 25 | width: "250", 26 | }, 27 | { 28 | field: "conversation_history", 29 | headerName: "Conversation history", 30 | width: "400", 31 | }, 32 | { 33 | field: "follow_up_query", 34 | headerName: "Follow up query", 35 | width: "400", 36 | }, 37 | { 38 | field: "golden_query", 39 | headerName: "Golden query", 40 | width: "400", 41 | }, 42 | { 43 | field: "rewritten_query", 44 | headerName: "Rewritten query", 45 | width: "400", 46 | }, 47 | { 48 | field: "Grade", 49 | headerName: "Grade", 50 | width: 100, 51 | } 52 | ]; 53 | 54 | return ( 55 |
56 | {" "} 57 | { 61 | return { 62 | id: item._id, 63 | name: item.name, 64 | eval_type: item.eval_type, 65 | model: item.content.query.model, 66 | conversation_history: item.content.query.conversation_history, 67 | follow_up_query: item.content.query.follow_up_query, 68 | golden_query: item.content.query.golden_query, 69 | rewritten_query: item.content.query.rewritten_query, 70 | Grade: item.content.result.Grade 71 | }; 72 | }), 73 | }} 74 | density="compact" 75 | getRowHeight={() => "auto"} 76 | autoHeight={true} 77 | initialState={{ 78 | ...{ 79 | columns: columns, 80 | rows: [], 81 | }.initialState, 82 | pagination: { paginationModel: { pageSize: 10 } }, 83 | }} 84 | pageSizeOptions={[5, 10, 25]} 85 | slots={{ toolbar: DataGridToolbar }} 86 | /> 87 |
88 | ); 89 | }; 90 | 91 | export default DataGridMultiTurnConversation; 92 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/DataGridMultiTurnSummary.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { DataGrid } from "@mui/x-data-grid"; 4 | import DataGridToolbar from "@/components/globals/DataGridToolbar"; 5 | 6 | const DataGridMultiTurnSummaryConversation = ({ serverData }) => { 7 | const columns = [ 8 | { 9 | field: "id", 10 | headerName: "Id", 11 | hide: true, 12 | }, 13 | { 14 | field: "name", 15 | headerName: "Name", 16 | width: "250", 17 | }, 18 | { 19 | field: "experiment_name", 20 | headerName: "Experiment Name", 21 | width: "250", 22 | }, 23 | { 24 | field: "eval_type", 25 | headerName: "Eval Type", 26 | }, 27 | { 28 | field: "conversation_history", 29 | headerName: "Conversation history", 30 | width: "500", 31 | }, 32 | { 33 | field: "follow_up_query", 34 | headerName: "Follow up query", 35 | width: "300", 36 | }, 37 | { 38 | field: "golden_query", 39 | headerName: "Golden query", 40 | width: "300", 41 | }, 42 | { 43 | field: "rewritten_query", 44 | headerName: "Rewritten query", 45 | width: "300", 46 | }, 47 | { 48 | field: "Grade", 49 | headerName: "JudgeIt Score", 50 | width: 100, 51 | } 52 | ]; 53 | 54 | return ( 55 |
56 | {" "} 57 | { 61 | return { 62 | id: item._id, 63 | name: item.name, 64 | eval_type: item.eval_type, 65 | experiment_name: item.experiment_name, 66 | conversation_history: item.conversation_history, 67 | follow_up_query: item.follow_up_query, 68 | golden_query: item.golden_query, 69 | rewritten_query: item.rewritten_query, 70 | Grade: (item?.Grade) ? item?.Grade : item?.judgeit_score 71 | }; 72 | }), 73 | }} 74 | density="compact" 75 | getRowHeight={() => "auto"} 76 | autoHeight={true} 77 | initialState={{ 78 | ...{ 79 | columns: columns, 80 | rows: [], 81 | }.initialState, 82 | pagination: { paginationModel: { pageSize: 10 } }, 83 | }} 84 | pageSizeOptions={[5, 10, 25]} 85 | slots={{ toolbar: DataGridToolbar }} 86 | /> 87 |
88 | ); 89 | }; 90 | 91 | export default DataGridMultiTurnSummaryConversation; 92 | -------------------------------------------------------------------------------- /JudgeIt-App/utils/Helper.js: -------------------------------------------------------------------------------- 1 | import { 2 | API_TYPE_MULTITURN, 3 | API_TYPE_RATING, 4 | API_TYPE_SIMILARITY, 5 | grade_map_multiturn, 6 | grade_map_rating, 7 | grade_map_similarity, 8 | } from "@/services/Config"; 9 | 10 | export function getRandomInt(max) { 11 | return Math.floor(Math.random() * max); 12 | } 13 | 14 | export function generateRandomString(length = 4) { 15 | const characters = 16 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; 17 | let result = ""; 18 | for (let i = 0; i < length; i++) { 19 | const randomIndex = Math.floor(Math.random() * characters.length); 20 | result += characters.charAt(randomIndex); 21 | } 22 | return result; 23 | } 24 | 25 | // Function to generate columns dynamically from JSON object keys 26 | export const generateColumns = (jsonObject) => { 27 | return Object.keys(jsonObject).map((key) => ({ 28 | field: key, 29 | headerName: rename_grade_explanation_cloumn_name(key), // Capitalize the header 30 | width: 300, // You can adjust the width or make it dynamic 31 | })); 32 | }; 33 | 34 | const rename_grade_explanation_cloumn_name = (column_name) => { 35 | if (column_name === "Grade") { 36 | return "JudgeIt Score"; 37 | } else if (column_name === "Explanation") { 38 | return "JudgeIt Reasoning"; 39 | } else { 40 | return column_name.charAt(0).toUpperCase() + column_name.slice(1); 41 | } 42 | }; 43 | 44 | // Function to generate rows dynamically from JSON object 45 | export const generateRows = (jsonObject, eval_type) => { 46 | const firstKey = Object.keys(jsonObject)[0]; // Get the first key to check structure 47 | const rowIds = Object.keys(jsonObject[firstKey]); // Assuming same structure for all keys 48 | 49 | return rowIds.map((_, index) => { 50 | const rowData = { id: index }; // Initialize row with id 51 | Object.keys(jsonObject).forEach((field) => { 52 | rowData[field] = get_rating_label( 53 | eval_type, 54 | field, 55 | jsonObject[field][index] 56 | ); // Add data for each field 57 | }); 58 | return rowData; 59 | }); 60 | }; 61 | 62 | const get_rating_label = (eval_type, column_name, value) => { 63 | if (column_name !== "Grade") return value; 64 | 65 | const gradeMap = { 66 | [API_TYPE_RATING]: grade_map_rating, 67 | [API_TYPE_SIMILARITY]: grade_map_similarity, 68 | [API_TYPE_MULTITURN]: grade_map_multiturn, 69 | }; 70 | 71 | return gradeMap[eval_type]?.[value] || value; 72 | }; 73 | 74 | export function trimText(text) { 75 | if (text.length > 15) { 76 | return text.substring(0, 15) + ".."; 77 | } 78 | return text; 79 | } 80 | -------------------------------------------------------------------------------- /Framework/wml_setup.py: -------------------------------------------------------------------------------- 1 | from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams 2 | from ibm_watsonx_ai.foundation_models import Model 3 | 4 | #config Watsonx.ai environment 5 | api_key = '' 6 | ibm_cloud_url = 'https://us-south.ml.cloud.ibm.com' 7 | project_id = '' 8 | 9 | def send_to_watsonxai(prompts, 10 | model_id="MIXTRAL", 11 | decoding_method="greedy", 12 | max_new_tokens=500, 13 | min_new_tokens=30, 14 | temperature=1.0, 15 | repetition_penalty=1.0 16 | ): 17 | if model_id == "MIXTRAL": 18 | model_name = "mistralai/mixtral-8x7b-instruct-v01" 19 | elif model_id == "LLAMA3": 
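        # Note: only "MIXTRAL" and "LLAMA3" are mapped in this if/elif chain; any other
        # model_id leaves model_name unset and the Model(...) call below raises a NameError,
        # so an explicit else branch with a default model (or a clear error) would be safer.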
20 | model_name="meta-llama/llama-3-70b-instruct" 21 | # Instantiate parameters for text generation 22 | model_params = { 23 | GenParams.DECODING_METHOD: decoding_method, 24 | GenParams.MIN_NEW_TOKENS: min_new_tokens, 25 | GenParams.MAX_NEW_TOKENS: max_new_tokens, 26 | GenParams.RANDOM_SEED: 42, 27 | GenParams.TEMPERATURE: temperature, 28 | GenParams.REPETITION_PENALTY: repetition_penalty, 29 | } 30 | model = Model( 31 | model_id=model_name, 32 | params=model_params, 33 | credentials={ 34 | "url" : ibm_cloud_url, 35 | "apikey" : api_key 36 | }, 37 | project_id=project_id) 38 | 39 | response=model.generate_text(prompts) 40 | return response 41 | 42 | 43 | def send_to_watsonxai_multi_turn(prompts, 44 | model_id="MIXTRAL", 45 | decoding_method="greedy", 46 | max_new_tokens=128, 47 | temperature=0.7, 48 | repetition_penalty=1.0 49 | ): 50 | if model_id == "MIXTRAL": 51 | model_name = "mistralai/mixtral-8x7b-instruct-v01" 52 | elif model_id == "LLAMA3": 53 | model_name="meta-llama/llama-3-70b-instruct" 54 | # Instantiate parameters for text generation 55 | model_params = { 56 | GenParams.DECODING_METHOD: decoding_method, 57 | GenParams.MAX_NEW_TOKENS: max_new_tokens, 58 | GenParams.RANDOM_SEED: 42, 59 | GenParams.TEMPERATURE: temperature, 60 | GenParams.REPETITION_PENALTY: repetition_penalty, 61 | } 62 | model = Model( 63 | model_id=model_name, 64 | params=model_params, 65 | credentials={ 66 | "url" : ibm_cloud_url, 67 | "apikey" : api_key 68 | }, 69 | project_id=project_id) 70 | 71 | response=model.generate_text(prompts) 72 | return response -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/RatingSimilarityDataGridSummary.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { DataGrid } from "@mui/x-data-grid"; 4 | import DataGridToolbar from "@/components/globals/DataGridToolbar"; 5 | import { API_TYPE_RATING, grade_map_rating, grade_map_similarity } from "@/services/Config"; 6 | 7 | const RatingSimilarityDataGridSummary = ({ serverData }) => { 8 | const columns = [ 9 | { 10 | field: "id", 11 | headerName: "Id", 12 | hide: true, 13 | }, 14 | { 15 | field: "Question", 16 | headerName: "Question", 17 | width: "250", 18 | }, 19 | { 20 | field: "experiment_name", 21 | headerName: "Experiment Name", 22 | width: "100", 23 | }, 24 | { 25 | field: "name", 26 | headerName: "Name", 27 | width: "100", 28 | }, 29 | { 30 | field: "eval_type", 31 | headerName: "Eval Type", 32 | }, 33 | { 34 | field: "golden_text", 35 | headerName: "Golden Text", 36 | width: "400", 37 | }, 38 | { 39 | field: "generated_text", 40 | headerName: "Generated Text", 41 | width: "400", 42 | }, 43 | { 44 | field: "Grade", 45 | headerName: "JudgeIt Score", 46 | width: 100, 47 | }, 48 | { 49 | field: "Explanation", 50 | headerName: "JudgeIt Reasoning", 51 | width: "400", 52 | }, 53 | ]; 54 | 55 | return ( 56 |
57 | { 61 | return { 62 | id: item._id, 63 | Question: item.question, 64 | experiment_name: item.experiment_name, 65 | name: item.name, 66 | eval_type: item.eval_type, 67 | golden_text: item.golden_text, 68 | generated_text: item.generated_text, 69 | Grade: (item?.Grade) ? item?.Grade : item?.judgeit_score, 70 | Explanation: (item?.Explanation) ? item?.Explanation : item?.judgeit_reasoning 71 | }; 72 | }), 73 | }} 74 | density="compact" 75 | getRowHeight={() => "auto"} 76 | autoHeight={true} 77 | initialState={{ 78 | ...{ 79 | columns: columns, 80 | rows: [], 81 | }.initialState, 82 | pagination: { paginationModel: { pageSize: 10 } }, 83 | }} 84 | pageSizeOptions={[5, 10, 25]} 85 | slots={{ toolbar: DataGridToolbar }} 86 | /> 87 |
88 | ); 89 | }; 90 | 91 | export default RatingSimilarityDataGridSummary; 92 | -------------------------------------------------------------------------------- /JudgeIt-App/app/globals.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --max-width: 1100px; 3 | --border-radius: 12px; 4 | --font-mono: ui-monospace, Menlo, Monaco, "Cascadia Mono", "Segoe UI Mono", 5 | "Roboto Mono", "Oxygen Mono", "Ubuntu Monospace", "Source Code Pro", 6 | "Fira Mono", "Droid Sans Mono", "Courier New", monospace; 7 | 8 | --foreground-rgb: 0, 0, 0; 9 | --background-start-rgb: 214, 219, 220; 10 | --background-end-rgb: 255, 255, 255; 11 | 12 | --primary-glow: conic-gradient( 13 | from 180deg at 50% 50%, 14 | #16abff33 0deg, 15 | #0885ff33 55deg, 16 | #54d6ff33 120deg, 17 | #0071ff33 160deg, 18 | transparent 360deg 19 | ); 20 | --secondary-glow: radial-gradient( 21 | rgba(255, 255, 255, 1), 22 | rgba(255, 255, 255, 0) 23 | ); 24 | 25 | --tile-start-rgb: 239, 245, 249; 26 | --tile-end-rgb: 228, 232, 233; 27 | --tile-border: conic-gradient( 28 | #00000080, 29 | #00000040, 30 | #00000030, 31 | #00000020, 32 | #00000010, 33 | #00000010, 34 | #00000080 35 | ); 36 | 37 | --callout-rgb: 238, 240, 241; 38 | --callout-border-rgb: 172, 175, 176; 39 | --card-rgb: 180, 185, 188; 40 | --card-border-rgb: 131, 134, 135; 41 | } 42 | 43 | @media (prefers-color-scheme: dark) { 44 | :root { 45 | --foreground-rgb: 255, 255, 255; 46 | --background-start-rgb: 0, 0, 0; 47 | --background-end-rgb: 0, 0, 0; 48 | 49 | --primary-glow: radial-gradient(rgba(1, 65, 255, 0.4), rgba(1, 65, 255, 0)); 50 | --secondary-glow: linear-gradient( 51 | to bottom right, 52 | rgba(1, 65, 255, 0), 53 | rgba(1, 65, 255, 0), 54 | rgba(1, 65, 255, 0.3) 55 | ); 56 | 57 | --tile-start-rgb: 2, 13, 46; 58 | --tile-end-rgb: 2, 5, 19; 59 | --tile-border: conic-gradient( 60 | #ffffff80, 61 | #ffffff40, 62 | #ffffff30, 63 | #ffffff20, 64 | #ffffff10, 65 | #ffffff10, 66 | #ffffff80 67 | ); 68 | 69 | --callout-rgb: 20, 20, 20; 70 | --callout-border-rgb: 108, 108, 108; 71 | --card-rgb: 100, 100, 100; 72 | --card-border-rgb: 200, 200, 200; 73 | } 74 | } 75 | 76 | * { 77 | box-sizing: border-box; 78 | padding: 0; 79 | margin: 0; 80 | } 81 | 82 | html, 83 | body { 84 | max-width: 100vw; 85 | overflow-x: hidden; 86 | } 87 | 88 | body { 89 | color: rgb(var(--foreground-rgb)); 90 | background: linear-gradient( 91 | to bottom, 92 | transparent, 93 | rgb(var(--background-end-rgb)) 94 | ) 95 | rgb(var(--background-start-rgb)); 96 | } 97 | 98 | a { 99 | color: inherit; 100 | text-decoration: none; 101 | } 102 | 103 | @media (prefers-color-scheme: dark) { 104 | html { 105 | color-scheme: dark; 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/DataGridSingleTurn.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { DataGrid } from "@mui/x-data-grid"; 4 | import DataGridToolbar from "@/components/globals/DataGridToolbar"; 5 | 6 | const DataGridSingleTurn = ({ serverData }) => { 7 | const columns = [ 8 | { 9 | field: "id", 10 | headerName: "Id", 11 | hide: true, 12 | }, 13 | { 14 | field: "name", 15 | headerName: "Name", 16 | width: "250", 17 | }, 18 | { 19 | field: "eval_type", 20 | headerName: "Eval Type", 21 | }, 22 | { 23 | field: "model", 24 | headerName: "Model", 25 | width: "250", 26 | }, 27 | { 28 | field: "previous_question", 29 | headerName: "Previous 
Question", 30 | width: "400", 31 | }, 32 | { 33 | field: "previous_answer", 34 | headerName: "Previous Answer", 35 | width: "400", 36 | }, 37 | { 38 | field: "current_question", 39 | headerName: "Current Question", 40 | width: "400", 41 | }, 42 | { 43 | field: "golden_rewritten_question", 44 | headerName: "Golden Rewritten Question", 45 | width: "400", 46 | }, 47 | { 48 | field: "rewritten_question", 49 | headerName: "Rewritten Question", 50 | width: "400", 51 | }, 52 | { 53 | field: "Grade", 54 | headerName: "Grade", 55 | width: 100, 56 | } 57 | ]; 58 | 59 | return ( 60 |
61 | {" "} 62 | { 66 | return { 67 | id: item._id, 68 | name: item.name, 69 | eval_type: item.eval_type, 70 | model: item.content.query.model, 71 | previous_question: item.content.query.previous_question, 72 | previous_answer: item.content.query.previous_answer, 73 | current_question: item.content.query.current_question, 74 | golden_rewritten_question: item.content.query.golden_rewritten_question, 75 | rewritten_question: item.content.query.rewritten_question, 76 | Grade: item.content.result.Grade 77 | }; 78 | }), 79 | }} 80 | density="compact" 81 | getRowHeight={() => "auto"} 82 | autoHeight={true} 83 | initialState={{ 84 | ...{ 85 | columns: columns, 86 | rows: [], 87 | }.initialState, 88 | pagination: { paginationModel: { pageSize: 10 } }, 89 | }} 90 | pageSizeOptions={[5, 10, 25]} 91 | slots={{ toolbar: DataGridToolbar }} 92 | /> 93 |
94 | ); 95 | }; 96 | 97 | export default DataGridSingleTurn; 98 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/DataGridSingleTurnSummary.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { DataGrid } from "@mui/x-data-grid"; 4 | import DataGridToolbar from "@/components/globals/DataGridToolbar"; 5 | 6 | const DataGridSingleTurnSummary = ({ serverData }) => { 7 | const columns = [ 8 | { 9 | field: "id", 10 | headerName: "Id", 11 | hide: true, 12 | }, 13 | { 14 | field: "name", 15 | headerName: "Name", 16 | width: "250", 17 | }, 18 | { 19 | field: "experiment_name", 20 | headerName: "Experiment Name", 21 | width: "250", 22 | }, 23 | { 24 | field: "eval_type", 25 | headerName: "Eval Type", 26 | }, 27 | 28 | { 29 | field: "previous_question", 30 | headerName: "Previous Question", 31 | width: "400", 32 | }, 33 | { 34 | field: "previous_answer", 35 | headerName: "Previous Answer", 36 | width: "400", 37 | }, 38 | { 39 | field: "current_question", 40 | headerName: "Current Question", 41 | width: "400", 42 | }, 43 | { 44 | field: "golden_rewritten_question", 45 | headerName: "Golden Rewritten Question", 46 | width: "400", 47 | }, 48 | { 49 | field: "rewritten_question", 50 | headerName: "Rewritten Question", 51 | width: "400", 52 | }, 53 | { 54 | field: "Grade", 55 | headerName: "JudgeIt Score", 56 | width: 100, 57 | } 58 | ]; 59 | 60 | return ( 61 |
62 | {" "} 63 | { 67 | return { 68 | id: item._id, 69 | name: item.name, 70 | eval_type: item.eval_type, 71 | experiment_name: item.experiment_name, 72 | previous_question: item.previous_question, 73 | previous_answer: item.previous_answer, 74 | current_question: item.current_question, 75 | golden_rewritten_question: item.golden_rewritten_question, 76 | rewritten_question: item.rewritten_question, 77 | Grade: (item?.Grade) ? item?.Grade : item?.judgeit_score 78 | }; 79 | }), 80 | }} 81 | density="compact" 82 | getRowHeight={() => "auto"} 83 | autoHeight={true} 84 | initialState={{ 85 | ...{ 86 | columns: columns, 87 | rows: [], 88 | }.initialState, 89 | pagination: { paginationModel: { pageSize: 10 } }, 90 | }} 91 | pageSizeOptions={[5, 10, 25]} 92 | slots={{ toolbar: DataGridToolbar }} 93 | /> 94 |
95 | ); 96 | }; 97 | 98 | export default DataGridSingleTurnSummary; 99 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/DisplayRequestHistoryMultiTurn.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Grid, Paper, Box, CircularProgress } from "@mui/material"; 3 | 4 | const DisplayRequestHistoryMultiTurnConversation = ({ serverData }) => { 5 | return ( 6 | <> 7 | 8 | 14 | 15 | 16 | Experiment name: 17 | 18 | 19 | {serverData.experiment_name} 20 | 21 | 22 | 23 | Request type: 24 | 25 | 26 | {serverData.eval_type} 27 | 28 | 29 | 30 | Conversation History: 31 | 32 | 33 | {serverData.content.query.conversation_history} 34 | 35 | 36 | 37 | Follow up query: 38 | 39 | 40 | {serverData.content.query.follow_up_query} 41 | 42 | 43 | Golden query: 44 | 45 | 46 | {serverData.content.query.golden_query} 47 | 48 | 49 | Rewritten query: 50 | 51 | 52 | {serverData.content.query.rewritten_query} 53 | 54 | 55 | Model: 56 | 57 | 58 | {serverData.content.query.model} 59 | 60 | 61 | 62 | 63 | 64 | 65 | 71 | 72 | 73 | Grade: 74 | 75 | 76 | {serverData.content.result.Grade || serverData.content.result.judgeit_score} 77 | 78 | 79 | 80 | 81 | 82 | ); 83 | }; 84 | 85 | export default DisplayRequestHistoryMultiTurnConversation; 86 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/RatingSimilarityForm.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { TextField, Box, Tooltip } from "@mui/material"; 4 | import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined"; 5 | 6 | const RatingSimilarityForm = ({ 7 | values, 8 | handleChange, 9 | handleBlur, 10 | errors, 11 | touched, 12 | }) => { 13 | return ( 14 |
15 | 16 | 26 | 27 | 28 | 29 | 30 | 31 | 43 | 47 | 48 | 49 | 50 | 57 | 69 | 73 | 74 | 75 | 76 |
77 | ); 78 | }; 79 | 80 | export default RatingSimilarityForm; 81 | -------------------------------------------------------------------------------- /REST-Service/app/src/services/WatsonXService.py: -------------------------------------------------------------------------------- 1 | from ibm_watson_machine_learning.foundation_models import Model 2 | from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams 3 | 4 | from ibm_watsonx_ai.foundation_models import Model 5 | from langchain_ibm import WatsonxLLM 6 | from langchain_core.prompts import PromptTemplate 7 | 8 | class WatsonXService: 9 | 10 | def __init__(self, 11 | api_key, 12 | project_id, 13 | llm_model_id) -> None: 14 | self.api_key = api_key 15 | self.ibm_cloud_url = 'https://us-south.ml.cloud.ibm.com' 16 | self.project_id = project_id 17 | self.llm_model_id = llm_model_id 18 | 19 | def get_wml_llm_services(self, 20 | decoding_method="greedy", 21 | min_new_tokens=1, 22 | max_new_tokens=200, 23 | repetition_penalty=1, 24 | stop_sequences=['}']) -> WatsonxLLM: 25 | 26 | # llm parameters 27 | generate_parameters = { 28 | "decoding_method": decoding_method, 29 | "min_new_tokens": min_new_tokens, 30 | "max_new_tokens": max_new_tokens, 31 | "repetition_penalty": repetition_penalty, 32 | "stop_sequences": stop_sequences 33 | } 34 | 35 | # instatiate llm 36 | llm_model = WatsonxLLM(apikey=self.api_key, 37 | url=self.ibm_cloud_url, 38 | project_id=self.project_id, 39 | model_id=self.llm_model_id, 40 | params=generate_parameters) 41 | return llm_model 42 | 43 | ## using watsonx machine learning api 44 | def send_to_watsonxai( 45 | self, 46 | prompts, 47 | model_id="meta-llama/llama-3-70b-instruct", 48 | decoding_method="greedy", 49 | max_new_tokens=500, 50 | min_new_tokens=30, 51 | temperature=1.0, 52 | repetition_penalty=1.0 53 | ): 54 | 55 | # Instantiate parameters for text generation 56 | model_params = { 57 | GenParams.DECODING_METHOD: decoding_method, 58 | GenParams.MIN_NEW_TOKENS: min_new_tokens, 59 | GenParams.MAX_NEW_TOKENS: max_new_tokens, 60 | GenParams.RANDOM_SEED: 42, 61 | GenParams.TEMPERATURE: temperature, 62 | GenParams.REPETITION_PENALTY: repetition_penalty, 63 | } 64 | 65 | model = Model( 66 | model_id=model_id, 67 | params=model_params, 68 | credentials={ 69 | "url" : self.ibm_cloud_url, 70 | "apikey" : self.api_key 71 | }, 72 | project_id=self.project_id) 73 | 74 | response=model.generate_text(prompts) 75 | return response 76 | -------------------------------------------------------------------------------- /REST-Service/deployment/readme.md: -------------------------------------------------------------------------------- 1 | # Deploy REST Service in OpenShift cluster 2 | 3 | ## Login to OpenShift cluster 4 | 5 | Step 1: Login to openshift console and copy login command 6 | 7 | image 8 | 9 | Login with the token or user user and password in the command line 10 | 11 | ## Deployment steps 12 | 13 | - Create a new project 14 | 15 | ```sh 16 | oc new-project llm-judge 17 | ``` 18 | 19 | - Set the project name in a variable 20 | 21 | ```sh 22 | export $NAMESPACE_NAME='llm-judge' 23 | ``` 24 | 25 | - We are using the OpenShift internal registry; however, you can use any container registry. 
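If you push to an external registry instead, the equivalent login/build/push steps would look roughly like the sketch below; the `quay.io/<your-org>` host and `judgeit-backend` repository name are illustrative, not part of this project. The commands that follow continue with the OpenShift internal registry.

```sh
export REGISTRY=quay.io/<your-org>
docker login $REGISTRY
docker build -t $REGISTRY/judgeit-backend:v1.0 .
docker push $REGISTRY/judgeit-backend:v1.0
```

In that case, point the `images` entry in [base/kustomization.yaml](base/kustomization.yaml) at that same image reference instead of the internal registry address.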
26 | 27 | ```sh 28 | export REGISTRY=$(oc get routes -n openshift-image-registry -o jsonpath='{.items[0].spec.host}') 29 | echo $(oc whoami -t) | docker login $REGISTRY -u $(oc whoami) --password-stdin 30 | ``` 31 | 32 | - Build the docker image and push it to internal registry 33 | 34 | ```sh 35 | docker build -t $REGISTRY/$NAMESPACE_NAME/backend:v1.0 . 36 | docker push $REGISTRY/$NAMESPACE_NAME/backend:v1.0 37 | ``` 38 | 39 | - We have a deployment directory with kustomization. Before you applying the deployment please edit [base/kustomize.yaml](base/kustomization.yaml) file and update the below variables based on the values you have. 40 | 41 | - WATSONX_URL= 42 | - WX_PROJECT_ID= 43 | - IBM_CLOUD_API_KEY= 44 | - LLM_JUDGE_API_KEY=JudgeIt-Secret-Api-Key 45 | - WX_PLATFORM=saas 46 | - WX_USER= 47 | - CELERY_BROKER_URL=redis://redis:6379/0 48 | - CELERY_RESULT_BACKEND=redis://redis:6379/0 49 | - SERVER_URL= 50 | - MONGO_URL= 51 | - MONGO_USER= 52 | - MONGO_PASS= 53 | - MONGO_DB="judgeit_app" 54 | 55 | ```yaml 56 | kind: Kustomization 57 | images: 58 | - name: backend-image-name 59 | newName: image-registry.openshift-image-registry.svc:5000/llm-judge-dev/backend 60 | newTag: v1.0 61 | secretGenerator: 62 | - name: llm-judge-secret 63 | literals: 64 | - WATSONX_URL= 65 | - WX_PROJECT_ID= 66 | - IBM_CLOUD_API_KEY= 67 | - LLM_JUDGE_API_KEY=JudgeIt-Secret-Api-Key 68 | - WX_PLATFORM=saas 69 | - WX_USER= 70 | - CELERY_BROKER_URL=redis://redis:6379/0 71 | - CELERY_RESULT_BACKEND=redis://redis:6379/0 72 | - SERVER_URL= 73 | - MONGO_URL= 74 | - MONGO_USER= 75 | - MONGO_PASS= 76 | - MONGO_DB="judgeit_app" 77 | resources: 78 | - redis/ 79 | - celery-worker/ 80 | - flower/ 81 | - rest-app/ 82 | ``` 83 | 84 | - Apply the deployment 85 | 86 | ```sh 87 | oc apply -k base/ 88 | ``` 89 | 90 | - Monitor the deployment 91 | 92 | ```sh 93 | watch oc get deployments,pods 94 | ``` 95 | 96 | - Test 97 | 98 | Copy the url from the command executed below and paste it in the browser. 99 | 100 | ```sh 101 | oc get routes/llm-judge-backend -o jsonpath='https://{.spec.host}/docs{"\n"}' 102 | ``` 103 | 104 | - Clean up 105 | 106 | ```sh 107 | oc delete -k base/ 108 | ``` 109 | -------------------------------------------------------------------------------- /REST-Service/app/src/services/answer_similarity.py: -------------------------------------------------------------------------------- 1 | from langchain_core.prompts import PromptTemplate 2 | 3 | ## Grading a generated text compared to a golden text 4 | SIMILARITY_PROMPT= """Follow these structured steps to accurately assess the similarity between a Golden Text and a Generated Text: 5 | 1. **Role and Task**: Assume the role of an impartial assistant and evaluator. Your task is to assess the similarity between a Golden Text and a Generated Text using the provided information. 6 | 2. **Initial Setup**: Begin by carefully reviewing the Golden Text to understand the key information, entities, and intents it contains. The Golden Text is considered fully correct and comprehensive. Then, examine the Generated Text that needs evaluation. 7 | 3. **Evaluation Criteria**: Evaluate the Generated Text based on the following criteria: 8 | - Output {{"Grade": "1"}} if: 9 | a) The Generated Text matches the Golden Text closely in terms of key entities and intents. Note that these may be worded differently but convey the same meaning. 
10 | b) The Generated Text contains all the essential information from the Golden Text, even if presented in a different order or with slight variations in phrasing. 11 | c) The Generated Text includes the core information from the Golden Text and may contain additional relevant details or expansions that don't contradict the original. 12 | - Output {{"Grade": "0"}} if: 13 | a) The Generated Text is missing critical entities or intents that are present in the Golden Text. 14 | b) The Generated Text contains significant factual errors or contradictions when compared to the Golden Text. 15 | c) The overall meaning or intent of the Generated Text substantially differs from the Golden Text. 16 | 4. **Tolerance for Minor Differences**: Allow for minor differences in numerical values, slight variations in proper nouns, and small discrepancies in less critical details, as long as the core meaning and primary facts remain intact. 17 | 5. **Explanation**: After providing the grade, explain your reasoning in 1 sentence, highlighting key similarities or differences that influenced your decision. 18 | 6. **Output Format**: Format your evaluation output strictly as {{"Grade": "evaluated grade", "Explanation": "explanation for grade"}} to ensure clarity and consistency in assessment. 19 | Remember, the goal is to identify substantive similarity rather than expecting word-for-word matches. Focus on the core information, key facts, and overall intent when making your assessment. 20 | 21 | Input: 22 | Golden Text: {prompt_parameter_1} 23 | Generated Text: {prompt_parameter_2} 24 | 25 | Output: 26 | """ 27 | 28 | def build_query_similarity_prompt(row): 29 | input_variables = ['prompt_parameter_1', 'prompt_parameter_2'] 30 | prompt = PromptTemplate(input_variables=input_variables, template=SIMILARITY_PROMPT) 31 | # create invoke parameter which is a dictionary of your prompt parameters 32 | prompt_data = {'prompt_parameter_1': row['golden_text'], 33 | 'prompt_parameter_2': row['generated_text']} 34 | 35 | return prompt, prompt_data -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/BarChart.jsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Bar } from 'react-chartjs-2'; 3 | import { Chart as ChartJS, CategoryScale, LinearScale, BarElement, Title, Tooltip, Legend } from 'chart.js'; 4 | import { API_TYPE_RATING, API_TYPE_SIMILARITY, API_TYPE_MULTITURN } from "@/services/Config"; 5 | import ChartDataLabels from 'chartjs-plugin-datalabels'; 6 | 7 | ChartJS.register(CategoryScale, LinearScale, BarElement, Title, Tooltip, Legend, ChartDataLabels); 8 | 9 | const BarChart = ({ gradeData, gradeType }) => { 10 | const totalCount = Object.values(gradeData).reduce((sum, count) => sum + count, 0); 11 | 12 | const mapGradeLabels = (label) => { 13 | const labelMaps = { 14 | [API_TYPE_RATING]: { 15 | '1': 'Incorrect', 16 | '2': 'Partially Correct', 17 | '3': 'Correct' 18 | }, 19 | [API_TYPE_SIMILARITY]: { 20 | '0': 'Incorrect', 21 | '1': 'Correct' 22 | }, 23 | [API_TYPE_MULTITURN]: { 24 | '0': 'Incorrect', 25 | '1': 'Correct' 26 | } 27 | }; 28 | 29 | return labelMaps[gradeType]?.[label] || label; 30 | }; 31 | 32 | const data = { 33 | labels: Object.keys(gradeData).map(mapGradeLabels), 34 | datasets: [ 35 | { 36 | label: 'Count', 37 | data: Object.values(gradeData), 38 | backgroundColor: 'rgba(144, 202, 249, 0.6)', 39 | borderColor: 'rgba(144, 202, 249, 1)', 40 | borderWidth: 1, 41 | }, 
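        // gradeData is a plain object keyed by the raw grade with counts as values, e.g.
        // { "0": 12, "1": 38 } for similarity/multi-turn runs or { "1": 5, "2": 11, "3": 24 }
        // for rating runs; mapGradeLabels above turns those keys into the bar labels.
        // A hypothetical usage (counts are made up):
        //   <BarChart gradeData={{ "0": 12, "1": 38 }} gradeType={API_TYPE_SIMILARITY} />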
42 | ], 43 | }; 44 | 45 | const options = { 46 | responsive: true, 47 | maintainAspectRatio: false, 48 | scales: { 49 | x: { 50 | title: { 51 | display: true, 52 | text: 'JudgeIt Score', 53 | font: { 54 | size: 14, 55 | weight: 'bold', 56 | }, 57 | }, 58 | }, 59 | y: { 60 | title: { 61 | display: true, 62 | text: 'Count', 63 | font: { 64 | size: 14, 65 | weight: 'bold', 66 | }, 67 | }, 68 | beginAtZero: true, 69 | }, 70 | }, 71 | plugins: { 72 | tooltip: { 73 | callbacks: { 74 | label: (context) => { 75 | const count = context.raw; 76 | const percentage = ((count / totalCount) * 100).toFixed(2); 77 | return `Count: ${count} (${percentage}%)`; 78 | }, 79 | }, 80 | }, 81 | datalabels: { 82 | color: 'black', // Label color 83 | anchor: 'end', // Positioning of the label 84 | align: 'top', // Align the label at the top 85 | font: { 86 | weight: 'bold', 87 | size: 12, 88 | }, 89 | formatter: (value) => "Count: " + value, // Format the value as you want 90 | }, 91 | }, 92 | }; 93 | 94 | return ( 95 |
96 | 97 |
98 | ); 99 | }; 100 | 101 | export default BarChart; -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/DisplayRequestHistorySingleTurn.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Grid, Paper, Box, CircularProgress } from "@mui/material"; 3 | 4 | const DisplayRequestHistorySingleTurn = ({ serverData }) => { 5 | return ( 6 | <> 7 | 8 | 14 | 15 | 16 | Experiment name: 17 | 18 | 19 | {serverData.experiment_name} 20 | 21 | 22 | 23 | Request type: 24 | 25 | 26 | {serverData.eval_type} 27 | 28 | 29 | 30 | Previous question: 31 | 32 | 33 | {serverData.content.query.previous_question} 34 | 35 | 36 | 37 | Previous answer: 38 | 39 | 40 | {serverData.content.query.previous_answer} 41 | 42 | 43 | Current question: 44 | 45 | 46 | {serverData.content.query.current_question} 47 | 48 | 49 | Golden rewritten question: 50 | 51 | 52 | {serverData.content.query.golden_rewritten_question} 53 | 54 | 55 | Rewritten question: 56 | 57 | 58 | {serverData.content.query.rewritten_question} 59 | 60 | 61 | Model: 62 | 63 | 64 | {serverData.content.query.model} 65 | 66 | 67 | 68 | 69 | 70 | 71 | 77 | 78 | 79 | Grade: 80 | 81 | 82 | {serverData.content.result.Grade || serverData.content.result.judgeit_score} 83 | 84 | 85 | 86 | 87 | 88 | ); 89 | }; 90 | 91 | export default DisplayRequestHistorySingleTurn; 92 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/SingleTurnForm.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | import { TextField, Box } from "@mui/material"; 4 | 5 | const SingleTurnForm = ({ 6 | values, 7 | handleChange, 8 | handleBlur, 9 | errors, 10 | touched, 11 | }) => { 12 | return ( 13 |
14 | 15 | 25 | 26 | 27 | 39 | 40 | 41 | 51 | 52 | 53 | 65 | 66 | 67 | 79 | 80 |
81 | ); 82 | }; 83 | 84 | export default SingleTurnForm; 85 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/DisplayRequestHistoryRatingSimilarity.jsx: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import { Grid, Box } from "@mui/material"; 3 | import { 4 | API_TYPE_RATING, 5 | API_TYPE_SIMILARITY, 6 | grade_map_rating, 7 | grade_map_similarity, 8 | } from "@/services/Config"; 9 | 10 | const DisplayRequestHistoryRatingSimilarity = ({ serverData }) => { 11 | return ( 12 | <> 13 | 14 | 20 | 21 | 22 | Experiment name: 23 | 24 | 25 | {serverData.experiment_name} 26 | 27 | 28 | 29 | Request type: 30 | 31 | 32 | {serverData.eval_type} 33 | 34 | 35 | Question: 36 | 37 | 38 | {serverData.content.query.question} 39 | 40 | 41 | Golden Text: 42 | 43 | 44 | {serverData.content.query.golden_text} 45 | 46 | 47 | 48 | LLM Response: 49 | 50 | 51 | {serverData.content.query.generated_text} 52 | 53 | 54 | Model: 55 | 56 | 57 | {serverData.content.query.model} 58 | 59 | 60 | 61 | 62 | 63 | 64 | 70 | 71 | 72 | JudgeIt Score: 73 | 74 | {API_TYPE_RATING === serverData.eval_type && ( 75 | 76 | {grade_map_rating[serverData.content.result.Grade]} 77 | 78 | )} 79 | {API_TYPE_SIMILARITY === serverData.eval_type && ( 80 | 81 | {grade_map_similarity[serverData.content.result.Grade]} 82 | 83 | )} 84 | 85 | 86 | JudgeIt Reasoning: 87 | 88 | 89 | {serverData.content.result.Explanation} 90 | 91 | 92 | 93 | 94 | 95 | ); 96 | }; 97 | 98 | export default DisplayRequestHistoryRatingSimilarity; 99 | -------------------------------------------------------------------------------- /Framework/main.py: -------------------------------------------------------------------------------- 1 | from answer_similarity import batch_llm_answer_similarity 2 | from answer_rating import batch_llm_answer_rating 3 | from multi_turn_eval import batch_llm_multi_turn_eval 4 | 5 | import pandas as pd 6 | import json 7 | import configparser 8 | 9 | import chardet 10 | 11 | config = configparser.ConfigParser() 12 | config.read('./config.ini') 13 | 14 | ## Setup the filename and values 15 | home_dir = config['Default']['home_dir'] 16 | input_file_name = config['Default']['input_file_name'] 17 | output_file_name = config['Default']['output_file_name'] 18 | model_id = config['Default']['model_id'] 19 | judge_type = config['Default']['judge_type'] 20 | 21 | input_file = home_dir + input_file_name 22 | output_file = home_dir + output_file_name 23 | 24 | def read_data(input_file): 25 | ## Read the data for batch processing 26 | data_df = pd.DataFrame() 27 | if '.xlsx' in input_file: 28 | data_df = pd.read_excel(input_file) 29 | elif '.csv' in input_file: 30 | with open(input_file, 'rb') as f: 31 | result = chardet.detect(f.read()) 32 | data_df = pd.read_csv(input_file, encoding=result['encoding']) 33 | return data_df 34 | 35 | def write_data(data_df): 36 | ## save the output 37 | if '.xlsx' in output_file: 38 | # write the dataframe to an excel file 39 | writer = pd.ExcelWriter(output_file, engine='xlsxwriter') 40 | data_df.to_excel(writer, index=False, sheet_name='Sheet1') 41 | workbook = writer.book 42 | worksheet = writer.sheets['Sheet1'] 43 | cell_format = workbook.add_format({'text_wrap': True, 'valign': 'top', 'align': 'left'}) 44 | for i, column in enumerate(data_df.columns): 45 | worksheet.set_column(i, i, 40, cell_format) 46 | worksheet.set_column(3, 3, 70, cell_format) 47 | writer.close() 48 | elif '.csv' in 
output_file: 49 | data_df.to_csv(output_file) 50 | print("File saved in /JudgeIt-LLM-as-a-Judge/Framework/data/output") 51 | 52 | 53 | def batch_llm_multi_turn_eval_caller(input_file): 54 | input_data = read_data(input_file) 55 | output_data = batch_llm_multi_turn_eval(model_id, input_data) 56 | write_data(output_data) 57 | return output_data 58 | 59 | def batch_llm_answer_similarity_caller(input_file): 60 | input_data = read_data(input_file) 61 | output_data = batch_llm_answer_similarity(model_id, input_data) 62 | write_data(output_data) 63 | return output_data 64 | 65 | def batch_llm_answer_rating_caller(input_file): 66 | input_data = read_data(input_file) 67 | output_data = batch_llm_answer_rating(model_id, input_data) 68 | write_data(output_data) 69 | return output_data 70 | 71 | def processing(judge_type): 72 | if judge_type == 'multi_turn_eval': 73 | batch_llm_multi_turn_eval_caller(input_file) 74 | elif judge_type == 'rag_eval_answer_similarity': 75 | batch_llm_answer_similarity_caller(input_file) 76 | elif judge_type == 'rag_eval_answer_rating': 77 | batch_llm_answer_rating_caller(input_file) 78 | 79 | 80 | 81 | 82 | processing(judge_type) 83 | ## all options basis of tabs 84 | #processing('rating','batch') 85 | # processing('rating','simple') 86 | #processing('similarity','batch') 87 | #processing('similarity','simple') 88 | #processing('multi_turn') -------------------------------------------------------------------------------- /REST-Service/app/src/services/LLMJudgeService.py: -------------------------------------------------------------------------------- 1 | from langchain_ibm import WatsonxLLM 2 | from app.src.services.answer_similarity import build_query_similarity_prompt 3 | from app.src.services.answer_rating import build_query_rating_prompt 4 | import json 5 | from app.src.services.single_turn_eval import build_single_turn_prompt 6 | from app.src.services.mult_turn_with_conversation_eval import build_multi_turn_prompt 7 | 8 | class LLMJudgeService: 9 | 10 | def __init__(self) -> None: 11 | pass 12 | 13 | def simple_processing_rating(self, golden_text: str, generated_text:str, llm_model: WatsonxLLM): 14 | 15 | prompt, prompt_data = build_query_rating_prompt(row={ 16 | "golden_text": golden_text, 17 | "generated_text": generated_text 18 | }) 19 | 20 | llm_chain = prompt | llm_model 21 | prompt_results = llm_chain.invoke(prompt_data) 22 | return json.loads(prompt_results) 23 | 24 | def simple_processing_similarity_answer(self, golden_text: str, generated_text:str, llm_model: WatsonxLLM): 25 | 26 | prompt, prompt_data = build_query_similarity_prompt(row={ 27 | "golden_text": golden_text, 28 | "generated_text": generated_text 29 | }) 30 | 31 | llm_chain = prompt | llm_model 32 | 33 | prompt_results = llm_chain.invoke(prompt_data) 34 | prompt_results = prompt_results.replace("\"1\" or \"0\"", "\"0\"") 35 | return json.loads(prompt_results) 36 | 37 | def single_trun_llm_judge(self, 38 | previous_question: str, 39 | previous_answer: str, 40 | current_question: str, 41 | golden_rewritten_question: str, 42 | rewritten_question: str, 43 | llm_model: WatsonxLLM): 44 | 45 | prompt, prompt_data = build_single_turn_prompt(row={ 46 | "previous_question": previous_question, 47 | "previous_answer": previous_answer, 48 | "current_question": current_question, 49 | "golden_rewritten_question": golden_rewritten_question, 50 | "rewritten_question": rewritten_question 51 | }) 52 | llm_chain = prompt | llm_model 53 | prompt_results = {"Grade": None} 54 | try: 55 | prompt_results = 
json.loads(llm_chain.invoke(prompt_data)) 56 | except: 57 | prompt_results = prompt_results = { 58 | "Grade": "Error" 59 | } 60 | 61 | return prompt_results 62 | 63 | def multi_trun_llm_judge(self, 64 | conversation_history: str, 65 | follow_up_query: str, 66 | golden_query: str, 67 | rewritten_query: str, 68 | llm_model: WatsonxLLM): 69 | 70 | prompt, prompt_data = build_multi_turn_prompt(row={ 71 | "conversation_history": conversation_history, 72 | "follow_up_query": follow_up_query, 73 | "golden_query": golden_query, 74 | "rewritten_query": rewritten_query 75 | }) 76 | llm_chain = prompt | llm_model 77 | prompt_results = {"Grade": None} 78 | try: 79 | prompt_results = json.loads(llm_chain.invoke(prompt_data)) 80 | except: 81 | prompt_results = prompt_results = { 82 | "Grade": "Error" 83 | } 84 | 85 | return prompt_results 86 | 87 | -------------------------------------------------------------------------------- /REST-Service/app/src/services/answer_rating.py: -------------------------------------------------------------------------------- 1 | from langchain_core.prompts import PromptTemplate 2 | 3 | ## Grading a generated text compared to a golden text 4 | RATING_PROMPT = """Follow these structured steps to accurately assess the similarity between a Golden Text and a Generated Text: 5 | 1. **Role and Task**: Assume the role of an impartial assistant and evaluator. Your task is to assess the similarity between a Golden Text and a Generated Text using the provided information. 6 | 2. **Initial Setup**: Begin by carefully reviewing the Golden Text to understand the key information, entities, and intents it contains. The Golden Text is considered fully correct and comprehensive. Then, examine the Generated Text that needs evaluation. 7 | 3. **Evaluation Criteria**: Evaluate the Generated Text based on the following criteria: 8 | - Output {{"Grade": "1"}} if: 9 | a) The Generated Text is missing critical entities or intents that are present in the Golden Text. 10 | b) The Generated Text contains significant factual errors or contradictions when compared to the Golden Text. 11 | c) The overall meaning or intent of the Generated Text substantially differs from the Golden Text. 12 | - Output {{"Grade": "2"}} if: 13 | a) The Generated Text somewhat matches the Golden Text in terms of key entities and intents. Note that these may be worded differently but convey the same meaning. 14 | b) The Generated Text contains part of the essential information from the Golden Text, even if presented in a different order or with slight variations in phrasing. 15 | c) The Generated Text includes part the core information from the Golden Text and may contain additional relevant details or expansions that don't contradict the original. 16 | - Output {{"Grade": "3"}} if: 17 | a) The Generated Text matches the Golden Text closely in terms of key entities and intents. Note that these may be worded differently but convey the same meaning. 18 | b) The Generated Text contains all the essential information from the Golden Text, even if presented in a different order or with slight variations in phrasing. 19 | c) The Generated Text includes the core information from the Golden Text and may contain additional relevant details or expansions that don't contradict the original. 20 | 4. **Tolerance for Minor Differences**: Allow for minor differences in numerical values, slight variations in proper nouns, and small discrepancies in less critical details, as long as the core meaning and primary facts remain intact. 21 | 5. 
**Explanation**: After providing the grade, explain your reasoning in 1 sentence, highlighting key similarities or differences that influenced your decision. 22 | 6. **Output Format**: Format your evaluation output strictly as {{"Grade": "evaluated grade", "Explanation": "explanation for grade"}} to ensure clarity and consistency in assessment. 23 | Remember, the goal is to identify substantive similarity rather than expecting word-for-word matches. Focus on the core information, key facts, and overall intent when making your assessment. 24 | 25 | Input: 26 | Golden Text: {prompt_parameter_1} 27 | Generated Text: {prompt_parameter_2} 28 | 29 | Output: 30 | """ 31 | 32 | def build_query_rating_prompt(row): 33 | input_variables = ['prompt_parameter_1', 'prompt_parameter_2'] 34 | prompt = PromptTemplate(input_variables=input_variables, template=RATING_PROMPT) 35 | # create invoke parameter which is a dictionary of your prompt parameters 36 | prompt_data = {'prompt_parameter_1': row['golden_text'], 37 | 'prompt_parameter_2': row['generated_text']} 38 | 39 | return prompt, prompt_data -------------------------------------------------------------------------------- /REST-Service/chart/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: fastapi-app 5 | labels: 6 | app: fastapi-app 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | selector: 10 | matchLabels: 11 | app: fastapi-app 12 | template: 13 | metadata: 14 | labels: 15 | app: fastapi-app 16 | spec: 17 | containers: 18 | - name: fastapi-app 19 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 20 | imagePullPolicy: {{ .Values.image.pullPolicy }} 21 | ports: 22 | - containerPort: {{ .Values.service.fastapi.port }} 23 | env: 24 | - name: WATSONX_URL 25 | value: "{{ .Values.env.WATSONX_URL }}" 26 | - name: WX_PROJECT_ID 27 | value: "{{ .Values.env.WX_PROJECT_ID }}" 28 | - name: IBM_CLOUD_API_KEY 29 | value: "{{ .Values.env.IBM_CLOUD_API_KEY }}" 30 | - name: CELERY_BROKER_URL 31 | value: "{{ .Values.env.CELERY_BROKER_URL }}" 32 | - name: CELERY_RESULT_BACKEND 33 | value: "{{ .Values.env.CELERY_RESULT_BACKEND }}" 34 | 35 | --- 36 | 37 | apiVersion: apps/v1 38 | kind: Deployment 39 | metadata: 40 | name: redis 41 | labels: 42 | app: redis 43 | spec: 44 | replicas: {{ .Values.replicaCount }} 45 | selector: 46 | matchLabels: 47 | app: redis 48 | template: 49 | metadata: 50 | labels: 51 | app: redis 52 | spec: 53 | containers: 54 | - name: redis 55 | image: redis:7.2.5-alpine 56 | ports: 57 | - containerPort: {{ .Values.service.redis.port }} 58 | 59 | --- 60 | 61 | apiVersion: apps/v1 62 | kind: Deployment 63 | metadata: 64 | name: celery-worker 65 | labels: 66 | app: celery-worker 67 | spec: 68 | replicas: {{ .Values.replicaCount }} 69 | selector: 70 | matchLabels: 71 | app: celery-worker 72 | template: 73 | metadata: 74 | labels: 75 | app: celery-worker 76 | spec: 77 | containers: 78 | - name: celery-worker 79 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 80 | imagePullPolicy: {{ .Values.image.pullPolicy }} 81 | command: ["celery", "-A", "app.celery.celery_worker.celery", "worker", "--loglevel=info"] 82 | env: 83 | - name: WATSONX_URL 84 | value: "{{ .Values.env.WATSONX_URL }}" 85 | - name: WX_PROJECT_ID 86 | value: "{{ .Values.env.WX_PROJECT_ID }}" 87 | - name: IBM_CLOUD_API_KEY 88 | value: "{{ .Values.env.IBM_CLOUD_API_KEY }}" 89 | - name: CELERY_BROKER_URL 90 | value: "{{ 
.Values.env.CELERY_BROKER_URL }}" 91 | - name: CELERY_RESULT_BACKEND 92 | value: "{{ .Values.env.CELERY_RESULT_BACKEND }}" 93 | 94 | --- 95 | 96 | apiVersion: apps/v1 97 | kind: Deployment 98 | metadata: 99 | name: flower 100 | labels: 101 | app: flower 102 | spec: 103 | replicas: {{ .Values.replicaCount }} 104 | selector: 105 | matchLabels: 106 | app: flower 107 | template: 108 | metadata: 109 | labels: 110 | app: flower 111 | spec: 112 | containers: 113 | - name: flower 114 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 115 | imagePullPolicy: {{ .Values.image.pullPolicy }} 116 | command: ["celery", "--broker=redis://redis:6379/0", "flower", "--port=5555"] 117 | ports: 118 | - containerPort: {{ .Values.service.flower.port }} 119 | env: 120 | - name: CELERY_BROKER_URL 121 | value: "{{ .Values.env.CELERY_BROKER_URL }}" 122 | - name: CELERY_RESULT_BACKEND 123 | value: "{{ .Values.env.CELERY_RESULT_BACKEND }}" 124 | -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/DrawerMenu.jsx: -------------------------------------------------------------------------------- 1 | import { Box, Toolbar, Typography } from "@mui/material"; 2 | import Drawer from "@mui/material/Drawer"; 3 | import List from "@mui/material/List"; 4 | import Divider from "@mui/material/Divider"; 5 | import ListItem from "@mui/material/ListItem"; 6 | import ListItemButton from "@mui/material/ListItemButton"; 7 | import ListItemIcon from "@mui/material/ListItemIcon"; 8 | import ListItemText from "@mui/material/ListItemText"; 9 | import HomeOutlinedIcon from "@mui/icons-material/HomeOutlined"; 10 | import LogoutOutlinedIcon from "@mui/icons-material/LogoutOutlined"; 11 | import { signOut } from "next-auth/react"; 12 | import GavelOutlinedIcon from "@mui/icons-material/GavelOutlined"; 13 | import BatchPredictionOutlinedIcon from "@mui/icons-material/BatchPredictionOutlined"; 14 | import HelpCenterOutlinedIcon from "@mui/icons-material/HelpCenterOutlined"; 15 | import { app_labels_and_config } from "@/services/Config"; 16 | 17 | const DrawerMenu = ({ 18 | open, 19 | handleDrawwerOpen, 20 | handleDrawwerClose, 21 | handleLogout, 22 | }) => { 23 | const list = () => ( 24 | 30 | 37 | 45 | {app_labels_and_config.app_title} 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | { 95 | signOut({ callbackUrl: "/" }); 96 | }} 97 | > 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | ); 107 | 108 | return ( 109 | 110 | {list()} 111 | 112 | ); 113 | }; 114 | 115 | export default DrawerMenu; 116 | -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/LeftNavigation.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { Sidebar, Menu, MenuItem, SubMenu } from "react-pro-sidebar"; 3 | import HomeOutlinedIcon from "@mui/icons-material/HomeOutlined"; 4 | import { Divider, Toolbar, Typography } from "@mui/material"; 5 | import LoginOutlinedIcon from "@mui/icons-material/LoginOutlined"; 6 | import LogoutOutlinedIcon from "@mui/icons-material/LogoutOutlined"; 7 | import CreateNewFolderOutlinedIcon from "@mui/icons-material/CreateNewFolderOutlined"; 8 | import Link from "next/link"; 9 | import GavelOutlinedIcon from "@mui/icons-material/GavelOutlined"; 10 | import BatchPredictionOutlinedIcon 
from "@mui/icons-material/BatchPredictionOutlined"; 11 | import { useSession, signIn, signOut } from "next-auth/react"; 12 | import { useEffect } from "react"; 13 | 14 | function LeftNavBar() { 15 | const { data: session, status } = useSession(); 16 | 17 | useEffect(() => { 18 | if ( 19 | status != "loading" && 20 | session && 21 | session?.error === "RefreshAccessTokenError" 22 | ) { 23 | signOut({ callbackUrl: "/" }); 24 | } 25 | }, [session, status]); 26 | 27 | return ( 28 | <> 29 | {session && ( 30 | 31 | 32 | LLM Judge 33 | {status === "loading" && ( 34 | Loading.. 35 | )} 36 | {session && ( 37 | 38 | Logged in as {session.user.email} 39 | 40 | )} 41 | 42 | 43 | 55 | } 57 | component={} 58 | > 59 | {" "} 60 | Home{" "} 61 | 62 | 63 | {session && ( 64 | }> 65 | {session && ( 66 | } 68 | component={} 69 | > 70 | Single{" "} 71 | 72 | )} 73 | {session && ( 74 | } 76 | component={} 77 | > 78 | Batch{" "} 79 | 80 | )} 81 | 82 | )} 83 | 84 | {!session && ( 85 | } 87 | onClick={() => signIn("auth0")} 88 | > 89 | Login 90 | 91 | )} 92 | 93 | {session && ( 94 | } 96 | onClick={() => { 97 | signOut({ callbackUrl: "/" }); 98 | }} 99 | > 100 | Logout 101 | 102 | )} 103 | 104 | 105 | )} 106 | 107 | ); 108 | } 109 | 110 | export default LeftNavBar; 111 | -------------------------------------------------------------------------------- /JudgeIt-App/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # JudgeIt Application 5 | 6 | One method of using JudgeIt is through a Service-Oriented Architecture (SOA). This directory contains the code for a React-based application that provides a user interface for interacting with the LLM Judge service. It is built on the Next.js framework and integrates with IBM App ID for authentication. There are three types of evaluation currently available: 7 | 8 | 1. **RAG Evaluation (Similarity)**: evaluate generated text against golden text 9 | 2. **RAG Evaluation (Rating)**: evaluate generated text against golden text 10 | 3. **Multi-turn evaluation**: evaluate rewritten queries given a mult-turn conversation 11 | 12 | The JudgeIt framework takes input data in the form of excel or csv files for any of these evaluations. 13 | 14 | ![LLM-Judges](/images/flow-diagram.png) 15 | 16 | 17 | ## Table of Contents 18 | 19 | - [Getting Started](#getting-started) 20 | - [Prerequisites](#prerequisites) 21 | - [Installation](#installation) 22 | - [Configuring your Input File](#configuring-your-input-file) 23 | - [Understanding the Results](#understanding-the-results) 24 | 25 | 26 | 27 | ## Getting Started 28 | 29 | ### Prerequisites 30 | 31 | The following prerequisites are required to run the tester: 32 | 33 | 1. [JudgeIt Backend REST Service](/REST-Service/README.md) is up and running 34 | 2. [Node.js](https://nodejs.org/en) v18 or higher 35 | 3. [IBM AppID](https://www.ibm.com/products/app-id) for application authentication 36 | 37 | ### Installation 38 | 39 | 1. Change directory into the JudgeIt App 40 | 41 | ```bash 42 | cd JudgeIt-LLM-as-a-Judge/JudgeIt-App 43 | ``` 44 | 45 | 2. Copy env file to .env 46 | 47 | ```bash 48 | cp env .env 49 | ``` 50 | 51 | 3. Configure your parameters in .env. Make sure `NEXT_PUBLIC_LLM_JUDGE_API_KEY` value matches with the value assigned in backend service. 52 | 53 | 4. Install dependencies 54 | 55 | ```bash 56 | npm install 57 | ``` 58 | 59 | 5. Run the development server 60 | 61 | ```bash 62 | npm run dev 63 | ``` 64 | 65 | 6. 
Open [http://localhost:3000](http://localhost:3000) in your browser to see the result. 66 | 67 | ## Configuring your Input File 68 | 69 | Each type of LLM Judge will accept an excel/csv file as an input file. The repository contains a sample input file for each type of LLM Judge that you can copy, edit, and use to test. They are located at: [JudgeIt-LLM-as-a-Judge/Framework/data/input](../Framework/data/input) 70 | 71 | 1. RAG Evaluation (Similarity): provide an excel/csv file with a `golden_text` column and `generated_text` column to compare 72 | 2. RAG Evaluation (Rating): provide an excel/csv file with a `golden_text` column and `generated_text` column to compare 73 | 3. Multi-turn Evaluation: provide an excel/csv file with the following columns: `previous_question`, `previous_answer`, `current_question`, `golden_rewritten_question`, and `rewritten_question` 74 | 75 | Note: Your input files can contain additional columns beyond the ones specified above. These columns will have no effect on the LLM Judge and will be preserved in the output file. 76 | 77 | ## Understanding the Results 78 | 79 | The generated results will be saved to an excel/csv file at the location specified in your config file. Each file will contain all the columns provided in the input file. 80 | 81 | 1. For RAG Evaluation (Similarity), the LLM Judge will output a `Grade` and `Explanation`. A grade of 0 means the texts are dissimilar, while a grade of 1 means the texts are similar. 82 | 2. For RAG Evaluation (Rating), the LLM Judge will output a `Grade` and `Explanation`. A grade of 1 means the texts are dissimilar, a grade of 2 means the texts are partially similar, and a grade of 3 means the texts are significantly similar. 83 | 3. For Multi-turn Evaluation, the LLM Judge will output a `Grade`. A grade of 0 means the golden rewritten question and rewritten question are dissimilar, while a grade of 1 means the questions are similar. 84 | -------------------------------------------------------------------------------- /JudgeIt-App/components/judge/EvaluationTypeComponent.jsx: -------------------------------------------------------------------------------- 1 | import { 2 | FormControl, 3 | FormHelperText, 4 | RadioGroup, 5 | FormControlLabel, 6 | Radio, 7 | FormLabel, 8 | } from "@mui/material"; 9 | import { 10 | API_TYPE_MULTITURN, 11 | API_TYPE_RATING, 12 | API_TYPE_SIMILARITY, 13 | API_TYPE_WBOX_SDR, 14 | API_TYPE_BBOX_SDR, 15 | API_TYPE_KEY, 16 | API_TYPE_SINGLETURN, 17 | API_TYPE_AGENT, 18 | } from "@/services/Config"; 19 | import EvaluationTypeLabel from "@/components/judge/EvaluationTypeLabel"; 20 | 21 | const EvaluationTypeComponent = ({ 22 | values, 23 | handleChange, 24 | handleBlur, 25 | errors, 26 | touched, 27 | api_call_inprogress 28 | }) => { 29 | return ( 30 |
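The required columns described above can be assembled with any spreadsheet tool or a few lines of code. The following is a minimal sketch, not part of the repository: it assumes `pandas` (with `openpyxl`) is installed, and the file name and sample row are illustrative placeholders for a multi-turn evaluation input file.

```python
# Illustrative sketch: create a multi-turn evaluation input file with the
# column names the LLM Judge expects. The file name and row values are
# placeholders; assumes pandas and openpyxl are available.
import pandas as pd

rows = [
    {
        "previous_question": "Who founded IBM?",
        "previous_answer": "IBM was founded by Charles Ranlett Flint.",
        "current_question": "Where is it headquartered?",
        "golden_rewritten_question": "Where is IBM headquartered?",
        "rewritten_question": "Where is IBM headquartered?",
    }
]

# Any extra columns added here would simply be carried through to the output.
pd.DataFrame(rows).to_excel("multi_turn_input.xlsx", index=False)

# A CSV input works the same way:
# pd.DataFrame(rows).to_csv("multi_turn_input.csv", index=False)
```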
31 | {" "} 32 | 37 | 38 | Evaluation Type 39 | 40 | 48 | } 51 | label={ 52 | 56 | } 57 | /> 58 | } 61 | label={ 62 | 66 | } 67 | /> 68 | } 71 | label={ 72 | 76 | } 77 | /> 78 | } 81 | label={ 82 | 86 | } 87 | /> 88 | } 91 | label={ 92 | 96 | } 97 | /> 98 | } 101 | label={ 102 | 106 | } 107 | /> 108 | } 111 | label={ 112 | 116 | } 117 | /> 118 | 119 | 120 | {touched.apiType && errors.apiType && ( 121 | {errors.apiType} 122 | )} 123 | 124 |
125 | ); 126 | }; 127 | 128 | export default EvaluationTypeComponent; 129 | -------------------------------------------------------------------------------- /JudgeIt-App/components/globals/SingleInstructions.jsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { Box, Typography, Button } from "@mui/material"; 3 | import GavelOutlinedIcon from "@mui/icons-material/GavelOutlined"; 4 | 5 | function SingleInstructions() { 6 | return (

Single Answer Evaluation Instructions

Evaluate a single input using different LLM Judge types.

1. RAG Evaluation (Similarity):
   - Function: Compare a golden text to a generated text
   - Input: Provide the following: golden text, generated text
   - Output: The LLM Judge will output a Grade and Explanation. A grade of 0 means the texts are dissimilar, while a grade of 1 means the texts are similar.

2. RAG Evaluation (Rating):
   - Function: Compare a golden text to a generated text
   - Input: Provide the following: golden text, generated text
   - Output: The LLM Judge will output a Grade and Explanation. A grade of 1 means the texts are dissimilar, a grade of 2 means the texts are partially similar, and a grade of 3 means the texts are significantly similar.

3. Multi-turn Evaluation:
   - Function: Compare a golden rewritten query to a rewritten query based on a multi-turn conversation
   - Input: Provide the following: previous question, previous answer, current question, golden rewritten question, rewritten question
   - Output: The LLM Judge will output a Grade and Explanation. A grade of 0 means the texts are dissimilar, while a grade of 1 means the texts are similar.
129 | ); 130 | } 131 | 132 | export default SingleInstructions; 133 | -------------------------------------------------------------------------------- /REST-Service/app/src/services/ManagementService.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import Any, Dict 3 | from app.src.models.RequestHistory import RequestHistory 4 | from app.src.models.Experiment import Experiment 5 | from app.src.services.MongoService import MongoService 6 | from bson.json_util import dumps, loads 7 | from bson.objectid import ObjectId 8 | 9 | class ManagementService: 10 | 11 | def __init__(self, mongo_db: MongoService) -> None: 12 | self.experiment_collection = mongo_db.get_experiment_collection() 13 | self.history_collection = mongo_db.get_request_history_collection() 14 | 15 | def get_experiments(self, user_id): 16 | cursor = self.experiment_collection.find({ "user_id": user_id }) 17 | experiments = [self.bson_to_dict(doc) for doc in cursor] 18 | return experiments 19 | 20 | def get_experiments_by_type(self, user_id: str, type: str): 21 | cursor = self.experiment_collection.find({ "user_id": user_id, "type": type }) 22 | experiments = [self.bson_to_dict(doc) for doc in cursor] 23 | return experiments 24 | 25 | def get_experiment_by_name(self, user_id: str, name: str): 26 | cursor = self.experiment_collection.find_one({ "user_id": user_id, "name": name }) 27 | if cursor is not None: 28 | return self.bson_to_dict(cursor) 29 | return None 30 | 31 | def get_experiment_by_name_and_type(self, user_id: str, name: str, type: str): 32 | cursor = self.experiment_collection.find_one({ "user_id": user_id, "name": name, "type": type }) 33 | if cursor is not None: 34 | return self.bson_to_dict(cursor) 35 | return None 36 | 37 | def get_history_by_id(self, user_id: str, doc_id: str): 38 | object_id = ObjectId(doc_id) 39 | cursor = self.history_collection.find_one({"user_id": user_id, "_id": object_id}) 40 | return self.bson_to_dict(cursor) 41 | 42 | def get_histories(self, user_id): 43 | cursor = self.history_collection.find({ "user_id": user_id }) 44 | histories = [self.bson_to_dict(doc) for doc in cursor] 45 | return histories 46 | 47 | def get_histories_by_type(self, user_id: str, type: str): 48 | projection = {'content': 0} 49 | cursor = self.history_collection.find({ "user_id": user_id, "type": type }, projection ) 50 | histories = [self.bson_to_dict(doc) for doc in cursor] 51 | return histories 52 | 53 | def get_histories_by_experiment_name(self, user_id, experiment_name): 54 | cursor = self.history_collection.find({ "user_id": user_id, "experiment_name": experiment_name }) 55 | histories = [self.bson_to_dict(doc) for doc in cursor] 56 | return histories 57 | 58 | def get_histories_by_experiment_name_type(self, user_id: str, experiment_name: str, type: str): 59 | query: dict = { "user_id": user_id, "experiment_name": experiment_name, "type": type } 60 | cursor = self.history_collection.find(query) 61 | print("calling here", query) 62 | histories = [self.bson_to_dict(doc) for doc in cursor] 63 | return histories 64 | 65 | def add_experiment(self, experiment: Experiment): 66 | input = experiment.model_dump() 67 | insertion = self.experiment_collection.insert_one(input) 68 | return str(insertion.inserted_id) 69 | 70 | def add_history(self, request_history: RequestHistory) -> str: 71 | input = request_history.model_dump() 72 | insertion = self.history_collection.insert_one(input) 73 | return str(insertion.inserted_id) 74 | 75 | def delete_experiment(self, doc_id: 
str, user_id): 76 | object_id = ObjectId(doc_id) 77 | result = self.experiment_collection.delete_one({"_id": object_id, "user_id": user_id}) 78 | return result.deleted_count 79 | 80 | def delete_experiment_by_name(self, experiment_name, user_id): 81 | ## Delete all document under experiment name in request history collection 82 | self.history_collection.delete_many({"experiment_name": experiment_name, "user_id": user_id}) 83 | ## Delete from experiment collections 84 | result = self.experiment_collection.delete_one({"name": experiment_name, "user_id": user_id}) 85 | return result.deleted_count 86 | 87 | def delete_history(self, doc_id: str, user_id:str): 88 | object_id = ObjectId(doc_id) 89 | result = self.history_collection.delete_one({"_id": object_id, "user_id": user_id}) 90 | return result.deleted_count 91 | 92 | # Function to convert BSON document to a dictionary 93 | def bson_to_dict(self, bson_doc) -> Dict[str, Any]: 94 | # Convert ObjectId to string and return as dictionary 95 | doc = bson_doc.copy() # Create a copy to avoid modifying the original 96 | doc['_id'] = str(doc['_id']) # Convert ObjectId to string 97 | return doc -------------------------------------------------------------------------------- /JudgeIt-App/app/pages/single/doc/[doc_id]/page.js: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { useParams } from "next/navigation"; 3 | import { useSession } from "next-auth/react"; 4 | import { Grid, Box, Button, Typography, CircularProgress } from "@mui/material"; 5 | import EvaluationHistoryLeftBar from "@/components/judge/EvaluationHistoryLeftBar"; 6 | import { useEffect, useRef, useState } from "react"; 7 | import { fetch_request_history_by_id } from "@/services/ManagemenBackendAPI"; 8 | import { 9 | API_TYPE_SINGLETURN, 10 | API_TYPE_MULTITURN, 11 | API_TYPE_RATING, 12 | API_TYPE_SIMILARITY, 13 | } from "@/services/Config"; 14 | import DisplayRequestHistoryRatingSimilarity from "@/components/judge/DisplayRequestHistoryRatingSimilarity"; 15 | import DisplayRequestHistorySingleTurn from "@/components/judge/DisplayRequestHistorySingleTurn"; 16 | import ArrowBackOutlinedIcon from "@mui/icons-material/ArrowBackOutlined"; 17 | import Footer from "@/components/globals/Footer"; 18 | import DisplayRequestHistoryMultiTurnConversation from "@/components/judge/DisplayRequestHistoryMultiTurn"; 19 | 20 | const ItemPage = () => { 21 | const params = useParams(); 22 | const { data: session, status } = useSession(); 23 | const hasEffectRun = useRef(false); 24 | const [serverData, setServerData] = useState(null); 25 | const { doc_id } = params; // Get the 'id' from the URL 26 | 27 | useEffect(() => { 28 | if (hasEffectRun.current) { 29 | return; // Prevents the effect from running again 30 | } 31 | 32 | const fetch_data = async () => { 33 | const data = await fetch_request_history_by_id( 34 | session.user.email, 35 | doc_id 36 | ); 37 | setServerData(data); 38 | }; 39 | 40 | if (session?.user.email) { 41 | fetch_data(); 42 | hasEffectRun.current = true; 43 | } 44 | }, [session?.user.email, doc_id]); // Empty dependency array, runs only once 45 | 46 | if (status === "loading") { 47 | return ( 48 |
56 | 57 |
58 | ); 59 | } 60 | 61 | return ( 62 | <> 63 | 64 | 65 | 66 | 67 | 68 | {session && serverData && ( 69 | 70 | 71 | 77 | 78 | 83 | 92 | Single Answer Evaluation: {serverData.name} 93 | 94 | 101 | 102 | 103 | {(API_TYPE_RATING === serverData.eval_type || 104 | API_TYPE_SIMILARITY === serverData.eval_type) && ( 105 | 108 | )} 109 | {API_TYPE_SINGLETURN === serverData.eval_type && ( 110 | 111 | )} 112 | {API_TYPE_MULTITURN === serverData.eval_type && ( 113 | 114 | )} 115 | 116 | 117 |