├── README.md
├── data
│   ├── benchmark_questions.jsonl
│   ├── final_model_responses
│   │   ├── claude-3-5-sonnet-20241022_responses.jsonl
│   │   ├── gemini-1.5-pro-002_responses.jsonl
│   │   ├── gpt-4o-2024-08-06_responses.jsonl
│   │   ├── llama-3-2-3b-instruct.jsonl
│   │   ├── llama-3-3-70b-instruct.jsonl
│   │   ├── llama3-1-405b-instruct-v1_responses.jsonl
│   │   ├── mistral-large-latest_responses.jsonl
│   │   ├── mixtral-8x7b-instruct.jsonl
│   │   ├── o1-preview_responses.jsonl
│   │   ├── qwen2-5_14b.jsonl
│   │   ├── qwen2-5_72b.jsonl
│   │   └── qwen2_72b.jsonl
│   └── response_template.jsonl
├── main.py
├── requirements.txt
└── src
    ├── conversation.py
    ├── data_loader.py
    ├── evaluator.py
    ├── models
    │   ├── base.py
    │   ├── factory.py
    │   ├── huggingface.py
    │   └── openai.py
    └── result_parser.py


/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/README.md


--------------------------------------------------------------------------------
/data/benchmark_questions.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/benchmark_questions.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/claude-3-5-sonnet-20241022_responses.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/claude-3-5-sonnet-20241022_responses.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/gemini-1.5-pro-002_responses.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/gemini-1.5-pro-002_responses.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/gpt-4o-2024-08-06_responses.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/gpt-4o-2024-08-06_responses.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/llama-3-2-3b-instruct.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/llama-3-2-3b-instruct.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/llama-3-3-70b-instruct.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/llama-3-3-70b-instruct.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/llama3-1-405b-instruct-v1_responses.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/llama3-1-405b-instruct-v1_responses.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/mistral-large-latest_responses.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/mistral-large-latest_responses.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/mixtral-8x7b-instruct.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/mixtral-8x7b-instruct.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/o1-preview_responses.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/o1-preview_responses.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/qwen2-5_14b.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/qwen2-5_14b.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/qwen2-5_72b.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/qwen2-5_72b.jsonl


--------------------------------------------------------------------------------
/data/final_model_responses/qwen2_72b.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/final_model_responses/qwen2_72b.jsonl


--------------------------------------------------------------------------------
/data/response_template.jsonl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/data/response_template.jsonl


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/main.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/requirements.txt


--------------------------------------------------------------------------------
/src/conversation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/src/conversation.py


--------------------------------------------------------------------------------
/src/data_loader.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/src/data_loader.py


--------------------------------------------------------------------------------
/src/evaluator.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/src/evaluator.py


--------------------------------------------------------------------------------
/src/models/base.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/src/models/base.py


--------------------------------------------------------------------------------
/src/models/factory.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/src/models/factory.py


--------------------------------------------------------------------------------
/src/models/huggingface.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/src/models/huggingface.py


--------------------------------------------------------------------------------
/src/models/openai.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/src/models/openai.py


--------------------------------------------------------------------------------
/src/result_parser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ekwinox117/multi-challenge/HEAD/src/result_parser.py


--------------------------------------------------------------------------------