├── .github └── workflows │ ├── jekyll-gh-pages.yml │ ├── python-app.yml │ └── scorecard.yml ├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── _config.yml ├── actionrecognition ├── Dockerfile ├── actionrec.py ├── labels │ ├── label_map_kinetics400.txt │ ├── label_map_kinetics600.txt │ ├── label_map_kinetics700.txt │ ├── label_map_sthv1.txt │ ├── label_map_sthv2.txt │ └── label_map_ucf101.txt └── requirements.txt ├── cognitivesynergy ├── CognitiveSynergyAgent.py ├── CognitiveSynergyAgentManager.py ├── CognitiveSynergyEngine.py ├── Dockerfile ├── README.md ├── debug1 ├── main_detection.py └── requirements.txt ├── config ├── .env └── .env.actionrec ├── dashboards ├── FrameRate.json ├── GPUmetrics.json └── Latency.json ├── data ├── basketball.mp4 ├── liverpool.mp4 ├── race.mp4 ├── street-25fps.mp4 └── street_30fps.mp4 ├── docker-compose.yml ├── docs ├── architecture.md ├── changelog.md ├── cicd.md ├── essentials.md ├── getstarted.md ├── images │ ├── architecture.png │ ├── getstarts.gif │ └── overview.png ├── index.md └── intro.md ├── grafana ├── Dockerfile ├── dashboards │ ├── framerate.json │ └── main.json ├── grafana.ini └── provisioning │ ├── dashboards │ └── dashboard.yml │ └── datasources │ └── redis.yml ├── mkdocs.yml ├── producer.py ├── recallm ├── README.md ├── benchmarks │ └── duoRC_50_percent │ │ ├── aggregate_score.json │ │ ├── test-0.json │ │ ├── test-0_score_gpt.json │ │ ├── test-1.json │ │ ├── test-10.json │ │ ├── test-100.json │ │ ├── test-100_score_gpt.json │ │ ├── test-101.json │ │ ├── test-101_score_gpt.json │ │ ├── test-102.json │ │ ├── test-102_score_gpt.json │ │ ├── test-103.json │ │ ├── test-103_score_gpt.json │ │ ├── test-104.json │ │ ├── test-104_score_gpt.json │ │ ├── test-105.json │ │ ├── test-105_score_gpt.json │ │ ├── test-106.json │ │ ├── test-106_score_gpt.json │ │ ├── test-107.json │ │ ├── test-107_score_gpt.json │ │ ├── test-108.json │ │ ├── test-108_score_gpt.json │ │ ├── test-109.json │ │ 
├── test-109_score_gpt.json │ │ ├── test-10_score_gpt.json │ │ ├── test-11.json │ │ ├── test-110.json │ │ ├── test-110_score_gpt.json │ │ ├── test-111.json │ │ ├── test-111_score_gpt.json │ │ ├── test-112.json │ │ ├── test-112_score_gpt.json │ │ ├── test-113.json │ │ ├── test-113_score_gpt.json │ │ ├── test-114.json │ │ ├── test-114_score_gpt.json │ │ ├── test-115.json │ │ ├── test-115_score_gpt.json │ │ ├── test-116.json │ │ ├── test-116_score_gpt.json │ │ ├── test-117.json │ │ ├── test-117_score_gpt.json │ │ ├── test-118.json │ │ ├── test-118_score_gpt.json │ │ ├── test-119.json │ │ ├── test-119_score_gpt.json │ │ ├── test-11_score_gpt.json │ │ ├── test-12.json │ │ ├── test-120.json │ │ ├── test-120_score_gpt.json │ │ ├── test-121.json │ │ ├── test-121_score_gpt.json │ │ ├── test-122.json │ │ ├── test-122_score_gpt.json │ │ ├── test-123.json │ │ ├── test-123_score_gpt.json │ │ ├── test-124.json │ │ ├── test-124_score_gpt.json │ │ ├── test-125.json │ │ ├── test-125_score_gpt.json │ │ ├── test-126.json │ │ ├── test-126_score_gpt.json │ │ ├── test-127.json │ │ ├── test-127_score_gpt.json │ │ ├── test-128.json │ │ ├── test-128_score_gpt.json │ │ ├── test-129.json │ │ ├── test-129_score_gpt.json │ │ ├── test-12_score_gpt.json │ │ ├── test-13.json │ │ ├── test-130.json │ │ ├── test-130_score_gpt.json │ │ ├── test-131.json │ │ ├── test-131_score_gpt.json │ │ ├── test-132.json │ │ ├── test-132_score_gpt.json │ │ ├── test-133.json │ │ ├── test-133_score_gpt.json │ │ ├── test-134.json │ │ ├── test-134_score_gpt.json │ │ ├── test-135.json │ │ ├── test-135_score_gpt.json │ │ ├── test-136.json │ │ ├── test-136_score_gpt.json │ │ ├── test-137.json │ │ ├── test-137_score_gpt.json │ │ ├── test-138.json │ │ ├── test-138_score_gpt.json │ │ ├── test-139.json │ │ ├── test-139_score_gpt.json │ │ ├── test-13_score_gpt.json │ │ ├── test-14.json │ │ ├── test-140.json │ │ ├── test-140_score_gpt.json │ │ ├── test-141.json │ │ ├── test-141_score_gpt.json │ │ ├── test-142.json │ │ ├── 
test-142_score_gpt.json │ │ ├── test-143.json │ │ ├── test-143_score_gpt.json │ │ ├── test-144.json │ │ ├── test-144_score_gpt.json │ │ ├── test-145.json │ │ ├── test-145_score_gpt.json │ │ ├── test-146.json │ │ ├── test-146_score_gpt.json │ │ ├── test-147.json │ │ ├── test-147_score_gpt.json │ │ ├── test-148.json │ │ ├── test-148_score_gpt.json │ │ ├── test-149.json │ │ ├── test-149_score_gpt.json │ │ ├── test-14_score_gpt.json │ │ ├── test-15.json │ │ ├── test-150.json │ │ ├── test-150_score_gpt.json │ │ ├── test-151.json │ │ ├── test-151_score_gpt.json │ │ ├── test-152.json │ │ ├── test-152_score_gpt.json │ │ ├── test-153.json │ │ ├── test-153_score_gpt.json │ │ ├── test-154.json │ │ ├── test-154_score_gpt.json │ │ ├── test-155.json │ │ ├── test-155_score_gpt.json │ │ ├── test-156.json │ │ ├── test-156_score_gpt.json │ │ ├── test-157.json │ │ ├── test-157_score_gpt.json │ │ ├── test-158.json │ │ ├── test-158_score_gpt.json │ │ ├── test-159.json │ │ ├── test-159_score_gpt.json │ │ ├── test-15_score_gpt.json │ │ ├── test-16.json │ │ ├── test-160.json │ │ ├── test-160_score_gpt.json │ │ ├── test-161.json │ │ ├── test-161_score_gpt.json │ │ ├── test-162.json │ │ ├── test-162_score_gpt.json │ │ ├── test-163.json │ │ ├── test-163_score_gpt.json │ │ ├── test-164.json │ │ ├── test-164_score_gpt.json │ │ ├── test-165.json │ │ ├── test-165_score_gpt.json │ │ ├── test-166.json │ │ ├── test-166_score_gpt.json │ │ ├── test-167.json │ │ ├── test-167_score_gpt.json │ │ ├── test-168.json │ │ ├── test-168_score_gpt.json │ │ ├── test-169.json │ │ ├── test-169_score_gpt.json │ │ ├── test-16_score_gpt.json │ │ ├── test-17.json │ │ ├── test-170.json │ │ ├── test-170_score_gpt.json │ │ ├── test-171.json │ │ ├── test-171_score_gpt.json │ │ ├── test-172.json │ │ ├── test-172_score_gpt.json │ │ ├── test-173.json │ │ ├── test-173_score_gpt.json │ │ ├── test-174.json │ │ ├── test-174_score_gpt.json │ │ ├── test-175.json │ │ ├── test-175_score_gpt.json │ │ ├── test-176.json │ │ ├── 
test-176_score_gpt.json │ │ ├── test-177.json │ │ ├── test-177_score_gpt.json │ │ ├── test-178.json │ │ ├── test-178_score_gpt.json │ │ ├── test-179.json │ │ ├── test-179_score_gpt.json │ │ ├── test-17_score_gpt.json │ │ ├── test-18.json │ │ ├── test-180.json │ │ ├── test-180_score_gpt.json │ │ ├── test-181.json │ │ ├── test-181_score_gpt.json │ │ ├── test-182.json │ │ ├── test-182_score_gpt.json │ │ ├── test-183.json │ │ ├── test-183_score_gpt.json │ │ ├── test-184.json │ │ ├── test-184_score_gpt.json │ │ ├── test-185.json │ │ ├── test-185_score_gpt.json │ │ ├── test-186.json │ │ ├── test-186_score_gpt.json │ │ ├── test-187.json │ │ ├── test-187_score_gpt.json │ │ ├── test-188.json │ │ ├── test-188_score_gpt.json │ │ ├── test-189.json │ │ ├── test-189_score_gpt.json │ │ ├── test-18_score_gpt.json │ │ ├── test-19.json │ │ ├── test-190.json │ │ ├── test-190_score_gpt.json │ │ ├── test-191.json │ │ ├── test-191_score_gpt.json │ │ ├── test-192.json │ │ ├── test-192_score_gpt.json │ │ ├── test-193.json │ │ ├── test-193_score_gpt.json │ │ ├── test-194.json │ │ ├── test-194_score_gpt.json │ │ ├── test-195.json │ │ ├── test-195_score_gpt.json │ │ ├── test-196.json │ │ ├── test-196_score_gpt.json │ │ ├── test-197.json │ │ ├── test-197_score_gpt.json │ │ ├── test-198.json │ │ ├── test-198_score_gpt.json │ │ ├── test-199.json │ │ ├── test-199_score_gpt.json │ │ ├── test-19_score_gpt.json │ │ ├── test-1_score_gpt.json │ │ ├── test-2.json │ │ ├── test-20.json │ │ ├── test-200.json │ │ ├── test-200_score_gpt.json │ │ ├── test-201.json │ │ ├── test-201_score_gpt.json │ │ ├── test-202.json │ │ ├── test-202_score_gpt.json │ │ ├── test-203.json │ │ ├── test-203_score_gpt.json │ │ ├── test-204.json │ │ ├── test-204_score_gpt.json │ │ ├── test-205.json │ │ ├── test-205_score_gpt.json │ │ ├── test-206.json │ │ ├── test-206_score_gpt.json │ │ ├── test-207.json │ │ ├── test-207_score_gpt.json │ │ ├── test-208.json │ │ ├── test-208_score_gpt.json │ │ ├── test-209.json │ │ ├── 
test-209_score_gpt.json │ │ ├── test-20_score_gpt.json │ │ ├── test-21.json │ │ ├── test-210.json │ │ ├── test-210_score_gpt.json │ │ ├── test-211.json │ │ ├── test-211_score_gpt.json │ │ ├── test-212.json │ │ ├── test-212_score_gpt.json │ │ ├── test-213.json │ │ ├── test-213_score_gpt.json │ │ ├── test-214.json │ │ ├── test-214_score_gpt.json │ │ ├── test-215.json │ │ ├── test-215_score_gpt.json │ │ ├── test-216.json │ │ ├── test-216_score_gpt.json │ │ ├── test-217.json │ │ ├── test-217_score_gpt.json │ │ ├── test-218.json │ │ ├── test-218_score_gpt.json │ │ ├── test-219.json │ │ ├── test-219_score_gpt.json │ │ ├── test-21_score_gpt.json │ │ ├── test-22.json │ │ ├── test-220.json │ │ ├── test-220_score_gpt.json │ │ ├── test-221.json │ │ ├── test-221_score_gpt.json │ │ ├── test-222.json │ │ ├── test-222_score_gpt.json │ │ ├── test-223.json │ │ ├── test-223_score_gpt.json │ │ ├── test-224.json │ │ ├── test-224_score_gpt.json │ │ ├── test-225.json │ │ ├── test-225_score_gpt.json │ │ ├── test-226.json │ │ ├── test-226_score_gpt.json │ │ ├── test-227.json │ │ ├── test-227_score_gpt.json │ │ ├── test-228.json │ │ ├── test-228_score_gpt.json │ │ ├── test-229.json │ │ ├── test-229_score_gpt.json │ │ ├── test-22_score_gpt.json │ │ ├── test-23.json │ │ ├── test-230.json │ │ ├── test-230_score_gpt.json │ │ ├── test-231.json │ │ ├── test-231_score_gpt.json │ │ ├── test-232.json │ │ ├── test-232_score_gpt.json │ │ ├── test-233.json │ │ ├── test-233_score_gpt.json │ │ ├── test-234.json │ │ ├── test-234_score_gpt.json │ │ ├── test-235.json │ │ ├── test-235_score_gpt.json │ │ ├── test-236.json │ │ ├── test-236_score_gpt.json │ │ ├── test-237.json │ │ ├── test-237_score_gpt.json │ │ ├── test-238.json │ │ ├── test-238_score_gpt.json │ │ ├── test-239.json │ │ ├── test-239_score_gpt.json │ │ ├── test-23_score_gpt.json │ │ ├── test-24.json │ │ ├── test-240.json │ │ ├── test-240_score_gpt.json │ │ ├── test-241.json │ │ ├── test-241_score_gpt.json │ │ ├── test-242.json │ │ ├── 
test-242_score_gpt.json │ │ ├── test-243.json │ │ ├── test-243_score_gpt.json │ │ ├── test-244.json │ │ ├── test-244_score_gpt.json │ │ ├── test-245.json │ │ ├── test-245_score_gpt.json │ │ ├── test-246.json │ │ ├── test-246_score_gpt.json │ │ ├── test-247.json │ │ ├── test-247_score_gpt.json │ │ ├── test-248.json │ │ ├── test-248_score_gpt.json │ │ ├── test-249.json │ │ ├── test-249_score_gpt.json │ │ ├── test-24_score_gpt.json │ │ ├── test-25.json │ │ ├── test-250.json │ │ ├── test-250_score_gpt.json │ │ ├── test-251.json │ │ ├── test-251_score_gpt.json │ │ ├── test-252.json │ │ ├── test-252_score_gpt.json │ │ ├── test-253.json │ │ ├── test-253_score_gpt.json │ │ ├── test-254.json │ │ ├── test-254_score_gpt.json │ │ ├── test-255.json │ │ ├── test-255_score_gpt.json │ │ ├── test-256.json │ │ ├── test-256_score_gpt.json │ │ ├── test-257.json │ │ ├── test-257_score_gpt.json │ │ ├── test-258.json │ │ ├── test-258_score_gpt.json │ │ ├── test-259.json │ │ ├── test-259_score_gpt.json │ │ ├── test-25_score_gpt.json │ │ ├── test-26.json │ │ ├── test-260.json │ │ ├── test-260_score_gpt.json │ │ ├── test-261.json │ │ ├── test-261_score_gpt.json │ │ ├── test-262.json │ │ ├── test-262_score_gpt.json │ │ ├── test-263.json │ │ ├── test-263_score_gpt.json │ │ ├── test-264.json │ │ ├── test-264_score_gpt.json │ │ ├── test-265.json │ │ ├── test-265_score_gpt.json │ │ ├── test-266.json │ │ ├── test-266_score_gpt.json │ │ ├── test-267.json │ │ ├── test-267_score_gpt.json │ │ ├── test-268.json │ │ ├── test-268_score_gpt.json │ │ ├── test-269.json │ │ ├── test-269_score_gpt.json │ │ ├── test-26_score_gpt.json │ │ ├── test-27.json │ │ ├── test-270.json │ │ ├── test-270_score_gpt.json │ │ ├── test-271.json │ │ ├── test-271_score_gpt.json │ │ ├── test-272.json │ │ ├── test-272_score_gpt.json │ │ ├── test-273.json │ │ ├── test-273_score_gpt.json │ │ ├── test-274.json │ │ ├── test-274_score_gpt.json │ │ ├── test-275.json │ │ ├── test-275_score_gpt.json │ │ ├── test-276.json │ │ ├── 
test-276_score_gpt.json │ │ ├── test-277.json │ │ ├── test-277_score_gpt.json │ │ ├── test-278.json │ │ ├── test-278_score_gpt.json │ │ ├── test-279.json │ │ ├── test-279_score_gpt.json │ │ ├── test-27_score_gpt.json │ │ ├── test-28.json │ │ ├── test-280.json │ │ ├── test-280_score_gpt.json │ │ ├── test-281.json │ │ ├── test-281_score_gpt.json │ │ ├── test-282.json │ │ ├── test-282_score_gpt.json │ │ ├── test-283.json │ │ ├── test-283_score_gpt.json │ │ ├── test-284.json │ │ ├── test-284_score_gpt.json │ │ ├── test-285.json │ │ ├── test-285_score_gpt.json │ │ ├── test-286.json │ │ ├── test-286_score_gpt.json │ │ ├── test-287.json │ │ ├── test-287_score_gpt.json │ │ ├── test-288.json │ │ ├── test-288_score_gpt.json │ │ ├── test-289.json │ │ ├── test-289_score_gpt.json │ │ ├── test-28_score_gpt.json │ │ ├── test-29.json │ │ ├── test-290.json │ │ ├── test-290_score_gpt.json │ │ ├── test-291.json │ │ ├── test-291_score_gpt.json │ │ ├── test-292.json │ │ ├── test-292_score_gpt.json │ │ ├── test-293.json │ │ ├── test-293_score_gpt.json │ │ ├── test-294.json │ │ ├── test-294_score_gpt.json │ │ ├── test-295.json │ │ ├── test-295_score_gpt.json │ │ ├── test-296.json │ │ ├── test-296_score_gpt.json │ │ ├── test-297.json │ │ ├── test-297_score_gpt.json │ │ ├── test-298.json │ │ ├── test-298_score_gpt.json │ │ ├── test-299.json │ │ ├── test-299_score_gpt.json │ │ ├── test-29_score_gpt.json │ │ ├── test-2_score_gpt.json │ │ ├── test-3.json │ │ ├── test-30.json │ │ ├── test-300.json │ │ ├── test-300_score_gpt.json │ │ ├── test-301.json │ │ ├── test-301_score_gpt.json │ │ ├── test-302.json │ │ ├── test-302_score_gpt.json │ │ ├── test-303.json │ │ ├── test-303_score_gpt.json │ │ ├── test-304.json │ │ ├── test-304_score_gpt.json │ │ ├── test-305.json │ │ ├── test-305_score_gpt.json │ │ ├── test-306.json │ │ ├── test-306_score_gpt.json │ │ ├── test-307.json │ │ ├── test-307_score_gpt.json │ │ ├── test-308.json │ │ ├── test-308_score_gpt.json │ │ ├── test-309.json │ │ ├── 
test-309_score_gpt.json │ │ ├── test-30_score_gpt.json │ │ ├── test-31.json │ │ ├── test-310.json │ │ ├── test-310_score_gpt.json │ │ ├── test-311.json │ │ ├── test-311_score_gpt.json │ │ ├── test-312.json │ │ ├── test-312_score_gpt.json │ │ ├── test-313.json │ │ ├── test-313_score_gpt.json │ │ ├── test-314.json │ │ ├── test-314_score_gpt.json │ │ ├── test-315.json │ │ ├── test-315_score_gpt.json │ │ ├── test-316.json │ │ ├── test-316_score_gpt.json │ │ ├── test-317.json │ │ ├── test-317_score_gpt.json │ │ ├── test-318.json │ │ ├── test-318_score_gpt.json │ │ ├── test-319.json │ │ ├── test-319_score_gpt.json │ │ ├── test-31_score_gpt.json │ │ ├── test-32.json │ │ ├── test-320.json │ │ ├── test-320_score_gpt.json │ │ ├── test-321.json │ │ ├── test-321_score_gpt.json │ │ ├── test-322.json │ │ ├── test-322_score_gpt.json │ │ ├── test-323.json │ │ ├── test-323_score_gpt.json │ │ ├── test-324.json │ │ ├── test-324_score_gpt.json │ │ ├── test-325.json │ │ ├── test-325_score_gpt.json │ │ ├── test-326.json │ │ ├── test-326_score_gpt.json │ │ ├── test-327.json │ │ ├── test-327_score_gpt.json │ │ ├── test-328.json │ │ ├── test-328_score_gpt.json │ │ ├── test-329.json │ │ ├── test-329_score_gpt.json │ │ ├── test-32_score_gpt.json │ │ ├── test-33.json │ │ ├── test-330.json │ │ ├── test-330_score_gpt.json │ │ ├── test-331.json │ │ ├── test-331_score_gpt.json │ │ ├── test-332.json │ │ ├── test-332_score_gpt.json │ │ ├── test-333.json │ │ ├── test-333_score_gpt.json │ │ ├── test-334.json │ │ ├── test-334_score_gpt.json │ │ ├── test-335.json │ │ ├── test-335_score_gpt.json │ │ ├── test-336.json │ │ ├── test-336_score_gpt.json │ │ ├── test-337.json │ │ ├── test-337_score_gpt.json │ │ ├── test-338.json │ │ ├── test-338_score_gpt.json │ │ ├── test-339.json │ │ ├── test-339_score_gpt.json │ │ ├── test-33_score_gpt.json │ │ ├── test-34.json │ │ ├── test-340.json │ │ ├── test-340_score_gpt.json │ │ ├── test-341.json │ │ ├── test-341_score_gpt.json │ │ ├── test-342.json │ │ ├── 
test-342_score_gpt.json │ │ ├── test-343.json │ │ ├── test-343_score_gpt.json │ │ ├── test-344.json │ │ ├── test-344_score_gpt.json │ │ ├── test-345.json │ │ ├── test-345_score_gpt.json │ │ ├── test-346.json │ │ ├── test-346_score_gpt.json │ │ ├── test-347.json │ │ ├── test-347_score_gpt.json │ │ ├── test-348.json │ │ ├── test-348_score_gpt.json │ │ ├── test-349.json │ │ ├── test-349_score_gpt.json │ │ ├── test-34_score_gpt.json │ │ ├── test-35.json │ │ ├── test-350.json │ │ ├── test-350_score_gpt.json │ │ ├── test-351.json │ │ ├── test-351_score_gpt.json │ │ ├── test-352.json │ │ ├── test-352_score_gpt.json │ │ ├── test-353.json │ │ ├── test-353_score_gpt.json │ │ ├── test-354.json │ │ ├── test-354_score_gpt.json │ │ ├── test-355.json │ │ ├── test-355_score_gpt.json │ │ ├── test-356.json │ │ ├── test-356_score_gpt.json │ │ ├── test-357.json │ │ ├── test-357_score_gpt.json │ │ ├── test-358.json │ │ ├── test-358_score_gpt.json │ │ ├── test-359.json │ │ ├── test-359_score_gpt.json │ │ ├── test-35_score_gpt.json │ │ ├── test-36.json │ │ ├── test-360.json │ │ ├── test-360_score_gpt.json │ │ ├── test-361.json │ │ ├── test-361_score_gpt.json │ │ ├── test-362.json │ │ ├── test-362_score_gpt.json │ │ ├── test-363.json │ │ ├── test-363_score_gpt.json │ │ ├── test-364.json │ │ ├── test-364_score_gpt.json │ │ ├── test-365.json │ │ ├── test-365_score_gpt.json │ │ ├── test-366.json │ │ ├── test-366_score_gpt.json │ │ ├── test-367.json │ │ ├── test-367_score_gpt.json │ │ ├── test-368.json │ │ ├── test-368_score_gpt.json │ │ ├── test-369.json │ │ ├── test-369_score_gpt.json │ │ ├── test-36_score_gpt.json │ │ ├── test-37.json │ │ ├── test-370.json │ │ ├── test-370_score_gpt.json │ │ ├── test-371.json │ │ ├── test-371_score_gpt.json │ │ ├── test-372.json │ │ ├── test-372_score_gpt.json │ │ ├── test-373.json │ │ ├── test-373_score_gpt.json │ │ ├── test-374.json │ │ ├── test-374_score_gpt.json │ │ ├── test-375.json │ │ ├── test-375_score_gpt.json │ │ ├── test-376.json │ │ ├── 
test-376_score_gpt.json │ │ ├── test-377.json │ │ ├── test-377_score_gpt.json │ │ ├── test-378.json │ │ ├── test-378_score_gpt.json │ │ ├── test-379.json │ │ ├── test-379_score_gpt.json │ │ ├── test-37_score_gpt.json │ │ ├── test-38.json │ │ ├── test-380.json │ │ ├── test-380_score_gpt.json │ │ ├── test-381.json │ │ ├── test-381_score_gpt.json │ │ ├── test-382.json │ │ ├── test-382_score_gpt.json │ │ ├── test-383.json │ │ ├── test-383_score_gpt.json │ │ ├── test-384.json │ │ ├── test-384_score_gpt.json │ │ ├── test-385.json │ │ ├── test-385_score_gpt.json │ │ ├── test-386.json │ │ ├── test-386_score_gpt.json │ │ ├── test-387.json │ │ ├── test-387_score_gpt.json │ │ ├── test-388.json │ │ ├── test-388_score_gpt.json │ │ ├── test-389.json │ │ ├── test-389_score_gpt.json │ │ ├── test-38_score_gpt.json │ │ ├── test-39.json │ │ ├── test-390.json │ │ ├── test-390_score_gpt.json │ │ ├── test-391.json │ │ ├── test-391_score_gpt.json │ │ ├── test-392.json │ │ ├── test-392_score_gpt.json │ │ ├── test-393.json │ │ ├── test-393_score_gpt.json │ │ ├── test-394.json │ │ ├── test-394_score_gpt.json │ │ ├── test-395.json │ │ ├── test-395_score_gpt.json │ │ ├── test-396.json │ │ ├── test-396_score_gpt.json │ │ ├── test-397.json │ │ ├── test-397_score_gpt.json │ │ ├── test-398.json │ │ ├── test-398_score_gpt.json │ │ ├── test-399.json │ │ ├── test-399_score_gpt.json │ │ ├── test-39_score_gpt.json │ │ ├── test-3_score_gpt.json │ │ ├── test-4.json │ │ ├── test-40.json │ │ ├── test-400.json │ │ ├── test-400_score_gpt.json │ │ ├── test-401.json │ │ ├── test-401_score_gpt.json │ │ ├── test-402.json │ │ ├── test-402_score_gpt.json │ │ ├── test-403.json │ │ ├── test-403_score_gpt.json │ │ ├── test-404.json │ │ ├── test-404_score_gpt.json │ │ ├── test-405.json │ │ ├── test-405_score_gpt.json │ │ ├── test-406.json │ │ ├── test-406_score_gpt.json │ │ ├── test-407.json │ │ ├── test-407_score_gpt.json │ │ ├── test-408.json │ │ ├── test-408_score_gpt.json │ │ ├── test-409.json │ │ ├── 
test-409_score_gpt.json │ │ ├── test-40_score_gpt.json │ │ ├── test-41.json │ │ ├── test-410.json │ │ ├── test-410_score_gpt.json │ │ ├── test-411.json │ │ ├── test-411_score_gpt.json │ │ ├── test-412.json │ │ ├── test-412_score_gpt.json │ │ ├── test-413.json │ │ ├── test-413_score_gpt.json │ │ ├── test-414.json │ │ ├── test-414_score_gpt.json │ │ ├── test-415.json │ │ ├── test-415_score_gpt.json │ │ ├── test-416.json │ │ ├── test-416_score_gpt.json │ │ ├── test-417.json │ │ ├── test-417_score_gpt.json │ │ ├── test-418.json │ │ ├── test-418_score_gpt.json │ │ ├── test-419.json │ │ ├── test-419_score_gpt.json │ │ ├── test-41_score_gpt.json │ │ ├── test-42.json │ │ ├── test-420.json │ │ ├── test-420_score_gpt.json │ │ ├── test-421.json │ │ ├── test-421_score_gpt.json │ │ ├── test-422.json │ │ ├── test-422_score_gpt.json │ │ ├── test-423.json │ │ ├── test-423_score_gpt.json │ │ ├── test-424.json │ │ ├── test-424_score_gpt.json │ │ ├── test-425.json │ │ ├── test-425_score_gpt.json │ │ ├── test-426.json │ │ ├── test-426_score_gpt.json │ │ ├── test-427.json │ │ ├── test-427_score_gpt.json │ │ ├── test-428.json │ │ ├── test-428_score_gpt.json │ │ ├── test-429.json │ │ ├── test-429_score_gpt.json │ │ ├── test-42_score_gpt.json │ │ ├── test-43.json │ │ ├── test-430.json │ │ ├── test-430_score_gpt.json │ │ ├── test-431.json │ │ ├── test-431_score_gpt.json │ │ ├── test-432.json │ │ ├── test-432_score_gpt.json │ │ ├── test-433.json │ │ ├── test-433_score_gpt.json │ │ ├── test-434.json │ │ ├── test-434_score_gpt.json │ │ ├── test-435.json │ │ ├── test-435_score_gpt.json │ │ ├── test-436.json │ │ ├── test-436_score_gpt.json │ │ ├── test-437.json │ │ ├── test-437_score_gpt.json │ │ ├── test-438.json │ │ ├── test-438_score_gpt.json │ │ ├── test-439.json │ │ ├── test-439_score_gpt.json │ │ ├── test-43_score_gpt.json │ │ ├── test-44.json │ │ ├── test-440.json │ │ ├── test-440_score_gpt.json │ │ ├── test-441.json │ │ ├── test-441_score_gpt.json │ │ ├── test-442.json │ │ ├── 
test-442_score_gpt.json │ │ ├── test-443.json │ │ ├── test-443_score_gpt.json │ │ ├── test-444.json │ │ ├── test-444_score_gpt.json │ │ ├── test-445.json │ │ ├── test-445_score_gpt.json │ │ ├── test-446.json │ │ ├── test-446_score_gpt.json │ │ ├── test-447.json │ │ ├── test-447_score_gpt.json │ │ ├── test-448.json │ │ ├── test-448_score_gpt.json │ │ ├── test-449.json │ │ ├── test-449_score_gpt.json │ │ ├── test-44_score_gpt.json │ │ ├── test-45.json │ │ ├── test-450.json │ │ ├── test-450_score_gpt.json │ │ ├── test-451.json │ │ ├── test-451_score_gpt.json │ │ ├── test-452.json │ │ ├── test-452_score_gpt.json │ │ ├── test-453.json │ │ ├── test-453_score_gpt.json │ │ ├── test-454.json │ │ ├── test-454_score_gpt.json │ │ ├── test-455.json │ │ ├── test-455_score_gpt.json │ │ ├── test-456.json │ │ ├── test-456_score_gpt.json │ │ ├── test-457.json │ │ ├── test-457_score_gpt.json │ │ ├── test-458.json │ │ ├── test-458_score_gpt.json │ │ ├── test-459.json │ │ ├── test-459_score_gpt.json │ │ ├── test-45_score_gpt.json │ │ ├── test-46.json │ │ ├── test-460.json │ │ ├── test-460_score_gpt.json │ │ ├── test-461.json │ │ ├── test-461_score_gpt.json │ │ ├── test-462.json │ │ ├── test-462_score_gpt.json │ │ ├── test-463.json │ │ ├── test-463_score_gpt.json │ │ ├── test-464.json │ │ ├── test-464_score_gpt.json │ │ ├── test-465.json │ │ ├── test-465_score_gpt.json │ │ ├── test-466.json │ │ ├── test-466_score_gpt.json │ │ ├── test-467.json │ │ ├── test-467_score_gpt.json │ │ ├── test-468.json │ │ ├── test-468_score_gpt.json │ │ ├── test-469.json │ │ ├── test-469_score_gpt.json │ │ ├── test-46_score_gpt.json │ │ ├── test-47.json │ │ ├── test-470.json │ │ ├── test-470_score_gpt.json │ │ ├── test-471.json │ │ ├── test-471_score_gpt.json │ │ ├── test-472.json │ │ ├── test-472_score_gpt.json │ │ ├── test-473.json │ │ ├── test-473_score_gpt.json │ │ ├── test-474.json │ │ ├── test-474_score_gpt.json │ │ ├── test-475.json │ │ ├── test-475_score_gpt.json │ │ ├── test-476.json │ │ ├── 
test-476_score_gpt.json │ │ ├── test-477.json │ │ ├── test-477_score_gpt.json │ │ ├── test-478.json │ │ ├── test-478_score_gpt.json │ │ ├── test-479.json │ │ ├── test-479_score_gpt.json │ │ ├── test-47_score_gpt.json │ │ ├── test-48.json │ │ ├── test-480.json │ │ ├── test-480_score_gpt.json │ │ ├── test-481.json │ │ ├── test-481_score_gpt.json │ │ ├── test-482.json │ │ ├── test-482_score_gpt.json │ │ ├── test-483.json │ │ ├── test-483_score_gpt.json │ │ ├── test-484.json │ │ ├── test-484_score_gpt.json │ │ ├── test-485.json │ │ ├── test-485_score_gpt.json │ │ ├── test-486.json │ │ ├── test-486_score_gpt.json │ │ ├── test-487.json │ │ ├── test-487_score_gpt.json │ │ ├── test-488.json │ │ ├── test-488_score_gpt.json │ │ ├── test-489.json │ │ ├── test-489_score_gpt.json │ │ ├── test-48_score_gpt.json │ │ ├── test-49.json │ │ ├── test-490.json │ │ ├── test-490_score_gpt.json │ │ ├── test-491.json │ │ ├── test-491_score_gpt.json │ │ ├── test-492.json │ │ ├── test-492_score_gpt.json │ │ ├── test-493.json │ │ ├── test-493_score_gpt.json │ │ ├── test-494.json │ │ ├── test-494_score_gpt.json │ │ ├── test-495.json │ │ ├── test-495_score_gpt.json │ │ ├── test-496.json │ │ ├── test-496_score_gpt.json │ │ ├── test-497.json │ │ ├── test-497_score_gpt.json │ │ ├── test-498.json │ │ ├── test-498_score_gpt.json │ │ ├── test-499.json │ │ ├── test-499_score_gpt.json │ │ ├── test-49_score_gpt.json │ │ ├── test-4_score_gpt.json │ │ ├── test-5.json │ │ ├── test-50.json │ │ ├── test-500.json │ │ ├── test-500_score_gpt.json │ │ ├── test-501.json │ │ ├── test-501_score_gpt.json │ │ ├── test-502.json │ │ ├── test-502_score_gpt.json │ │ ├── test-503.json │ │ ├── test-503_score_gpt.json │ │ ├── test-504.json │ │ ├── test-504_score_gpt.json │ │ ├── test-505.json │ │ ├── test-505_score_gpt.json │ │ ├── test-506.json │ │ ├── test-506_score_gpt.json │ │ ├── test-507.json │ │ ├── test-507_score_gpt.json │ │ ├── test-508.json │ │ ├── test-508_score_gpt.json │ │ ├── test-509.json │ │ ├── 
test-509_score_gpt.json │ │ ├── test-50_score_gpt.json │ │ ├── test-51.json │ │ ├── test-510.json │ │ ├── test-510_score_gpt.json │ │ ├── test-511.json │ │ ├── test-511_score_gpt.json │ │ ├── test-512.json │ │ ├── test-512_score_gpt.json │ │ ├── test-513.json │ │ ├── test-513_score_gpt.json │ │ ├── test-514.json │ │ ├── test-514_score_gpt.json │ │ ├── test-515.json │ │ ├── test-515_score_gpt.json │ │ ├── test-516.json │ │ ├── test-516_score_gpt.json │ │ ├── test-517.json │ │ ├── test-517_score_gpt.json │ │ ├── test-518.json │ │ ├── test-518_score_gpt.json │ │ ├── test-519.json │ │ ├── test-519_score_gpt.json │ │ ├── test-51_score_gpt.json │ │ ├── test-52.json │ │ ├── test-520.json │ │ ├── test-520_score_gpt.json │ │ ├── test-521.json │ │ ├── test-521_score_gpt.json │ │ ├── test-522.json │ │ ├── test-522_score_gpt.json │ │ ├── test-523.json │ │ ├── test-523_score_gpt.json │ │ ├── test-524.json │ │ ├── test-524_score_gpt.json │ │ ├── test-525.json │ │ ├── test-525_score_gpt.json │ │ ├── test-526.json │ │ ├── test-526_score_gpt.json │ │ ├── test-527.json │ │ ├── test-527_score_gpt.json │ │ ├── test-528.json │ │ ├── test-528_score_gpt.json │ │ ├── test-529.json │ │ ├── test-529_score_gpt.json │ │ ├── test-52_score_gpt.json │ │ ├── test-53.json │ │ ├── test-530.json │ │ ├── test-530_score_gpt.json │ │ ├── test-531.json │ │ ├── test-531_score_gpt.json │ │ ├── test-532.json │ │ ├── test-532_score_gpt.json │ │ ├── test-533.json │ │ ├── test-533_score_gpt.json │ │ ├── test-534.json │ │ ├── test-534_score_gpt.json │ │ ├── test-535.json │ │ ├── test-535_score_gpt.json │ │ ├── test-536.json │ │ ├── test-536_score_gpt.json │ │ ├── test-537.json │ │ ├── test-537_score_gpt.json │ │ ├── test-538.json │ │ ├── test-538_score_gpt.json │ │ ├── test-539.json │ │ ├── test-539_score_gpt.json │ │ ├── test-53_score_gpt.json │ │ ├── test-54.json │ │ ├── test-540.json │ │ ├── test-540_score_gpt.json │ │ ├── test-541.json │ │ ├── test-541_score_gpt.json │ │ ├── test-542.json │ │ ├── 
test-542_score_gpt.json │ │ ├── test-543.json │ │ ├── test-543_score_gpt.json │ │ ├── test-544.json │ │ ├── test-544_score_gpt.json │ │ ├── test-545.json │ │ ├── test-545_score_gpt.json │ │ ├── test-546.json │ │ ├── test-546_score_gpt.json │ │ ├── test-547.json │ │ ├── test-547_score_gpt.json │ │ ├── test-548.json │ │ ├── test-548_score_gpt.json │ │ ├── test-549.json │ │ ├── test-549_score_gpt.json │ │ ├── test-54_score_gpt.json │ │ ├── test-55.json │ │ ├── test-550.json │ │ ├── test-550_score_gpt.json │ │ ├── test-551.json │ │ ├── test-551_score_gpt.json │ │ ├── test-552.json │ │ ├── test-552_score_gpt.json │ │ ├── test-553.json │ │ ├── test-553_score_gpt.json │ │ ├── test-554.json │ │ ├── test-554_score_gpt.json │ │ ├── test-555.json │ │ ├── test-555_score_gpt.json │ │ ├── test-556.json │ │ ├── test-556_score_gpt.json │ │ ├── test-557.json │ │ ├── test-557_score_gpt.json │ │ ├── test-558.json │ │ ├── test-558_score_gpt.json │ │ ├── test-559.json │ │ ├── test-559_score_gpt.json │ │ ├── test-55_score_gpt.json │ │ ├── test-56.json │ │ ├── test-560.json │ │ ├── test-560_score_gpt.json │ │ ├── test-561.json │ │ ├── test-561_score_gpt.json │ │ ├── test-562.json │ │ ├── test-562_score_gpt.json │ │ ├── test-563.json │ │ ├── test-563_score_gpt.json │ │ ├── test-564.json │ │ ├── test-564_score_gpt.json │ │ ├── test-565.json │ │ ├── test-565_score_gpt.json │ │ ├── test-566.json │ │ ├── test-566_score_gpt.json │ │ ├── test-567.json │ │ ├── test-567_score_gpt.json │ │ ├── test-568.json │ │ ├── test-568_score_gpt.json │ │ ├── test-569.json │ │ ├── test-569_score_gpt.json │ │ ├── test-56_score_gpt.json │ │ ├── test-57.json │ │ ├── test-570.json │ │ ├── test-570_score_gpt.json │ │ ├── test-57_score_gpt.json │ │ ├── test-58.json │ │ ├── test-58_score_gpt.json │ │ ├── test-59.json │ │ ├── test-59_score_gpt.json │ │ ├── test-5_score_gpt.json │ │ ├── test-6.json │ │ ├── test-60.json │ │ ├── test-60_score_gpt.json │ │ ├── test-61.json │ │ ├── test-61_score_gpt.json │ │ ├── 
test-62.json │ │ ├── test-62_score_gpt.json │ │ ├── test-63.json │ │ ├── test-63_score_gpt.json │ │ ├── test-64.json │ │ ├── test-64_score_gpt.json │ │ ├── test-65.json │ │ ├── test-65_score_gpt.json │ │ ├── test-66.json │ │ ├── test-66_score_gpt.json │ │ ├── test-67.json │ │ ├── test-67_score_gpt.json │ │ ├── test-68.json │ │ ├── test-68_score_gpt.json │ │ ├── test-69.json │ │ ├── test-69_score_gpt.json │ │ ├── test-6_score_gpt.json │ │ ├── test-7.json │ │ ├── test-70.json │ │ ├── test-70_score_gpt.json │ │ ├── test-71.json │ │ ├── test-71_score_gpt.json │ │ ├── test-72.json │ │ ├── test-72_score_gpt.json │ │ ├── test-73.json │ │ ├── test-73_score_gpt.json │ │ ├── test-74.json │ │ ├── test-74_score_gpt.json │ │ ├── test-75.json │ │ ├── test-75_score_gpt.json │ │ ├── test-76.json │ │ ├── test-76_score_gpt.json │ │ ├── test-77.json │ │ ├── test-77_score_gpt.json │ │ ├── test-78.json │ │ ├── test-78_score_gpt.json │ │ ├── test-79.json │ │ ├── test-79_score_gpt.json │ │ ├── test-7_score_gpt.json │ │ ├── test-8.json │ │ ├── test-80.json │ │ ├── test-80_score_gpt.json │ │ ├── test-81.json │ │ ├── test-81_score_gpt.json │ │ ├── test-82.json │ │ ├── test-82_score_gpt.json │ │ ├── test-83.json │ │ ├── test-83_score_gpt.json │ │ ├── test-84.json │ │ ├── test-84_score_gpt.json │ │ ├── test-85.json │ │ ├── test-85_score_gpt.json │ │ ├── test-86.json │ │ ├── test-86_score_gpt.json │ │ ├── test-87.json │ │ ├── test-87_score_gpt.json │ │ ├── test-88.json │ │ ├── test-88_score_gpt.json │ │ ├── test-89.json │ │ ├── test-89_score_gpt.json │ │ ├── test-8_score_gpt.json │ │ ├── test-9.json │ │ ├── test-90.json │ │ ├── test-90_score_gpt.json │ │ ├── test-91.json │ │ ├── test-91_score_gpt.json │ │ ├── test-92.json │ │ ├── test-92_score_gpt.json │ │ ├── test-93.json │ │ ├── test-93_score_gpt.json │ │ ├── test-94.json │ │ ├── test-94_score_gpt.json │ │ ├── test-95.json │ │ ├── test-95_score_gpt.json │ │ ├── test-96.json │ │ ├── test-96_score_gpt.json │ │ ├── test-97.json │ │ ├── 
test-97_score_gpt.json │ │ ├── test-98.json │ │ ├── test-98_score_gpt.json │ │ ├── test-99.json │ │ ├── test-99_score_gpt.json │ │ └── test-9_score_gpt.json ├── config.py ├── datasets │ ├── duorc │ │ ├── test-0.txt │ │ ├── test-1.txt │ │ ├── test-10.txt │ │ ├── test-100.txt │ │ ├── test-101.txt │ │ ├── test-102.txt │ │ ├── test-103.txt │ │ ├── test-104.txt │ │ ├── test-105.txt │ │ ├── test-106.txt │ │ ├── test-107.txt │ │ ├── test-108.txt │ │ ├── test-109.txt │ │ ├── test-11.txt │ │ ├── test-110.txt │ │ ├── test-111.txt │ │ ├── test-112.txt │ │ ├── test-113.txt │ │ ├── test-114.txt │ │ ├── test-115.txt │ │ ├── test-116.txt │ │ ├── test-117.txt │ │ ├── test-118.txt │ │ ├── test-119.txt │ │ ├── test-12.txt │ │ ├── test-120.txt │ │ ├── test-121.txt │ │ ├── test-122.txt │ │ ├── test-123.txt │ │ ├── test-124.txt │ │ ├── test-125.txt │ │ ├── test-126.txt │ │ ├── test-127.txt │ │ ├── test-128.txt │ │ ├── test-129.txt │ │ ├── test-13.txt │ │ ├── test-130.txt │ │ ├── test-131.txt │ │ ├── test-132.txt │ │ ├── test-133.txt │ │ ├── test-134.txt │ │ ├── test-135.txt │ │ ├── test-136.txt │ │ ├── test-137.txt │ │ ├── test-138.txt │ │ ├── test-139.txt │ │ ├── test-14.txt │ │ ├── test-140.txt │ │ ├── test-141.txt │ │ ├── test-142.txt │ │ ├── test-143.txt │ │ ├── test-144.txt │ │ ├── test-145.txt │ │ ├── test-146.txt │ │ ├── test-147.txt │ │ ├── test-148.txt │ │ ├── test-149.txt │ │ ├── test-15.txt │ │ ├── test-150.txt │ │ ├── test-151.txt │ │ ├── test-152.txt │ │ ├── test-153.txt │ │ ├── test-154.txt │ │ ├── test-155.txt │ │ ├── test-156.txt │ │ ├── test-157.txt │ │ ├── test-158.txt │ │ ├── test-159.txt │ │ ├── test-16.txt │ │ ├── test-160.txt │ │ ├── test-161.txt │ │ ├── test-162.txt │ │ ├── test-163.txt │ │ ├── test-164.txt │ │ ├── test-165.txt │ │ ├── test-166.txt │ │ ├── test-167.txt │ │ ├── test-168.txt │ │ ├── test-169.txt │ │ ├── test-17.txt │ │ ├── test-170.txt │ │ ├── test-171.txt │ │ ├── test-172.txt │ │ ├── test-173.txt │ │ ├── test-174.txt │ │ ├── test-175.txt │ │ ├── 
test-176.txt │ │ ├── test-177.txt │ │ ├── test-178.txt │ │ ├── test-179.txt │ │ ├── test-18.txt │ │ ├── test-180.txt │ │ ├── test-181.txt │ │ ├── test-182.txt │ │ ├── test-183.txt │ │ ├── test-184.txt │ │ ├── test-185.txt │ │ ├── test-186.txt │ │ ├── test-187.txt │ │ ├── test-188.txt │ │ ├── test-189.txt │ │ ├── test-19.txt │ │ ├── test-190.txt │ │ ├── test-191.txt │ │ ├── test-192.txt │ │ ├── test-193.txt │ │ ├── test-194.txt │ │ ├── test-195.txt │ │ ├── test-196.txt │ │ ├── test-197.txt │ │ ├── test-198.txt │ │ ├── test-199.txt │ │ ├── test-2.txt │ │ ├── test-20.txt │ │ ├── test-200.txt │ │ ├── test-201.txt │ │ ├── test-202.txt │ │ ├── test-203.txt │ │ ├── test-204.txt │ │ ├── test-205.txt │ │ ├── test-206.txt │ │ ├── test-207.txt │ │ ├── test-208.txt │ │ ├── test-209.txt │ │ ├── test-21.txt │ │ ├── test-210.txt │ │ ├── test-211.txt │ │ ├── test-212.txt │ │ ├── test-213.txt │ │ ├── test-214.txt │ │ ├── test-215.txt │ │ ├── test-216.txt │ │ ├── test-217.txt │ │ ├── test-218.txt │ │ ├── test-219.txt │ │ ├── test-22.txt │ │ ├── test-220.txt │ │ ├── test-221.txt │ │ ├── test-222.txt │ │ ├── test-223.txt │ │ ├── test-224.txt │ │ ├── test-225.txt │ │ ├── test-226.txt │ │ ├── test-227.txt │ │ ├── test-228.txt │ │ ├── test-229.txt │ │ ├── test-23.txt │ │ ├── test-230.txt │ │ ├── test-231.txt │ │ ├── test-232.txt │ │ ├── test-233.txt │ │ ├── test-234.txt │ │ ├── test-235.txt │ │ ├── test-236.txt │ │ ├── test-237.txt │ │ ├── test-238.txt │ │ ├── test-239.txt │ │ ├── test-24.txt │ │ ├── test-240.txt │ │ ├── test-241.txt │ │ ├── test-242.txt │ │ ├── test-243.txt │ │ ├── test-244.txt │ │ ├── test-245.txt │ │ ├── test-246.txt │ │ ├── test-247.txt │ │ ├── test-248.txt │ │ ├── test-249.txt │ │ ├── test-25.txt │ │ ├── test-250.txt │ │ ├── test-251.txt │ │ ├── test-252.txt │ │ ├── test-253.txt │ │ ├── test-254.txt │ │ ├── test-255.txt │ │ ├── test-256.txt │ │ ├── test-257.txt │ │ ├── test-258.txt │ │ ├── test-259.txt │ │ ├── test-26.txt │ │ ├── test-260.txt │ │ ├── test-261.txt │ 
│ ├── test-262.txt │ │ ├── test-263.txt │ │ ├── test-264.txt │ │ ├── test-265.txt │ │ ├── test-266.txt │ │ ├── test-267.txt │ │ ├── test-268.txt │ │ ├── test-269.txt │ │ ├── test-27.txt │ │ ├── test-270.txt │ │ ├── test-271.txt │ │ ├── test-272.txt │ │ ├── test-273.txt │ │ ├── test-274.txt │ │ ├── test-275.txt │ │ ├── test-276.txt │ │ ├── test-277.txt │ │ ├── test-278.txt │ │ ├── test-279.txt │ │ ├── test-28.txt │ │ ├── test-280.txt │ │ ├── test-281.txt │ │ ├── test-282.txt │ │ ├── test-283.txt │ │ ├── test-284.txt │ │ ├── test-285.txt │ │ ├── test-286.txt │ │ ├── test-287.txt │ │ ├── test-288.txt │ │ ├── test-289.txt │ │ ├── test-29.txt │ │ ├── test-290.txt │ │ ├── test-291.txt │ │ ├── test-292.txt │ │ ├── test-293.txt │ │ ├── test-294.txt │ │ ├── test-295.txt │ │ ├── test-296.txt │ │ ├── test-297.txt │ │ ├── test-298.txt │ │ ├── test-299.txt │ │ ├── test-3.txt │ │ ├── test-30.txt │ │ ├── test-300.txt │ │ ├── test-301.txt │ │ ├── test-302.txt │ │ ├── test-303.txt │ │ ├── test-304.txt │ │ ├── test-305.txt │ │ ├── test-306.txt │ │ ├── test-307.txt │ │ ├── test-308.txt │ │ ├── test-309.txt │ │ ├── test-31.txt │ │ ├── test-310.txt │ │ ├── test-311.txt │ │ ├── test-312.txt │ │ ├── test-313.txt │ │ ├── test-314.txt │ │ ├── test-315.txt │ │ ├── test-316.txt │ │ ├── test-317.txt │ │ ├── test-318.txt │ │ ├── test-319.txt │ │ ├── test-32.txt │ │ ├── test-320.txt │ │ ├── test-321.txt │ │ ├── test-322.txt │ │ ├── test-323.txt │ │ ├── test-324.txt │ │ ├── test-325.txt │ │ ├── test-326.txt │ │ ├── test-327.txt │ │ ├── test-328.txt │ │ ├── test-329.txt │ │ ├── test-33.txt │ │ ├── test-330.txt │ │ ├── test-331.txt │ │ ├── test-332.txt │ │ ├── test-333.txt │ │ ├── test-334.txt │ │ ├── test-335.txt │ │ ├── test-336.txt │ │ ├── test-337.txt │ │ ├── test-338.txt │ │ ├── test-339.txt │ │ ├── test-34.txt │ │ ├── test-340.txt │ │ ├── test-341.txt │ │ ├── test-342.txt │ │ ├── test-343.txt │ │ ├── test-344.txt │ │ ├── test-345.txt │ │ ├── test-346.txt │ │ ├── test-347.txt │ │ ├── 
test-348.txt │ │ ├── test-349.txt │ │ ├── test-35.txt │ │ ├── test-350.txt │ │ ├── test-351.txt │ │ ├── test-352.txt │ │ ├── test-353.txt │ │ ├── test-354.txt │ │ ├── test-355.txt │ │ ├── test-356.txt │ │ ├── test-357.txt │ │ ├── test-358.txt │ │ ├── test-359.txt │ │ ├── test-36.txt │ │ ├── test-360.txt │ │ ├── test-361.txt │ │ ├── test-362.txt │ │ ├── test-363.txt │ │ ├── test-364.txt │ │ ├── test-365.txt │ │ ├── test-366.txt │ │ ├── test-367.txt │ │ ├── test-368.txt │ │ ├── test-369.txt │ │ ├── test-37.txt │ │ ├── test-370.txt │ │ ├── test-371.txt │ │ ├── test-372.txt │ │ ├── test-373.txt │ │ ├── test-374.txt │ │ ├── test-375.txt │ │ ├── test-376.txt │ │ ├── test-377.txt │ │ ├── test-378.txt │ │ ├── test-379.txt │ │ ├── test-38.txt │ │ ├── test-380.txt │ │ ├── test-381.txt │ │ ├── test-382.txt │ │ ├── test-383.txt │ │ ├── test-384.txt │ │ ├── test-385.txt │ │ ├── test-386.txt │ │ ├── test-387.txt │ │ ├── test-388.txt │ │ ├── test-389.txt │ │ ├── test-39.txt │ │ ├── test-390.txt │ │ ├── test-391.txt │ │ ├── test-392.txt │ │ ├── test-393.txt │ │ ├── test-394.txt │ │ ├── test-395.txt │ │ ├── test-396.txt │ │ ├── test-397.txt │ │ ├── test-398.txt │ │ ├── test-399.txt │ │ ├── test-4.txt │ │ ├── test-40.txt │ │ ├── test-400.txt │ │ ├── test-401.txt │ │ ├── test-402.txt │ │ ├── test-403.txt │ │ ├── test-404.txt │ │ ├── test-405.txt │ │ ├── test-406.txt │ │ ├── test-407.txt │ │ ├── test-408.txt │ │ ├── test-409.txt │ │ ├── test-41.txt │ │ ├── test-410.txt │ │ ├── test-411.txt │ │ ├── test-412.txt │ │ ├── test-413.txt │ │ ├── test-414.txt │ │ ├── test-415.txt │ │ ├── test-416.txt │ │ ├── test-417.txt │ │ ├── test-418.txt │ │ ├── test-419.txt │ │ ├── test-42.txt │ │ ├── test-420.txt │ │ ├── test-421.txt │ │ ├── test-422.txt │ │ ├── test-423.txt │ │ ├── test-424.txt │ │ ├── test-425.txt │ │ ├── test-426.txt │ │ ├── test-427.txt │ │ ├── test-428.txt │ │ ├── test-429.txt │ │ ├── test-43.txt │ │ ├── test-430.txt │ │ ├── test-431.txt │ │ ├── test-432.txt │ │ ├── test-433.txt │ 
│ ├── test-434.txt │ │ ├── test-435.txt │ │ ├── test-436.txt │ │ ├── test-437.txt │ │ ├── test-438.txt │ │ ├── test-439.txt │ │ ├── test-44.txt │ │ ├── test-440.txt │ │ ├── test-441.txt │ │ ├── test-442.txt │ │ ├── test-443.txt │ │ ├── test-444.txt │ │ ├── test-445.txt │ │ ├── test-446.txt │ │ ├── test-447.txt │ │ ├── test-448.txt │ │ ├── test-449.txt │ │ ├── test-45.txt │ │ ├── test-450.txt │ │ ├── test-451.txt │ │ ├── test-452.txt │ │ ├── test-453.txt │ │ ├── test-454.txt │ │ ├── test-455.txt │ │ ├── test-456.txt │ │ ├── test-457.txt │ │ ├── test-458.txt │ │ ├── test-459.txt │ │ ├── test-46.txt │ │ ├── test-460.txt │ │ ├── test-461.txt │ │ ├── test-462.txt │ │ ├── test-463.txt │ │ ├── test-464.txt │ │ ├── test-465.txt │ │ ├── test-466.txt │ │ ├── test-467.txt │ │ ├── test-468.txt │ │ ├── test-469.txt │ │ ├── test-47.txt │ │ ├── test-470.txt │ │ ├── test-471.txt │ │ ├── test-472.txt │ │ ├── test-473.txt │ │ ├── test-474.txt │ │ ├── test-475.txt │ │ ├── test-476.txt │ │ ├── test-477.txt │ │ ├── test-478.txt │ │ ├── test-479.txt │ │ ├── test-48.txt │ │ ├── test-480.txt │ │ ├── test-481.txt │ │ ├── test-482.txt │ │ ├── test-483.txt │ │ ├── test-484.txt │ │ ├── test-485.txt │ │ ├── test-486.txt │ │ ├── test-487.txt │ │ ├── test-488.txt │ │ ├── test-489.txt │ │ ├── test-49.txt │ │ ├── test-490.txt │ │ ├── test-491.txt │ │ ├── test-492.txt │ │ ├── test-493.txt │ │ ├── test-494.txt │ │ ├── test-495.txt │ │ ├── test-496.txt │ │ ├── test-497.txt │ │ ├── test-498.txt │ │ ├── test-499.txt │ │ ├── test-5.txt │ │ ├── test-50.txt │ │ ├── test-500.txt │ │ ├── test-501.txt │ │ ├── test-502.txt │ │ ├── test-503.txt │ │ ├── test-504.txt │ │ ├── test-505.txt │ │ ├── test-506.txt │ │ ├── test-507.txt │ │ ├── test-508.txt │ │ ├── test-509.txt │ │ ├── test-51.txt │ │ ├── test-510.txt │ │ ├── test-511.txt │ │ ├── test-512.txt │ │ ├── test-513.txt │ │ ├── test-514.txt │ │ ├── test-515.txt │ │ ├── test-516.txt │ │ ├── test-517.txt │ │ ├── test-518.txt │ │ ├── test-519.txt │ │ ├── 
test-52.txt │ │ ├── test-520.txt │ │ ├── test-521.txt │ │ ├── test-522.txt │ │ ├── test-523.txt │ │ ├── test-524.txt │ │ ├── test-525.txt │ │ ├── test-526.txt │ │ ├── test-527.txt │ │ ├── test-528.txt │ │ ├── test-529.txt │ │ ├── test-53.txt │ │ ├── test-530.txt │ │ ├── test-531.txt │ │ ├── test-532.txt │ │ ├── test-533.txt │ │ ├── test-534.txt │ │ ├── test-535.txt │ │ ├── test-536.txt │ │ ├── test-537.txt │ │ ├── test-538.txt │ │ ├── test-539.txt │ │ ├── test-54.txt │ │ ├── test-540.txt │ │ ├── test-541.txt │ │ ├── test-542.txt │ │ ├── test-543.txt │ │ ├── test-544.txt │ │ ├── test-545.txt │ │ ├── test-546.txt │ │ ├── test-547.txt │ │ ├── test-548.txt │ │ ├── test-549.txt │ │ ├── test-55.txt │ │ ├── test-550.txt │ │ ├── test-551.txt │ │ ├── test-552.txt │ │ ├── test-553.txt │ │ ├── test-554.txt │ │ ├── test-555.txt │ │ ├── test-556.txt │ │ ├── test-557.txt │ │ ├── test-558.txt │ │ ├── test-559.txt │ │ ├── test-56.txt │ │ ├── test-560.txt │ │ ├── test-561.txt │ │ ├── test-562.txt │ │ ├── test-563.txt │ │ ├── test-564.txt │ │ ├── test-565.txt │ │ ├── test-566.txt │ │ ├── test-567.txt │ │ ├── test-568.txt │ │ ├── test-569.txt │ │ ├── test-57.txt │ │ ├── test-570.txt │ │ ├── test-58.txt │ │ ├── test-59.txt │ │ ├── test-6.txt │ │ ├── test-60.txt │ │ ├── test-61.txt │ │ ├── test-62.txt │ │ ├── test-63.txt │ │ ├── test-64.txt │ │ ├── test-65.txt │ │ ├── test-66.txt │ │ ├── test-67.txt │ │ ├── test-68.txt │ │ ├── test-69.txt │ │ ├── test-7.txt │ │ ├── test-70.txt │ │ ├── test-71.txt │ │ ├── test-72.txt │ │ ├── test-73.txt │ │ ├── test-74.txt │ │ ├── test-75.txt │ │ ├── test-76.txt │ │ ├── test-77.txt │ │ ├── test-78.txt │ │ ├── test-79.txt │ │ ├── test-8.txt │ │ ├── test-80.txt │ │ ├── test-81.txt │ │ ├── test-82.txt │ │ ├── test-83.txt │ │ ├── test-84.txt │ │ ├── test-85.txt │ │ ├── test-86.txt │ │ ├── test-87.txt │ │ ├── test-88.txt │ │ ├── test-89.txt │ │ ├── test-9.txt │ │ ├── test-90.txt │ │ ├── test-91.txt │ │ ├── test-92.txt │ │ ├── test-93.txt │ │ ├── test-94.txt 
│ │ ├── test-95.txt │ │ ├── test-96.txt │ │ ├── test-97.txt │ │ ├── test-98.txt │ │ └── test-99.txt │ └── other │ │ └── state_of_the_union.txt ├── datastore_gen.py ├── docker-compose.yml ├── docs │ └── architecture.png ├── extras │ ├── benchmark.py │ ├── benchmark_stats.py │ ├── duorc_datasets.py │ ├── hybrid_memory_llm.py │ └── simple_vectorllm.py ├── knowledge_crawler.py ├── recall.py ├── recall_terminal.py ├── requirements.txt └── utils.py ├── requirements.txt ├── run ├── server.py ├── trackertotimeseries.py ├── tracking ├── Dockerfile ├── Monitor.py ├── requirements.txt └── tracker.py ├── tracklet ├── tailvisualization.py ├── tracklet.py ├── trackletmanager.py └── tsconversion.py ├── utils ├── DVDisplayChannel.py ├── RedisStreamManager.py ├── RedisStreamXreaderWriter.py ├── Utility.py ├── constants.py ├── env_linux └── remote_dashboard.txt └── videosource.py /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | name: scorecard 2 | 3 | on: 4 | push: 5 | branches: 6 | # Run on pushes to default branch 7 | - main 8 | schedule: 9 | # Run weekly on Saturdays 10 | - cron: "30 1 * * 6" 11 | # Run when branch protection rules change 12 | branch_protection_rule: 13 | # Run the workflow manually 14 | workflow_dispatch: 15 | 16 | # Declare default permissions as read-only 17 | permissions: read-all 18 | 19 | jobs: 20 | run-scorecard: 21 | # Call reusable workflow file 22 | uses: cisco-ospo/.github/.github/workflows/_scorecard.yml@main 23 | permissions: 24 | id-token: write 25 | security-events: write 26 | secrets: inherit 27 | with: 28 | # Publish results of Scorecard analysis 29 | publish-results: true 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | __pycache__ 4 | -* 5 | *.pt 6 | *~ 7 | *.chroma/ 8 | 9 | # ignore ALL .log files 10 | *.log 11 | 12 | # 
ignore ALL files in ANY directory named temp 13 | temp/ 14 | 15 | # RecallM 16 | /recallm/api_keys.json 17 | /recallm/NEO4J_ERROR_LOG.txt -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tracking/mmtracking"] 2 | path = tracking/mmtracking 3 | url = https://github.com/open-mmlab/mmtracking.git 4 | [submodule "cognitivesynergy/ONA"] 5 | path = cognitivesynergy/ONA 6 | url = https://github.com/opennars/OpenNARS-for-Applications 7 | [submodule "recallm/chroma"] 8 | path = recallm/chroma 9 | url = https://github.com/chroma-core/chroma.git 10 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Cisco Systems, Inc. and its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | FROM python:3.7 17 | 18 | WORKDIR /app 19 | RUN apt-get update && apt-get install -y python3-opencv 20 | 21 | COPY requirements.txt . 22 | RUN pip install --upgrade pip 23 | RUN pip install -r requirements.txt 24 | COPY . . 
25 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | exclude: 2 | - tracking/mmtracking 3 | 4 | -------------------------------------------------------------------------------- /actionrecognition/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.13.0" 2 | ARG CUDA="11.6" 3 | ARG CUDNN="8" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 8 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 9 | 10 | # fetch the key refer to https://forums.developer.nvidia.com/t/18-04-cuda-docker-image-is-broken/212892/9 11 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub 32 12 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub 13 | RUN apt-get update && apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 ffmpeg \ 14 | && apt-get clean \ 15 | && rm -rf /var/lib/apt/lists/* 16 | 17 | # Install Openmmlab core 18 | RUN pip install openmim 19 | RUN mim install mmengine mmcv mmaction2 20 | 21 | # Install MMAction2 22 | COPY ./actionrecognition /mmaction2 23 | WORKDIR /mmaction2 24 | RUN pip install -r requirements.txt 25 | ENV FORCE_CUDA="1" 26 | -------------------------------------------------------------------------------- /actionrecognition/requirements.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Cisco Systems, Inc. and its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | 17 | redis -------------------------------------------------------------------------------- /cognitivesynergy/CognitiveSynergyAgentManager.py: -------------------------------------------------------------------------------- 1 | from CognitiveSynergyAgent import CognitiveSynergyAgent 2 | 3 | class CognitiveSynergyAgentManager: 4 | def __init__(self): 5 | # Initialize an empty list or dict to store the agents 6 | self.agents = [] 7 | 8 | def createAgent(self, agent_type, config): 9 | # Create an agent of the specified type with the given config 10 | agent = CognitiveSynergyAgent(agent_type=agent_type,config={}) 11 | self.agents.append(agent) 12 | return agent 13 | -------------------------------------------------------------------------------- /cognitivesynergy/CognitiveSynergyEngine.py: -------------------------------------------------------------------------------- 1 | import CognitiveSynergyAgentManager 2 | class CognitiveSynergyEngine: 3 | def __init__(self): 4 | # Initialize a CognitiveSynergyAgentManager 5 | self.agent_manager = CognitiveSynergyAgentManager() 6 | 7 | def start(self): 8 | # Start the system, which might involve starting some agents 9 | pass 10 | 11 | def stop(self): 12 | # Stop the system, which will involve stopping all agents 13 | pass 14 | -------------------------------------------------------------------------------- /cognitivesynergy/debug1: -------------------------------------------------------------------------------- 1 | docker run -it --rm -v 
/var/run/docker.sock:/var/run/docker.sock -v $(pwd):/app cognitivesynergybase bash -------------------------------------------------------------------------------- /cognitivesynergy/main_detection.py: -------------------------------------------------------------------------------- 1 | from CognitiveSynergyAgent import CognitiveSynergyAgent 2 | from CognitiveSynergyAgentManager import CognitiveSynergyAgentManager 3 | 4 | def main(): 5 | # Entry point: build the manager through which agents are created 6 | agent_manager = CognitiveSynergyAgentManager() 7 | 8 | 9 | # Create a haidetection agent with an empty config via the manager, then start it 10 | agent = agent_manager.createAgent(agent_type="haidetection", config={}) 11 | agent.start() 12 | 13 | if __name__ == "__main__": 14 | main() 15 | -------------------------------------------------------------------------------- /cognitivesynergy/requirements.txt: -------------------------------------------------------------------------------- 1 | redis 2 | pillow 3 | -------------------------------------------------------------------------------- /data/basketball.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/data/basketball.mp4 -------------------------------------------------------------------------------- /data/liverpool.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/data/liverpool.mp4 -------------------------------------------------------------------------------- /data/race.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/data/race.mp4 -------------------------------------------------------------------------------- /data/street-25fps.mp4:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/data/street-25fps.mp4 -------------------------------------------------------------------------------- /data/street_30fps.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/data/street_30fps.mp4 -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | ![architecture](images/architecture.png) 2 | 3 | ### Video Source Service: 4 | 5 | The Video Source Service is responsible for collecting video streams from various sources, such as cameras or files, and pushing the data into a Redis database. In addition, it stores metadata in another Redis database. 6 | 7 | This service is implemented as a Docker container. 8 | 9 | ### Video Tracking Service: 10 | 11 | The Video Tracking Service is based on the OpenMMLab tracker (MMTracking). It takes an input stream from Redis and adds tracking information to the stream, which is then saved back to Redis. 12 | 13 | After the Video Tracking Service has processed the data, consumers are able to view the data in the Redis stream (which now includes tracking information) and the metadata through Grafana dashboards and annotated video servers, as well as any Android or React applications. 14 | 15 | Similar to the Video Source Service, the Video Tracking Service and the Redis databases also run as Docker containers.
16 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | A placeholder for changelog -------------------------------------------------------------------------------- /docs/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/docs/images/architecture.png -------------------------------------------------------------------------------- /docs/images/getstarts.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/docs/images/getstarts.gif -------------------------------------------------------------------------------- /docs/images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/docs/images/overview.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # DeepVision 2 | 3 | ## [Introduction](intro.md) 4 | 5 | ## [Getting Started](getstarted.md) 6 | 7 | ## [Architecture](architecture.md) 8 | 9 | ## [Essential Scripts](essentials.md) 10 | 11 | ## [CI/CD](cicd.md) 12 | 13 | ## [Changelog](changelog.md) 14 | 15 | ## [Contributor Guide](../CONTRIBUTING.md) -------------------------------------------------------------------------------- /docs/intro.md: -------------------------------------------------------------------------------- 1 | ## INTRODUCTION: 2 | 3 | DeepVision is a comprehensive video analytics system that enables users to monitor and analyze video streams from various 
sources with ease. 4 | 5 | ### Features 6 | * #### Scalable and modular serverless open-source framework 7 | * State-of-the-art object detectors, trackers, behavior detectors 8 | * New types of reasoning engines 9 | * 3D semantics projects 10 | * Multi-modal data stream analytics 11 | * Neurosymbolic integration 12 | * Knowledge representation 13 | * Self-supervised learning 14 | * Hyperdimensional Computing 15 | 16 | ### Overview 17 | ![img.png](images/overview.png) 18 | 19 | 20 | -------------------------------------------------------------------------------- /grafana/grafana.ini: -------------------------------------------------------------------------------- 1 | [paths] 2 | provisioning = /etc/grafana/provisioning 3 | 4 | [server] 5 | enable_gzip = true 6 | # To add HTTPS support: 7 | #protocol = https 8 | #;http_addr = 9 | http_port = 3000 10 | #domain = localhost 11 | #enforce_domain = false 12 | #root_url = https://localhost:3000 13 | #router_logging = false 14 | #static_root_path = public 15 | #cert_file = /etc/certs/cert.pem 16 | #cert_key = /etc/certs/cert-key.pem 17 | 18 | [security] 19 | # Needed if you want to embed Grafana in an iframe, for example 20 | allow_embedding = true 21 | 22 | [users] 23 | default_theme = dark -------------------------------------------------------------------------------- /grafana/provisioning/dashboards/dashboard.yml: -------------------------------------------------------------------------------- 1 | # config file version 2 | apiVersion: 1 3 | 4 | providers: 5 | # a unique provider name 6 | - name: DeepVision 7 | # org id. will default to orgId 1 if not specified 8 | org_id: 1 9 | # name of the dashboard folder. Required 10 | folder: 'DeepVision_Folder' 11 | # provider type.
Required 12 | type: 'file' 13 | # disable dashboard deletion 14 | disableDeletion: false 15 | # enable dashboard editing 16 | editable: true 17 | # how often Grafana will scan for changed dashboards 18 | updateIntervalSeconds: 5 19 | # allow updating provisioned dashboards from the UI 20 | allowUiUpdates: true 21 | options: 22 | # path to dashboard files on disk. Required 23 | path: /etc/grafana/dashboards 24 | # use folder names from filesystem to create folders in Grafana 25 | foldersFromFilesStructure: true -------------------------------------------------------------------------------- /grafana/provisioning/datasources/redis.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: Redis 5 | type: redis-datasource 6 | access: proxy 7 | orgId: 1 8 | uid: redis_uid 9 | isDefault: true 10 | version: 1 11 | url: redis://redistimeseries:6379 12 | allowUiUpdates: true 13 | jsonData: 14 | client: standalone 15 | poolSize: 5 16 | timeout: 10 17 | pingInterval: 0 18 | pipelineWindow: 0 19 | editable: true 20 | secureJsonData: 21 | password: "" -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: DeepVision 2 | -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/aggregate_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "total_question": 13450, 3 | "recall_correct": 6474, 4 | "vectordb_correct": 7494, 5 | "hybrid_correct": 7085, 6 | "hybrid_maximum_correct": 9181, 7 | "recall_accuracy": 0.4813382899628253, 8 | "vectordb_accuracy": 0.5571747211895911, 9 | "hybrid_accuracy": 0.5267657992565056, 10 | "hybrid_accuracy_maximum": 0.6826022304832714 11 | } -------------------------------------------------------------------------------- 
/recallm/benchmarks/duoRC_50_percent/test-0_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who portrays Max Parry?", "answer": "['Kevin Howarth']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What is Max Parry's day job?", "answer": "['Wedding video cameraman']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q2": {"quesiton": "What is Max's day job?", "answer": "['wedding video cameraman']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q3": {"quesiton": "What role does Mark Stevenson play?", "answer": "['unnamed assistant']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 6, "vectordb_correct": 2, "hybrid_correct": 6, "recall_accuracy": 0.75, "vectordb_accuracy": 0.25, "hybrid_accuracy": 0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-101.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "What is the main female character's name?", "answer": "['Valerie Gail']", "recall": "As an AI language model, I cannot answer this question without additional context. Please provide the name of the book, movie, or TV show you are referring to.", "vectordb": "The main female character's name is Valerie Gail.\n", "hybrid": "The main female character's name is Valerie Gail.\n"}, "q1": {"question": "Who plays Valerie?", "answer": "['Geena Davis']", "recall": "The statement does not provide information on who plays Valerie.", "vectordb": "Valerie is played by Deborah Foreman. \n", "hybrid": "Valerie is played by Deborah Foreman. 
\n"}, "q2": {"question": "What state does the move take place in?", "answer": "['California']", "recall": "The statement does not provide information about the state where the movie takes place.", "vectordb": "The move takes place in Southern California.\n", "hybrid": "The move takes place in Southern California.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-101_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What is the main female character's name?", "answer": "['Valerie Gail']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who plays Valerie?", "answer": "['Geena Davis']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What state does the move take place in?", "answer": "['California']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 0, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.0, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-105.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "Which major religion is the focus of this film?", "answer": "['Islam']", "recall": "The question cannot be answered based on the given statements as they do not provide information about the focus of the film.", "vectordb": "The film focuses on Islam as a major religion.\n", "hybrid": "The film focuses on Islam as a major religion.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-105_score_gpt.json: 
-------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "Which major religion is the focus of this film?", "answer": "['Islam']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-106_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who sees the first murder?", "answer": "['Jayakumar and Balagopalan']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who got killed?", "answer": "['Indu']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What is the name of the policeman assigned to the case?", "answer": "['Thomas Mathew']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Is Rachel amish?", "answer": "['no']", "recall": 2, "vectordb": 0, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 6, "hybrid_correct": 8, "recall_accuracy": 0.25, "vectordb_accuracy": 0.75, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-107_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What profession does Billy Zane play in the film?", "answer": "['Sniper Instructor']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who must Sgt. 
Brandon Beckett rescue?", "answer": "['Jean van Brunt']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "Where did the sniper ambush Beckett?", "answer": "['A farm in the Democratic Republic of Congo']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 3, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 0.5, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-110_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who does Hal meet while stuck in an elevator?", "answer": "['Tony Robbins']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What did Mauricio think when he saw Hal dancing with a fat woman?", "answer": "[\"worried about Hal's new taste in women\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Where did Hal first meet Tony?", "answer": "['trapped in an elevator']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q3": {"quesiton": "What is the name of the famous American life coach Hal meets?", "answer": "['Tony Robbins']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 5, "hybrid_correct": 4, "recall_accuracy": 0.25, "vectordb_accuracy": 0.625, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-113_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "How are Carlo and Sara killed?", "answer": "['Walter']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What starts a fire in the 
Countess' apartment?", "answer": "['explosion of an electrical appliance']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who is Rose's brother?", "answer": "['Matthew, Mark, Luke, and Bing.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "What actress plays Sara", "answer": "['Sarah Wayne Callies']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "What is the architect's name?", "answer": "['FRIBA']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-115.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "What was the bride-to-be accused of as a child?", "answer": "['Stealing a locket']", "recall": "As an AI language model, I do not have any information about the bride-to-be's childhood or any accusations made against her. 
Please provide more context or information for me to assist you better.", "vectordb": "The extracted parts do not provide information about what the bride-to-be was accused of as a child.\nSOURCES:", "hybrid": "The extracted parts do not provide information about what the bride-to-be was accused of as a child.\nSOURCES:"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-115_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "What was the bride-to-be accused of as a child?", "answer": "['Stealing a locket']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-116_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What was inside Duane Bradley's locked basket?", "answer": "['Belial']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Why does Belial attack and kill Sharon?", "answer": "[\"Belial doesn't want Duane to have or sex, or be in love, or be independent\"]", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "What happens when Duane attempts to kill Belial?", "answer": "['die because they fall through the window because of the fight']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 2, "vectordb_correct": 4, "hybrid_correct": 4, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.6666666666666666} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-121_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What profession is Gina McVey in ?", "answer": "['Radiologist']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What does Gina see in the doppelganger's apartment ?", "answer": "['A picture of her and John']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "Why does Gina get admitted to the hospital ?", "answer": "['Car accident']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "What happened to the Original Gina ?", "answer": "['She was killed by Evil GIna']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "What happened to Gina after the accident ?", "answer": "['awakens in a hospital with no memory of the accident']", "recall": 1, "vectordb": 1, "hybrid": 1}, "recall_correct": 7, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 0.7, "vectordb_accuracy": 0.6, "hybrid_accuracy": 0.6} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-122.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Who leads the rival gang that attacks and captures Tanaka?", "answer": "['Iron Claw Yakuza']", "recall": "The statement does not provide information about a rival gang attacking and capturing Tanaka, so the question cannot be answered.", "vectordb": "Yoshida leads the rival gang that attacks and captures Tanaka.\n", "hybrid": "Yoshida leads the rival gang that attacks and captures Tanaka.\n"}, "q1": {"question": "Who is Kenner's new partner?", "answer": "['Johnny Murata']", "recall": 
"Kenner's new partner is Johnny Murata, an American of partial Japanese descent.", "vectordb": "Kenner's new partner is Johnny Murata.\n", "hybrid": "Kenner's new partner is Johnny Murata, an American of partial Japanese descent."}, "q2": {"question": "Where is Chris Kenner working?", "answer": "['Little Tokyo in Los Angeles']", "recall": "Chris Kenner is working as a Los Angeles (L.A.) cop.", "vectordb": "Chris Kenner is working as a cop in L.A.'s Little Tokyo.\n", "hybrid": "Chris Kenner is working as a cop in L.A.'s Little Tokyo.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-122_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who leads the rival gang that attacks and captures Tanaka?", "answer": "['Iron Claw Yakuza']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "Who is Kenner's new partner?", "answer": "['Johnny Murata']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Where is Chris Kenner working?", "answer": "['Little Tokyo in Los Angeles']", "recall": 1, "vectordb": 2, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 5, "hybrid_correct": 3, "recall_accuracy": 0.5, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-124.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "What was Deven Shah's profession in the movie?", "answer": "['artist']", "recall": "The statements do not provide information about Deven Shah's profession in the movie.", "vectordb": "Deven Shah pretended to be an art aficionado named \"Deven Shah\" and told Raina 
that he had been loaned the painting she needed by reputed businessmen, the Suri brothers, for an art exhibition. \n", "hybrid": "Deven Shah pretended to be an art aficionado named \"Deven Shah\" and told Raina that he had been loaned the painting she needed by reputed businessmen, the Suri brothers, for an art exhibition. \n"}, "q1": {"question": "What was Sunny Singh's profession in the movie?", "answer": "['secretly']", "recall": "Sunny Singh's profession in the movie was a fitness instructor at a local gym.", "vectordb": "Sunny Singh's profession in the movie was a fitness instructor at a local gym.\n", "hybrid": "Sunny Singh's profession in the movie was a fitness instructor at a local gym.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-124_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "What was Deven Shah's profession in the movie?", "answer": "['artist']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What was Sunny Singh's profession in the movie?", "answer": "['secretly']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.0, "vectordb_accuracy": 0.25, "hybrid_accuracy": 0.25} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-127_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Why does the fox eat the hearts of men?", "answer": "['to become human again']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q1": {"quesiton": "What does the fox eat?", "answer": "['hearts']", "recall": 0, "vectordb": 0, 
"hybrid": 0}, "q2": {"quesiton": "Who does the fox seduce?", "answer": "['Huo']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "How does the fox seduce men?", "answer": "['magical powers']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Is the warrior male or female?", "answer": "['female']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q5": {"quesiton": "Who does the fox meet?", "answer": "[\"Que'er a bird demon\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 5, "hybrid_correct": 6, "recall_accuracy": 0.16666666666666666, "vectordb_accuracy": 0.4166666666666667, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-128.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "What character was played by Robert Wagner?", "answer": "['Lieutenant Ed Pell']", "recall": "As an AI language model, I cannot provide a specific answer without additional context. Robert Wagner has played many characters throughout his career in film and television. Please provide more information about the specific project or production you are referring to.", "vectordb": "Robert Wagner played the character of Lieutenant Ed Pell.\n", "hybrid": "Robert Wagner played the character of Lieutenant Ed Pell.\n"}, "q1": {"question": "What type of squadron die Robert Mitchum join?", "answer": "['A jet fighter squadron']", "recall": "The statements do not provide information about Robert Mitchum joining any squadron.", "vectordb": "Robert Mitchum's character, Major Cleve \"Iceman\" Saville, did not join a squadron. He was assigned command of a flight. \n", "hybrid": "Robert Mitchum's character, Major Cleve \"Iceman\" Saville, did not join a squadron. He was assigned command of a flight. 
\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-128_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "What character was played by Robert Wagner?", "answer": "['Lieutenant Ed Pell']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What type of squadron die Robert Mitchum join?", "answer": "['A jet fighter squadron']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 0, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.0, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-12_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who's story does the movie relate ?", "answer": "['Benito Mussolinis first wife']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who does Ida Dalser fall in love with ?", "answer": "['Mussolini']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What dictator committed his first wife and son to asylums?", "answer": "['the Duce']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 4, "hybrid_correct": 4, "recall_accuracy": 0.5, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-130_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": 
{"quesiton": "Who shoots the poker cheater?", "answer": "['Pedro']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What happens to the player of the poker game that cheats?", "answer": "['The crook is killed by Pedro.']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "Why is Juanita arrested?", "answer": "['She is blamed for the death of sheriff, who Pedro has killed.']", "recall": 1, "vectordb": 1, "hybrid": 1}, "recall_correct": 2, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-131_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who is Thorne?", "answer": "['A prisoner in the prison where Joe works as a prison warder.']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "Who plays joe?", "answer": "['Ralph Fiennes']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 1, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.25, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-134.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Whom is the vampire?", "answer": "['Slovak']", "recall": "There are multiple vampires in the statements, including Gustoff Slovak, Derek Washington (after he turns into a vampire), and Slova (who becomes a vampire).", "vectordb": "The extracted parts do not provide enough information to answer the question. 
\nSOURCES:", "hybrid": "There are multiple vampires in the statements, including Gustoff Slovak, Derek Washington (after he turns into a vampire), and Slova (who becomes a vampire)."}, "q1": {"question": "What must the protagonist become?", "answer": "['Vampire']", "recall": "As an AI language model, I cannot answer this question without additional context about the protagonist and the story they are a part of. Please provide more information for me to give a relevant response.", "vectordb": "The protagonist must become a vampire hunter and master of the martial arts.\n", "hybrid": "The protagonist must become a vampire hunter and master of the martial arts.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-134_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Whom is the vampire?", "answer": "['Slovak']", "recall": 1, "vectordb": 0, "hybrid": 1}, "q1": {"quesiton": "What must the protagonist become?", "answer": "['Vampire']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 1, "vectordb_correct": 1, "hybrid_correct": 2, "recall_accuracy": 0.25, "vectordb_accuracy": 0.25, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-138_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Who does Brian race?", "answer": "[\"two of Verone's drivers\"]", "recall": 2, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What is Brian's result in the race?", "answer": "['He wins']", "recall": 1, "vectordb": 0, "hybrid": 1}, "q2": {"quesiton": "What happens to Brian after the race?", 
"answer": "['Roman confronts Brian']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "Who does Tej call to a race?", "answer": "['Brian']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who is Brian O'Conner?", "answer": "['A former LAPD officer']", "recall": 1, "vectordb": 2, "hybrid": 2}, "recall_correct": 7, "vectordb_correct": 6, "hybrid_correct": 7, "recall_accuracy": 0.7, "vectordb_accuracy": 0.6, "hybrid_accuracy": 0.7} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-139_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Where did Mahadev graduate from?", "answer": "['Satna College.']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who is Kamla's husband?", "answer": "['Bansi Ram.']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "In which town does Mahadev operate a vegetable store?", "answer": "['Sajjanpur']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "Which university did Mahadev Kuswah graduate from?", "answer": "['Satna college']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who is Ramkumar in love with?", "answer": "['Shobha Rani.']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 0, "vectordb_correct": 7, "hybrid_correct": 7, "recall_accuracy": 0.0, "vectordb_accuracy": 0.7, "hybrid_accuracy": 0.7} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-145_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What kind of celebration are the characters attending?", "answer": "['Day Of The 
Dead']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "How old was Director Stutz when his mother died?", "answer": "['suicide']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "How did Director Stutz's mother die?", "answer": "['suicide']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Where is the ceremony at the end of the movie?", "answer": "['at the ocean']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q4": {"quesiton": "The messages are about preventing what?", "answer": "['suicides']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 4, "hybrid_correct": 3, "recall_accuracy": 0.2, "vectordb_accuracy": 0.4, "hybrid_accuracy": 0.3} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-151_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What does Sam do for a living?", "answer": "['Architect']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who helps Jonah?", "answer": "['Jessica']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "How old is Sam's son when they move to Seattle?", "answer": "['8 years old']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who found the backpack?", "answer": "['Howard']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "What is Annie doing with Walter?", "answer": "['Dinner']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q5": {"quesiton": "Who left the backpack?", "answer": "['Jonah']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 7, "hybrid_correct": 7, "recall_accuracy": 0.5, "vectordb_accuracy": 0.5833333333333334, "hybrid_accuracy": 0.5833333333333334} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-153_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who is Perry's new cellmate?", "answer": "['James Lacey']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Why is Perry's daughter near death?", "answer": "['Drug overdose']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who did Perry offer himself to be killed by ?", "answer": "['Rizza']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "What are the two narratives the film runs?", "answer": "['preparations and escape']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.0, "vectordb_accuracy": 0.125, "hybrid_accuracy": 0.125} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-159_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who becomes Annaka's lover?", "answer": "['no asnwer']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What party of the female vagina is sold to a wealthy socialite?", "answer": "['no asnwer']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who plays Starrbooty?", "answer": "['RuPaul']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What do Page and Starrbooty go undercover as?", "answer": "['a hooker (the plot says undercover ho)']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 0.5, "vectordb_accuracy": 0.75, "hybrid_accuracy": 
0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-15_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What is the name of the Lapland woman who runs the reindeer school?", "answer": "['Gerda']", "recall": 0, "vectordb": 0, "hybrid": 2}, "q1": {"quesiton": "Who get frozen when they fight the Queen?", "answer": "['Mother Bird']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q2": {"quesiton": "What did the robber promise his daughter?", "answer": "['This question does not have anything to do with the provided movie plot']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who is Dimly?", "answer": "['This question does not have anything to do with the provided movie plot']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "What did the princess and prince give to Ellie and Peeps?", "answer": "['This question does not have anything to do with the provided movie plot']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 7, "hybrid_correct": 8, "recall_accuracy": 0.2, "vectordb_accuracy": 0.7, "hybrid_accuracy": 0.8} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-166_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What is the name of Richard's daughter?", "answer": "['Karen']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "How old is their son,Mimi-Siku?", "answer": "['13']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who climbed the Statue Of Liberty?", "answer": "['Mimi-Siku']", "recall": 2, "vectordb": 2, 
"hybrid": 2}, "q3": {"quesiton": "What does Michael Cromwell do for a living?", "answer": "['commodities broker']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "In whose house was Mimi-Siku staying?", "answer": "['Richard Kempster (Martin Short)']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q5": {"quesiton": "Who is Michael's first wife?", "answer": "['Patricia (JoBeth Williams)']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 7, "vectordb_correct": 11, "hybrid_correct": 11, "recall_accuracy": 0.5833333333333334, "vectordb_accuracy": 0.9166666666666666, "hybrid_accuracy": 0.9166666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-168.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "What has spread through China?", "answer": "['revolution']", "recall": "As an AI language model, I do not have access to current news or events. Please provide me with more specific information or context so I can provide a relevant answer.", "vectordb": "There is not enough information to answer the question. \n", "hybrid": "There is not enough information to answer the question. \n"}, "q1": {"question": "Where is this movie set?", "answer": "['china']", "recall": "As an AI language model, I cannot answer this question without additional context or information about the specific movie being referred to. 
Please provide more details or a specific movie title for me to provide an accurate answer.", "vectordb": "The movie is set in Hong Kong, which was a British colony in 1905.\n", "hybrid": "The movie is set in Hong Kong, which was a British colony in 1905.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-168_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "What has spread through China?", "answer": "['revolution']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Where is this movie set?", "answer": "['china']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 0, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.0, "vectordb_accuracy": 0.25, "hybrid_accuracy": 0.25} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-173.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Who does Jimmy become a personal chauffeur for?", "answer": "['Devlin']", "recall": "Jimmy becomes a personal chauffeur for Clark Devlin.", "vectordb": "Jimmy becomes a personal chauffeur for Clark Devlin.\n", "hybrid": "Jimmy becomes a personal chauffeur for Clark Devlin.\n"}, "q1": {"question": "What does Devlin have made for Jimmy?", "answer": "['had ordered a second tuxedo']", "recall": "Devlin has ordered a second tuxedo for Jimmy himself, believing that Jimmy could also be a great agent.", "vectordb": "Devlin had a second tuxedo made for Jimmy, believing that Jimmy could also be a great agent.\n", "hybrid": "Devlin had a second tuxedo made for Jimmy, believing that Jimmy could also be a great agent.\n"}, "q2": {"question": "What is 
Jimmy's former occupation?", "answer": "['taxi driver']", "recall": "Jimmy's former occupation is a taxi driver.", "vectordb": "Jimmy's former occupation was a taxi driver.\n", "hybrid": "Jimmy's former occupation was a taxi driver.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-173_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who does Jimmy become a personal chauffeur for?", "answer": "['Devlin']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What does Devlin have made for Jimmy?", "answer": "['had ordered a second tuxedo']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What is Jimmy's former occupation?", "answer": "['taxi driver']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 1.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-174_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "What does Hana do for work?", "answer": "['Grows food for her family and repairs the house']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "How does Hanna meet Wolf Man?", "answer": "['In college']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q2": {"quesiton": "What is the name of Hana and Wolf Man's daughter?", "answer": "['Yuki']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Where does Ame decide to go?", "answer": "['The mountains']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 2, 
"vectordb_correct": 5, "hybrid_correct": 4, "recall_accuracy": 0.25, "vectordb_accuracy": 0.625, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-177_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What is wrong with Frank's walk?", "answer": "['Nothing']", "recall": 0, "vectordb": 0, "hybrid": 1}, "q1": {"quesiton": "Who did Frank take a flight to visit?", "answer": "['Pauline']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q2": {"quesiton": "What is the name of the inspector leading the investigation?", "answer": "['Braddock']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What is the relationship between Frank and Pauline?", "answer": "['friends']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "What is Pauline selling?", "answer": "['Boat']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "Where does Frank fly to in order to visit Pauline?", "answer": "['London']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 3, "vectordb_correct": 6, "hybrid_correct": 8, "recall_accuracy": 0.25, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-178.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who is Arthur's childhood mentor?", "answer": "['Merlyn.', 'Merlyn.']", "recall": "The statement does not provide information about Arthur's childhood mentor.", "vectordb": "The document does not provide information about Arthur's childhood mentor.\nSOURCES:", "hybrid": "The document does not provide information about 
Arthur's childhood mentor.\nSOURCES:"}, "q1": {"question": "Who is King Arthur preparing a great battle against?", "answer": "['Sir Lancelot.', 'Sir Lancelot.']", "recall": "The statements do not provide information about who King Arthur is preparing a great battle against.", "vectordb": "King Arthur is preparing a great battle against an unknown enemy. There is no information in the given content about who Arthur is preparing to fight against.\n", "hybrid": "King Arthur is preparing a great battle against an unknown enemy. There is no information in the given content about who Arthur is preparing to fight against.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-178_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who is Arthur's childhood mentor?", "answer": "['Merlyn.', 'Merlyn.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who is King Arthur preparing a great battle against?", "answer": "['Sir Lancelot.', 'Sir Lancelot.']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 0, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.0, "vectordb_accuracy": 0.25, "hybrid_accuracy": 0.25} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-186.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "what is Rahul's wife's name?", "answer": "[\"Rahul's wife's name is Priety\"]", "recall": "The statement does not provide information about Rahul's wife's name.", "vectordb": "The name of Rahul's wife is not mentioned in the given content. 
\nSOURCES:", "hybrid": "The name of Rahul's wife is not mentioned in the given content. \nSOURCES:"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-186_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "what is Rahul's wife's name?", "answer": "[\"Rahul's wife's name is Priety\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-192_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What planes take off ahead of the advancing Germans?", "answer": "['British and French Military']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q1": {"quesiton": "Who is the German Diplomat?", "answer": "['Baron von Richter']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "the battle of britain takes place in what year?", "answer": "['1940']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 3, "hybrid_correct": 2, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-195.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "Where is the Pro-Am MMA tournament held ?", "answer": "['New Orleans']", "recall": "As an AI 
language model, I do not have access to current information. However, Pro-Am MMA tournaments can be held in various locations depending on the organizers and the event. It is best to check the official website or social media pages of the tournament for the exact location and schedule.", "vectordb": "The location of the Pro-Am MMA tournament is not mentioned in the given content.\nSOURCES:", "hybrid": "As an AI language model, I do not have access to current information. However, Pro-Am MMA tournaments can be held in various locations depending on the organizers and the event. It is best to check the official website or social media pages of the tournament for the exact location and schedule."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-195_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "Where is the Pro-Am MMA tournament held ?", "answer": "['New Orleans']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-196_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What were the people going to wired to?", "answer": "['Marseille, France']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What is the name of the radical Neo-Luddite group?", "answer": "['Machine Strikers']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who is the security guard?", "answer": "['Samuel Decker']", "recall": 0, 
"vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "What does the remaining crew destroy upon arrival?", "answer": "['The antennae system of Station 42.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "Who discovered the truth?", "answer": "['Baptist']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "What kind of place is Rhea suppose to be ?", "answer": "['distant planet', 'A planet']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-201_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Blind people want not sympathy but what?", "answer": "['Help']", "recall": 1, "vectordb": 2, "hybrid": 1}, "q1": {"quesiton": "Sparsh doesn't have what?", "answer": "['sight']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What type of people are featured in the movie Sparsh?", "answer": "['blind children and their principal']", "recall": 1, "vectordb": 2, "hybrid": 1}, "recall_correct": 2, "vectordb_correct": 4, "hybrid_correct": 2, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-203_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who did Suzanne's husband hire to build her an office?", "answer": "['Spaniard Ivan']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": 
"What does Suzanne do for a living?", "answer": "['sexually involved with Spaniard Ivan']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "With whom was Suzanne in affair with?", "answer": "['Ivan']", "recall": 2, "vectordb": 0, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 2, "hybrid_correct": 4, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.3333333333333333, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-204_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "when did President Richard Nixon end manned missions to the Moon?", "answer": "['1972']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "when does First World commence?", "answer": "['1969']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "who discovered civilization on the moon?", "answer": "['Apollo 11']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 4, "hybrid_correct": 4, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-205_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Tom has to take care of what animal while his boss is out of town?", "answer": "['The owl O-J']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Lisa's brother has some dealings with the local what?", "answer": "['drug dealers']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "How many 
nights will Tom's boss be out of town?", "answer": "['one night']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 4, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.3333333333333333, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-208_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What did Sophie do with the $5000 she got from Oz?", "answer": "['Tried to hire contract killer for her husband']", "recall": 1, "vectordb": 0, "hybrid": 1}, "q1": {"quesiton": "What is Nicolas's profession?", "answer": "['Dentist']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What is Jimmy Jones nickname", "answer": "['Jimmy \"the tulip\"']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "Where is Oz from?", "answer": "['Quebec']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "Who did Oz sleep with in Chicago?", "answer": "['Cynthia']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "How does Oz recognize Jimmy?", "answer": "['The hitter is distracted']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 2, "hybrid_correct": 3, "recall_accuracy": 0.25, "vectordb_accuracy": 0.16666666666666666, "hybrid_accuracy": 0.25} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-213.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Who plays Christine?", "answer": "['Erin Chambers.']", "recall": "Erin Chambers plays Christine.", "vectordb": "Erin Chambers plays 
Christine.\n", "hybrid": "Erin Chambers plays Christine."}, "q1": {"question": "Who plays Jonathan?", "answer": "['Will Swenson.']", "recall": "Will Swenson plays Jonathan.", "vectordb": "Will Swenson plays Jonathan.\n", "hybrid": "Will Swenson plays Jonathan."}, "q2": {"question": "Who plays Dalen?", "answer": "['Kirby Heyborne.']", "recall": "Dalen is played by \"some guy who's been in every LDS movie ever made,\" who looks exactly like Dalen (also played by Heyborne).", "vectordb": "Kirby Heyborne plays Dalen.\n", "hybrid": "Kirby Heyborne plays Dalen.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-213_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who plays Christine?", "answer": "['Erin Chambers.']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who plays Jonathan?", "answer": "['Will Swenson.']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who plays Dalen?", "answer": "['Kirby Heyborne.']", "recall": 1, "vectordb": 2, "hybrid": 2}, "recall_correct": 5, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 0.8333333333333334, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-215_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Why did Zaara goto India?", "answer": "['To scatter the ashes of Bebe, her old governess, in the Sutlej river']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What is Veer Pratap Singh's job?", "answer": "['Squadron Leader (pilot) in the Indian Air Force']", 
"recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What sort of story is Veer-Zaara?", "answer": "['A love story concerning the demands and the precision of destiny']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Where is Zaara from?", "answer": "['Lahore, Pakistan']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 7, "vectordb_correct": 8, "hybrid_correct": 8, "recall_accuracy": 0.875, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-216_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Why did Sriram Abhyankar and the Hindu militia perceive Mahatma Ghandi as a traitor?", "answer": "['treachery to Hindu Indians']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who murders Mahatma Gandhi?", "answer": "['Nathuram Godse']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What happens after Saket saves the innocent Sikh girl?", "answer": "['A Muslim mob rapes and kills his wife.']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Where did Saket pass away?", "answer": "['madras']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "What was given to Saket at his funeral?", "answer": "[\"Gandhi's footwear and spectacles\"]", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 10, "hybrid_correct": 10, "recall_accuracy": 0.6, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-217_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, 
"graded_questions": 15, "q0": {"quesiton": "how is this movie", "answer": "['seems good']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "How does the young mans wife die?", "answer": "['The Young man murders her', 'he murders her']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "what to do female teacher", "answer": "['seduce nearly all the men']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "who arrives in the town to teach", "answer": "['Reza']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "who kill his wife", "answer": "['Trisa']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.3, "vectordb_accuracy": 0.2, "hybrid_accuracy": 0.2} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-223_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "who invites Claude, Sonia and Kevin for an idyllic birthday lunch?", "answer": "['Clive']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What's the name of Claude's wife", "answer": "['Sonia']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Where does clive host the birthday lunch?", "answer": "['Front lawn']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "what's the name of clive's illegitimate son ?", "answer": "['Kevin']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "Which year birthday is write clive langham about to celebrate ?", "answer": "['78th']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 4, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 0.4, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-227.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Who is the hero in Neil's popular spy novels?", "answer": "['Tripp Zoome (Jonathan Keltz)']", "recall": "The hero in Neil's popular spy novels is Neil himself.", "vectordb": "The hero in Neil's popular spy novels is Tripp Zoome.\n", "hybrid": "The hero in Neil's popular spy novels is Tripp Zoome.\n"}, "q1": {"question": "What is Melissa's father's name?", "answer": "['Neal']", "recall": "Melissa's father's name is Neal.", "vectordb": "Melissa's father's name is Neal.\n", "hybrid": "Melissa's father's name is Neal.\n"}, "q2": {"question": "Who are the obsessive fans that kidnap Neil the first time?", "answer": "['Wheeze (David Henrie) with his friend, Sheldon (Denzel Whitaker) and his brother, Andre (Moiss Arias)']", "recall": "The obsessive fans that kidnap Neil the first time are Wheeze, Sheldon, and Andre.", "vectordb": "The obsessive fans that kidnap Neil the first time are Wheeze, Sheldon, and Andre.\n", "hybrid": "The obsessive fans that kidnap Neil the first time are Wheeze, Sheldon, and Andre."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-227_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who is the hero in Neil's popular spy novels?", "answer": "['Tripp Zoome (Jonathan Keltz)']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What is Melissa's father's name?", "answer": "['Neal']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who are the obsessive fans that kidnap Neil the first time?", "answer": 
"['Wheeze (David Henrie) with his friend, Sheldon (Denzel Whitaker) and his brother, Andre (Moiss Arias)']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-229.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "When does the movie take place?", "answer": "['1920']", "recall": "Question:\n\nCan you provide more information about the movie you are referring to?", "vectordb": "The movie takes place in County Cork, Ireland in 1920.\n", "hybrid": "The movie takes place in County Cork, Ireland in 1920.\n"}, "q1": {"question": "Who does the guerrilla army face?", "answer": "['the British Army']", "recall": "The guerrilla army faces the British Army.", "vectordb": "The guerrilla army faces the Free State forces.\n", "hybrid": "The guerrilla army faces the British Army."}, "q2": {"question": "Who abandons his burgeoning career as a doctor?", "answer": "[\"Damien O'Donovan\"]", "recall": "The person who abandons his burgeoning career as a doctor is someone who has made the personal decision to leave the medical profession and pursue a different path or career.", "vectordb": "Damien O'Donovan abandons his burgeoning career as a doctor.\n", "hybrid": "Damien O'Donovan abandons his burgeoning career as a doctor.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-229_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "When does the movie take place?", "answer": 
"['1920']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who does the guerrilla army face?", "answer": "['the British Army']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q2": {"quesiton": "Who abandons his burgeoning career as a doctor?", "answer": "[\"Damien O'Donovan\"]", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 2, "hybrid_correct": 4, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.3333333333333333, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-230_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Was Pluto mistaken for the prince?", "answer": "['Yes']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Did the Prince wreak havoc on his people?", "answer": "['No']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who sneaks into the castle disguised as an executioner?", "answer": "['Goofy']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who is driving the cart full of sausages?", "answer": "['Captain Pete']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "What character does the guard think mistake for a Prince?", "answer": "['Mickey', 'for the pauper']", "recall": 1, "vectordb": 1, "hybrid": 1}, "recall_correct": 5, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 0.5, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-237_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": 
{"quesiton": "Who does Paul choose to live with?", "answer": "['a male roommate']", "recall": 1, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who was Paul's girlfriend?", "answer": "['Agnes']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who goes on a journey of discovery?", "answer": "['about class, cultural differences and sexuality']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "Who did Agnes suspect to have a relationship with Paul?", "answer": "['Meric']", "recall": 2, "vectordb": 0, "hybrid": 2}, "recall_correct": 3, "vectordb_correct": 0, "hybrid_correct": 2, "recall_accuracy": 0.375, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.25} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-239_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What did Andrew and Dave see when they opened the door?", "answer": "['Everything outside is gen and replaced by a void.']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What do Andew and Dave do when they open the door?", "answer": "['Step out into the tofu like void.']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who frames Dave?", "answer": "['His girlfriend.']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What is Andrew's job?", "answer": "['A travel agent.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "What is Andrew accused of?", "answer": "['Andrew is accused of attempted child molestation.']", "recall": 0, "vectordb": 1, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 7, "hybrid_correct": 6, "recall_accuracy": 0.3, "vectordb_accuracy": 0.7, "hybrid_accuracy": 0.6} -------------------------------------------------------------------------------- 
/recallm/benchmarks/duoRC_50_percent/test-245_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Before the showdown between Von Rotten and Buffalo Bill, what happens before anyone is shot?", "answer": "['Willy and Pablo foil his scheme']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q1": {"quesiton": "How does Von Rotten show the students history?", "answer": "['to be recorded as the greatest name in history', 'Time Machine']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "Who does Willy go back in time to stop?", "answer": "['Professor Von Rotten']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "Where do Willy and Pablo go to stop Von Rotten?", "answer": "['Egypt']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "What does Von Rotten disguise himself as?", "answer": "['Chinese interpreter', 'Black Knight']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 4, "vectordb_correct": 6, "hybrid_correct": 5, "recall_accuracy": 0.4, "vectordb_accuracy": 0.6, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-251.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "What state was Ernie Davis raised in?", "answer": "['Pennsylvania']", "recall": "Ernie Davis was raised in Pennsylvania.", "vectordb": "Ernie Davis was raised in Pennsylvania and later moved to Elmira, New York.\n", "hybrid": "Ernie Davis was raised in Pennsylvania and later moved to Elmira, New York.\n"}, "q1": {"question": "At what school was Ernie Davis a running back?", "answer": "['Syracuse University']", "recall": "Ernie Davis was a running back at Syracuse University.", "vectordb": 
"Ernie Davis was a running back at Syracuse University.\n", "hybrid": "Ernie Davis was a running back at Syracuse University."}, "q2": {"question": "Who was first African-American to win the Heisman Trophy?", "answer": "['Ernie Davis']", "recall": "Ernie Davis was the first African-American to win the Heisman Trophy.", "vectordb": "Ernie Davis was the first African-American to win the Heisman Trophy.\n", "hybrid": "Ernie Davis was the first African-American to win the Heisman Trophy."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-251_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What state was Ernie Davis raised in?", "answer": "['Pennsylvania']", "recall": 2, "vectordb": 2, "hybrid": 1}, "q1": {"quesiton": "At what school was Ernie Davis a running back?", "answer": "['Syracuse University']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who was first African-American to win the Heisman Trophy?", "answer": "['Ernie Davis']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 6, "hybrid_correct": 5, "recall_accuracy": 1.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-254_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "What does Ed do when he learns of Connie's infidelity?", "answer": "['kills paul']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q1": {"quesiton": "What is the relation between Ed and Connie?", "answer": "['husband wife']", "recall": 2, "vectordb": 2, "hybrid": 2}, 
"q2": {"quesiton": "Does Ed receive punishment for his crimes?", "answer": "['no']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "How does Connie meet Paul?", "answer": "['Runs into him during a windstorm']", "recall": 1, "vectordb": 0, "hybrid": 0}, "recall_correct": 5, "vectordb_correct": 4, "hybrid_correct": 5, "recall_accuracy": 0.625, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.625} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-257_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "How did Stephanie get pregnant?", "answer": "['A boy she met at a party']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Where did Stephanie deliver her baby?", "answer": "['In the bathroom']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "In whom is the forensic psychologist investigating Stephanie's case?", "answer": "['Lydie Crane']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 5, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.8333333333333334, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-261.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 4, "sample_0": {"q0": {"question": "Who became a handyman at a Philadelphia apartment complex?", "answer": "['Cleveland Heep']", "recall": "Cleveland Heep became a handyman at a Philadelphia apartment complex.", "vectordb": "Cleveland Heep became a handyman at a Philadelphia apartment complex.\n", "hybrid": "Cleveland Heep became a handyman at a Philadelphia apartment complex."}, "q1": {"question": "Who attacks Story?", 
"answer": "['The Scrunt']", "recall": "The Scrunt attacks Story.", "vectordb": "The Scrunt attacks Story.\n", "hybrid": "The Scrunt attacks Story."}, "q2": {"question": "Who plays Cleveland Heep?", "answer": "['Paul Giamatti']", "recall": "Paul Giamatti", "vectordb": "Paul Giamatti plays Cleveland Heep.\n", "hybrid": "Paul Giamatti plays Cleveland Heep.\n"}, "q3": {"question": "Who is killed by the Scrunt?", "answer": "['The arrogant movie critic']", "recall": "No one is killed by the Scrunt.", "vectordb": "The Scrunt kills Heep.\n", "hybrid": "The Scrunt kills Heep.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-261_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who became a handyman at a Philadelphia apartment complex?", "answer": "['Cleveland Heep']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who attacks Story?", "answer": "['The Scrunt']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who plays Cleveland Heep?", "answer": "['Paul Giamatti']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who is killed by the Scrunt?", "answer": "['The arrogant movie critic']", "recall": 1, "vectordb": 0, "hybrid": 0}, "recall_correct": 7, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 0.875, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-270.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "What kind of busiiness do they start?", "answer": "['wedding planning']", "recall": "It is unclear who \"they\" refers to in the statement. 
Could you please provide more context or specify the individuals you are referring to?", "vectordb": "They start a wedding planning business called Shaadi Mubaarak.\n", "hybrid": "They start a wedding planning business called Shaadi Mubaarak.\n"}, "q1": {"question": "What is the guys name?", "answer": "['Bittoo Sharma', 'Bittoo']", "recall": "I'm sorry, but I cannot provide the name of a specific person without more information.", "vectordb": "The guy's name is Bittoo Sharma.\n", "hybrid": "The guy's name is Bittoo Sharma.\n"}, "q2": {"question": "What is the girls name?", "answer": "['Shruti', 'Shruti Kakkar']", "recall": "The girl's name is Shruti.", "vectordb": "The girl's name is Shruti Kakkar.\n", "hybrid": "The girl's name is Shruti Kakkar.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-270_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What kind of busiiness do they start?", "answer": "['wedding planning']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What is the guys name?", "answer": "['Bittoo Sharma', 'Bittoo']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What is the girls name?", "answer": "['Shruti', 'Shruti Kakkar']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-271_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Where 
does the killings take place?", "answer": "['Whitechapel district']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "The killer's methods are identical to what 19th century psychopath?", "answer": "['Jack the Ripper']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q2": {"quesiton": "Who stars in the movie \"The Lodger?\"", "answer": "['Slade']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 2, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.3333333333333333, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-272_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What does Valerie do on the full moon?", "answer": "['Turn into a sipder']", "recall": 1, "vectordb": 2, "hybrid": 1}, "q1": {"quesiton": "What is Laura Lockwood like?", "answer": "['She is troubled by a curse, mental illness, and feelings of competitiveness with her twin.']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Where do the murder s take place?", "answer": "['Los Angles']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What is the detective's name?", "answer": "['Mark Higbie']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "What are the names of the fraternal twin sisters?", "answer": "['Leigh and Laura']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 10, "hybrid_correct": 9, "recall_accuracy": 0.4, "vectordb_accuracy": 1.0, "hybrid_accuracy": 0.9} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-274_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": 
"gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who helps Quatermass track the Carroon-creature to Westminster Abbey?", "answer": "['BBC outside broadcasting']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What does Quatermass use to conclude that Carroon has been completely taken over by some kind of alien and will eventually release reproductive spores?", "answer": "[\"The fingerprints don't match the astronaut's and are alien fingerprints\"]", "recall": 1, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "How does Quatermass kill the Carroon-creature?", "answer": "['He electrocutes it on the scaffolding']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-275_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What supernatural character appears during the car crash?", "answer": "['the devil']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Jean and Paul want to pray over what Saint's remains?", "answer": "['St. 
James']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q2": {"quesiton": "Jean and Paul are on a pilgrimage to what Spanish city?", "answer": "['Santiago de Compostela']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 4, "hybrid_correct": 2, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-278_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What does the sheriff order to kill the giant spider?", "answer": "['Frank']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "who falls impaling itself on stalagmites at the bottom of the cave.?", "answer": "['ike and Carol are covered with rocks and dirt. They are s']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who is their science teacher?", "answer": "['. It shrieks in pain and falls. 
Cagle an']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-27_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Which subject she teach the movie?", "answer": "['Math']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Mona gets a job at an elementary school as what?", "answer": "['math teacher']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Where she gets a job?", "answer": "['Elementary school']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What was the name of girl in the movie?", "answer": "['Mona Gray']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "Mona discovers that she has an unorthodox talent for what?", "answer": "['teaching']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q5": {"quesiton": "What was the occupation of Mona's father?", "answer": "['teacher']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 6, "vectordb_correct": 8, "hybrid_correct": 8, "recall_accuracy": 0.5, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-280_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "who planned to build a home for their love?", "answer": "['Good']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Does Khushi always want to do her duty?", "answer": 
"['no']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "what is the name of ranbir's love?", "answer": "['Good']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "Is Khushi a careful worker?", "answer": "['No mention']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "who gets dismissed from their job?", "answer": "['Good']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "Do Khushi and Ranbir end up falling in love?", "answer": "['Yes']", "recall": 2, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.16666666666666666, "vectordb_accuracy": 0.16666666666666666, "hybrid_accuracy": 0.16666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-281.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "how many girls committed suicide at the boarding school?", "answer": "['3 girls']", "recall": "Three girls committed suicide at the boarding school.", "vectordb": "There were three girls who committed suicide at the boarding school.\n", "hybrid": "Three girls committed suicide at the boarding school."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-281_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "how many girls committed suicide at the boarding school?", "answer": "['3 girls']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 1.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-286_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Where was Varinia from?", "answer": "['Gaul', 'Sparta']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What school was Spartacus assigned to?", "answer": "[\"Batiatus'\"]", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Spartacus and Antoninus are to engage in a fight to the death. Who was defeated?", "answer": "['Antoninus']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Spartacus leaves Italy and hires a pirate to supply him with what?", "answer": "['money']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "How many Gladiators were demanded to fight to the death?", "answer": "['10']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q5": {"quesiton": "Where does Spartacus train his men?", "answer": "[\"Batiatus' ludus\"]", "recall": 1, "vectordb": 1, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 8, "hybrid_correct": 7, "recall_accuracy": 0.5, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.5833333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-287_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "What is Chantelle's profession?", "answer": "['Nurse']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Where is her family's old home?", "answer": "['Louisiana']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "In what city is May-Alice working as an actress?", "answer": "['Manhattan']", 
"recall": 0, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who is a daytime soap opera actress in this movie?", "answer": "['Mary McDonnell']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 4, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 0.5, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-289_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "where is the story set?", "answer": "['Bulgaria']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "whose mother is a prostitute?", "answer": "['Jasna']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who's mother is a prostitute?", "answer": "[\"Jasna's mother\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "who does tsane meet in the city?", "answer": "[\"grandfather's stepbrother\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-290_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "who was scared of veer?", "answer": "['the chief minister']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What was the dog's name?", "answer": "['what dog?']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What does Veer start having while living with his parents?", "answer": "['visions']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q3": 
{"quesiton": "Who was happily living with his parents?", "answer": "['Veer']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who did the bad guys kill?", "answer": "['mehak']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "Who was Veer's brother?", "answer": "['Ali']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 6, "vectordb_correct": 8, "hybrid_correct": 8, "recall_accuracy": 0.5, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-296_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Where must Stevie go to pick up stolen jewelry?", "answer": "['Melbourne']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What does Stevie's job require him to sell?", "answer": "['dodgy cars']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What kind of fight causes the wrong briefcase to be taken?", "answer": "['gunfight']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who owns the briefcase the two accidentally grab?", "answer": "['Peter Cho']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 8, "vectordb_correct": 8, "hybrid_correct": 8, "recall_accuracy": 1.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-298_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "What disease does Jenny appear to have?", "answer": "['Again, Jenny is not a character in the plot.']", "recall": 0, 
"vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "How did Jenny find out about her condition?", "answer": "['No condition can be gathered about Jenny because Jenny is not in the plot.']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 4, "hybrid_correct": 2, "recall_accuracy": 0.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-299_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "what was the age of vanya solntsev", "answer": "['Six-year-old']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "what are the hopes of vanya solntev", "answer": "['Of getting adopted']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q2": {"quesiton": "what are the challenges taken by vanyasolntev", "answer": "['identify his mother']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 5, "hybrid_correct": 4, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-300.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "How did Tien lose his parents?", "answer": "['In a vicious assassins']", "recall": "Tien lost his parents in a massacre where he became the sole survivor.", "vectordb": "Tien lost his parents in a massacre orchestrated by Lord Rajasena. He was the only survivor and managed to escape with deep vengeance in his heart.\n", "hybrid": "Tien lost his parents in a massacre orchestrated by Lord Rajasena. 
He was the only survivor and managed to escape with deep vengeance in his heart.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-300_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "How did Tien lose his parents?", "answer": "['In a vicious assassins']", "recall": 1, "vectordb": 2, "hybrid": 2}, "recall_correct": 1, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.5, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-301.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who lives next door?", "answer": "[\"Charles Brown (Paul Schneider)\u2014Keats' friend, roommate, and associate in writing\"]", "recall": "Question:\n\nWho is your neighbor?", "vectordb": "The content does not provide information about who lives next door.\nSOURCES:", "hybrid": "The content does not provide information about who lives next door.\nSOURCES:"}, "q1": {"question": "What does John Keats teach Fanny?", "answer": "['Poetry']", "recall": "John Keats teaches Fanny poetry.", "vectordb": "John Keats teaches Fanny poetry.\n", "hybrid": "John Keats teaches Fanny poetry.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-301_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who lives next door?", "answer": "[\"Charles Brown (Paul Schneider)\u2014Keats' friend, roommate, and 
associate in writing\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What does John Keats teach Fanny?", "answer": "['Poetry']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.5, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-303_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What does Dr. Kozak kidnap?", "answer": "['Dr. Kozak kidnap a 300-year-old sacred dog named Khyi Yang Po.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who rescues the sheepdog?", "answer": "['Dave Douglas rescues the sheepdog.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who is the District Attorney?", "answer": "['Dave Douglas is the District Attorney.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-304.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who is Valerie's new beau?", "answer": "['Michael Morrell']", "recall": "Valerie's new beau is successful architect Michael Morrell.", "vectordb": "The information does not provide the identity of Valerie's new beau.\n", "hybrid": "Valerie's new beau is successful architect Michael Morrell."}, "q1": {"question": "Who does Valerie Stanton kill?", "answer": "['Gordon Dunning']", "recall": "Valerie Stanton kills Gordon Dunning.", "vectordb": "Valerie 
Stanton kills Gordon Dunning.\n", "hybrid": "Valerie Stanton kills Gordon Dunning."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-304_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who is Valerie's new beau?", "answer": "['Michael Morrell']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q1": {"quesiton": "Who does Valerie Stanton kill?", "answer": "['Gordon Dunning']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 2, "hybrid_correct": 4, "recall_accuracy": 1.0, "vectordb_accuracy": 0.5, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-310_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What does this machine do?", "answer": "['For torture and brainwashing']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "After the military takeover of their project, why do the scientists continue working on their project?", "answer": "['Monitoring the equipment']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "What is the name of the mental torture device?", "answer": "['Project Brainstorm']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 1, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 0.16666666666666666, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-311.json: 
-------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "What does Jack Rebney do for a living?", "answer": "[\"He sells RV's\"]", "recall": "Jack Rebney was once a news broadcaster and editor.", "vectordb": "Jack Rebney is a retired news broadcaster and editor.\n", "hybrid": "Jack Rebney was once a news broadcaster and editor."}, "q1": {"question": "Who is searching for Jack Rebney?", "answer": "['Steinbauer']", "recall": "Steinbauer is searching for Jack Rebney.", "vectordb": "Steinbauer is searching for Jack Rebney.\n", "hybrid": "Steinbauer is searching for Jack Rebney."}, "q2": {"question": "Where does Jack Rebney live?", "answer": "['A remote mountain area in California']", "recall": "Jack Rebney lives in a remote mountain area in California.", "vectordb": "Jack Rebney lives in a remote mountain area in California.\n", "hybrid": "Jack Rebney lives in a remote mountain area in California."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-311_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What does Jack Rebney do for a living?", "answer": "[\"He sells RV's\"]", "recall": 1, "vectordb": 0, "hybrid": 1}, "q1": {"quesiton": "Who is searching for Jack Rebney?", "answer": "['Steinbauer']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Where does Jack Rebney live?", "answer": "['A remote mountain area in California']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 5, "vectordb_correct": 4, "hybrid_correct": 5, "recall_accuracy": 0.8333333333333334, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.8333333333333334} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-312_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who are Arcs and Fallens?", "answer": "['archangels (Arcs) from Heaven, fallen angels (Fallen) from Hell']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Which Film reveals the battle between good and evil and the fight for the human soul?", "answer": "['Gabriel']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q2": {"quesiton": "Who is Gabriel predecessor?", "answer": "['Michael', 'Archbishop Serge (Konovaloff)']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 4, "hybrid_correct": 2, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-317_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What is Mary's job?", "answer": "['crossword puzzle writer']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What did Mary fall into?", "answer": "['mine']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who is Mary set up on a blind date with?", "answer": "['Steve Miller']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What is Hartman Hughes prfession in this movie?", "answer": "['CCN news reporter']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q4": {"quesiton": "Who is feeling guilty that Mary is in the mineshaft?", "answer": "['Hartman Hughes']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 
10, "vectordb_correct": 9, "hybrid_correct": 10, "recall_accuracy": 1.0, "vectordb_accuracy": 0.9, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-322.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "What is Doob's job?", "answer": "['The DNA tests reveal the killer, Robert Doob, a delivery man with a criminal record.']", "recall": "The statements do not provide any information about Doob's job.", "vectordb": "Doob's job is not mentioned in the provided content.\nSOURCES:", "hybrid": "The statements do not provide any information about Doob's job."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-322_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "What is Doob's job?", "answer": "['The DNA tests reveal the killer, Robert Doob, a delivery man with a criminal record.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-325_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What country will the film about Columbus be shot in?", "answer": "['Bolivia']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who was first Indian leader to resist?", "answer": "['Atuey']", "recall": 0, "vectordb": 0, "hybrid": 
0}, "q2": {"quesiton": "Who does Sebastian want to make a film about?", "answer": "['Christopher Columbus']", "recall": 1, "vectordb": 1, "hybrid": 1}, "recall_correct": 3, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.5, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-327_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who is a lifelong member of the Security service?", "answer": "['Johnny Worricker']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who is Worricker's boss?", "answer": "['Benedict Baron']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "Who is the British Prime Minister?", "answer": "['Alec Beasley']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 1, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.16666666666666666, "vectordb_accuracy": 0.16666666666666666, "hybrid_accuracy": 0.16666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-329_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What does Gerald inherit from his uncle?", "answer": "['Craven Castle', 'The castle']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "Who was Gerald MacTeam engaged to?", "answer": "['Kitty', 'Kitty Murray', 'kitty']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who were Gerald's servants?", "answer": "['Robert and William', 'William and Robert', 'Simon, Robert, William']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": 
{"quesiton": "Where do they invite a group of friends?", "answer": "['Engagement Party', 'The Maze', 'Macteam home']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "What do Kitty and her aunt steal a key to?", "answer": "['tower room']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 7, "vectordb_correct": 7, "hybrid_correct": 7, "recall_accuracy": 0.7, "vectordb_accuracy": 0.7, "hybrid_accuracy": 0.7} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-32_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What letter is associated with the first man?", "answer": "['X', 'X.']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What letter is the woman referred to as?", "answer": "['A', 'A.']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Where did the man, X, claim to have met the woman at?", "answer": "['At this hotel', 'A hotel']", "recall": 2, "vectordb": 0, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 2, "hybrid_correct": 4, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.3333333333333333, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-333_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Who plays Stacy Lockwood?", "answer": "['Tori Spelling']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "what does stacy call angela?", "answer": "['Pathetic']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who is a high school sophomore in the small northern 
California town?", "answer": "['Angela Delvecchio']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who writes a letter to the imprisoned Angela?", "answer": "['Jamie']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who plays Angela Delvecchio?", "answer": "['Kellie Martin']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 10, "hybrid_correct": 8, "recall_accuracy": 0.6, "vectordb_accuracy": 1.0, "hybrid_accuracy": 0.8} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-343.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Where did Tommy and Gilbert go AWOL at?", "answer": "['Paris']", "recall": "Tommy and Gilbert went AWOL at the front.", "vectordb": "Tommy and Gilbert went AWOL in Paris.\n", "hybrid": "Tommy and Gilbert went AWOL in Paris.\n"}, "q1": {"question": "Who is Annettes father?", "answer": "['Colonel Marshall']", "recall": "Annette's father is Colonel Marshall.", "vectordb": "Annette's father is Colonel Marshall.\n", "hybrid": "Annette's father is Colonel Marshall."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-343_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Where did Tommy and Gilbert go AWOL at?", "answer": "['Paris']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who is Annettes father?", "answer": "['Colonel Marshall']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 4, "hybrid_correct": 4, "recall_accuracy": 0.5, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-345_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Who is Harris trying to stop when he is killed?", "answer": "['Holli Would']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What keeps the Interworld Matrix intact?", "answer": "['Spike of Power']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q2": {"quesiton": "What happens to Harris' mother?", "answer": "['She is killed by a drunk driver']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What are the animated characters called?", "answer": "['Doodles']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "Where is Deebs' home?", "answer": "['The real world']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q5": {"quesiton": "Who is Harris in love with?", "answer": "['Lonette']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 7, "hybrid_correct": 4, "recall_accuracy": 0.16666666666666666, "vectordb_accuracy": 0.5833333333333334, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-348_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What is Mike's relationship to Drey?", "answer": "['Brother']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "Why is Mike in prison?", "answer": "['Selling drugs for a neighborhood dealing']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "What is Dan's job?", "answer": "['Middle school history teacher at a Brooklyn school']", "recall": 
2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What illegal drug is Dan seen smoking?", "answer": "['Cocaine']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q4": {"quesiton": "What sport does Dan coach?", "answer": "['basketball']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q5": {"quesiton": "Who is the most instrumental in helping Dan kick his drug habit?", "answer": "['Drey']", "recall": 2, "vectordb": 1, "hybrid": 1}, "recall_correct": 4, "vectordb_correct": 10, "hybrid_correct": 4, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-352.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who is Miron's wife?", "answer": "['Tanya']", "recall": "Miron's wife is Tanya.", "vectordb": "Miron's wife is Tanya.\n", "hybrid": "Miron's wife is Tanya."}, "q1": {"question": "What animals are brought on the roadtrip?", "answer": "['Bunting Birds']", "recall": "I'm sorry, but as an AI language model, I do not have access to the specific context or information about the road trip you are referring to. Therefore, I cannot provide a direct answer to your question. Could you please provide more details or clarify your query?", "vectordb": "Two Bunting birds are brought on the roadtrip.\n", "hybrid": "I'm sorry, but as an AI language model, I do not have access to the specific context or information about the road trip you are referring to. Therefore, I cannot provide a direct answer to your question. 
Could you please provide more details or clarify your query?"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-352_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who is Miron's wife?", "answer": "['Tanya']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What animals are brought on the roadtrip?", "answer": "['Bunting Birds']", "recall": 0, "vectordb": 1, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 3, "hybrid_correct": 2, "recall_accuracy": 0.5, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-353.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "What does Maija DiGiorgio do for a living?", "answer": "['Comic.']", "recall": "Maija DiGiorgio is a stand-up comedian.", "vectordb": "It is unclear what Maija DiGiorgio does for a living based on the given information.\n", "hybrid": "It is unclear what Maija DiGiorgio does for a living based on the given information.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-353_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "What does Maija DiGiorgio do for a living?", "answer": "['Comic.']", "recall": 2, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 1.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-354_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Who does Yu Hong fall in love with?", "answer": "['Zhou Wei']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Where does Yo Hong study?", "answer": "['Beiqing University']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "In what country does Yu live?", "answer": "['China']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who begins to demonstrate, demanding freedom and democracy?", "answer": "['the students in Tiananmen Square']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "Whose memories are Zhou fleeing?", "answer": "['Yu Hong']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q5": {"quesiton": "Where did Yu Hong leave for the university?", "answer": "['Tumen']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 9, "vectordb_correct": 11, "hybrid_correct": 9, "recall_accuracy": 0.75, "vectordb_accuracy": 0.9166666666666666, "hybrid_accuracy": 0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-359_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What was Janardan's name changed to?", "answer": "['Jordan']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who was Heer already arranged to marry?", "answer": "['Jai']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Whose fame did the star chase after?", "answer": "['Jim morrison']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "Who did 
Janardan fall in love with?", "answer": "['Heer']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Where did Jai live?", "answer": "['Italy']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "Who was the protagonist?", "answer": "['Jordan']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 5, "vectordb_correct": 8, "hybrid_correct": 8, "recall_accuracy": 0.4166666666666667, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-364.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who do the 2003 turtles defeat ?", "answer": "['88 shredder']", "recall": "The 2003 turtles defeat the '03 Shredder.", "vectordb": "The 2003 turtles defeat Ch'rell, also known as the '03 Shredder.\n", "hybrid": "The 2003 turtles defeat Ch'rell, also known as the '03 Shredder.\n"}, "q1": {"question": "From which series are the Technodrome ?", "answer": "['88']", "recall": "The Technodrome is from the '03 series.", "vectordb": "The Technodrome is from the '88 series.\n", "hybrid": "The Technodrome is from the '88 series.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-364_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who do the 2003 turtles defeat ?", "answer": "['88 shredder']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "From which series are the Technodrome ?", "answer": "['88']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 1, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.25, 
"vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-365_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "In what city is the train station where Johnnie enlists the help of the army detachment?", "answer": "['Chattanooga']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What does Johnie do to the officer?", "answer": "['hides underneath the table']", "recall": 1, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "who stops his locomotive and runs into the forest to hide?", "answer": "['Johnnie']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What is the profession of Johnnie?", "answer": "['train engineer']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "who try a variety of methods to shake their dogged pursuer?", "answer": "['Cahill']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 4, "hybrid_correct": 4, "recall_accuracy": 0.3, "vectordb_accuracy": 0.4, "hybrid_accuracy": 0.4} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-366.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "What does Janice accidentally do to the baby?", "answer": "['Drowns it']", "recall": "The statement does not provide any information about what Janice accidentally does to the baby.", "vectordb": "Janice accidentally gives birth to a baby girl.\n", "hybrid": "The statement does not provide any information about what Janice accidentally does to the baby."}, "q1": {"question": "Who is now pregnant with Rabbit's child?", "answer": "['Ruth']", 
"recall": "Janice is now pregnant with Rabbit's child.", "vectordb": "Janice is pregnant with Rabbit's child.\n", "hybrid": "Janice is now pregnant with Rabbit's child."}, "q2": {"question": "What kind of former sports star is Harry?", "answer": "['Basketball']", "recall": "Harry is a former basketball star.", "vectordb": "Harry is a former basketball star.\n", "hybrid": "Harry is a former basketball star.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-366_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What does Janice accidentally do to the baby?", "answer": "['Drowns it']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who is now pregnant with Rabbit's child?", "answer": "['Ruth']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What kind of former sports star is Harry?", "answer": "['Basketball']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.3333333333333333, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-390_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What did Chuck find in the street?", "answer": "['Wallet']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Where did Chuck Scott return from?", "answer": "['Cuba']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q2": {"quesiton": "What was Chuck Scott?", "answer": "['Driver']", "recall": 1, "vectordb": 1, "hybrid": 1}, 
"recall_correct": 3, "vectordb_correct": 5, "hybrid_correct": 3, "recall_accuracy": 0.5, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-398_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What do Pia and Eric have in common?", "answer": "['Seat']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q1": {"quesiton": "What is the name of the actress who portrayed Pia?", "answer": "['Angel Locsin']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q2": {"quesiton": "What is Eric's daytime job?", "answer": "[\"School's Janitor\"]", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 5, "hybrid_correct": 2, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.3333333333333333} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-3_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Is Anu happy with her company's decision?", "answer": "['no']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q1": {"quesiton": "Who falls in love with Anu?", "answer": "['Chikkumol (Baby Shamili)']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "who does Prakash have to take care of?", "answer": "['Chikkumol (Baby Shamili)']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "How does Anu feel?", "answer": "['Unhappy in the beginning and then happy by the end.']", "recall": 1, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "Who is Anu''s child?", "answer": "['Chikkumol (Baby 
Shamili)']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q5": {"quesiton": "Who plays Anu Pillai", "answer": "['Suchitra Krishnamoorthi']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 6, "vectordb_correct": 6, "hybrid_correct": 7, "recall_accuracy": 0.5, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5833333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-404_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What was the team's record during regular season?", "answer": "['undefeated']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "Do the Oilers win or lose in the playoffs?", "answer": "['lose']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q2": {"quesiton": "What sport does Coach Carter coach?", "answer": "['basketball']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What team do the Oilers lose to?", "answer": "['St. 
Francis']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "What is the name of the basketball team?", "answer": "['Oilers']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q5": {"quesiton": "What does Coach Carter initiate on the gym?", "answer": "['Individual contracts']", "recall": 1, "vectordb": 1, "hybrid": 1}, "recall_correct": 7, "vectordb_correct": 9, "hybrid_correct": 7, "recall_accuracy": 0.5833333333333334, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.5833333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-406_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who is the shuttle commander?", "answer": "['Sharp', 'Jennifer Watts']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What city is devastated by a meteorite?", "answer": "['Paris', 'New York']", "recall": 1, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What collides with the earth at the opening of movie Armageddon?", "answer": "['asteroid', 'meteor']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What does Harry's crew accidentally hit while drilling?", "answer": "['iron']", "recall": 2, "vectordb": 1, "hybrid": 1}, "recall_correct": 6, "vectordb_correct": 4, "hybrid_correct": 4, "recall_accuracy": 0.75, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-409_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Where does Wren flee to?", "answer": "['California']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What 
is Wren trying to pursue?", "answer": "['Career']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "What city does Wren flee to?", "answer": "[\"She plans to flee to California, but doesn't.\"]", "recall": 0, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "What does Wren paste on subway walls?", "answer": "['Nothing.']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q4": {"quesiton": "How old is Wren?", "answer": "['It is unknown.']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 8, "hybrid_correct": 6, "recall_accuracy": 0.6, "vectordb_accuracy": 0.8, "hybrid_accuracy": 0.6} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-413.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "How old is the heroine in the movie?", "answer": "['17']", "recall": "I'm sorry, but without specific information about the movie or heroine you are referring to, I cannot provide an answer to your question. Could you please provide more details?", "vectordb": "The heroine in the movie is 17 years old.\n", "hybrid": "I'm sorry, but without specific information about the movie or heroine you are referring to, I cannot provide an answer to your question. 
Could you please provide more details?"}, "q1": {"question": "At what time does the paranormal hour happens?", "answer": "['midnight']", "recall": "The paranormal hour happens at midnight.", "vectordb": "The paranormal hour happens at midnight on David's 21st birthday.\n", "hybrid": "The paranormal hour happens at midnight on David's 21st birthday.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-413_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "How old is the heroine in the movie?", "answer": "['17']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "At what time does the paranormal hour happens?", "answer": "['midnight']", "recall": 2, "vectordb": 1, "hybrid": 1}, "recall_correct": 2, "vectordb_correct": 3, "hybrid_correct": 1, "recall_accuracy": 0.5, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.25} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-414.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who shoots Johann in the head?", "answer": "['paxton']", "recall": "I'm sorry, but I can't generate a response to that statement.", "vectordb": "Karan shoots Johann in the head.\n", "hybrid": "Karan shoots Johann in the head.\n"}, "q1": {"question": "How many of Paxton's fingers did Johann severe?", "answer": "['2']", "recall": "I'm sorry, but I can't generate a response to that statement as it seems to contain inappropriate or violent content.", "vectordb": "Johann severed all of Paxton's fingers.\n", "hybrid": "Johann severed all of Paxton's fingers.\n"}}} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-414_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who shoots Johann in the head?", "answer": "['paxton']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "How many of Paxton's fingers did Johann severe?", "answer": "['2']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-417.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who was murdered?", "answer": "['Alice Barlow']", "recall": "I'm sorry, but I can't provide an answer to that question as it requires specific information about a particular incident or case. Could you please provide more context or details?", "vectordb": "Alice Barlow was murdered.\n", "hybrid": "I'm sorry, but I can't provide an answer to that question as it requires specific information about a particular incident or case. Could you please provide more context or details?"}, "q1": {"question": "What is the name of the detective?", "answer": "['B.G. Rough']", "recall": "I'm sorry, but I don't have enough information to answer your question. Could you please provide more context or details about the detective you are referring to?", "vectordb": "The name of the detective is B. G. Rough.\n", "hybrid": "I'm sorry, but I don't have enough information to answer your question. 
Could you please provide more context or details about the detective you are referring to?"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-417_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who was murdered?", "answer": "['Alice Barlow']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "What is the name of the detective?", "answer": "['B.G. Rough']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 4, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-418_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What is the name of Robbie's band?", "answer": "['The Stiff Dylans']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What song do the Stiff Dylans perform?", "answer": "['\"Ultraviolet\"']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What is Georgia's cat's name?", "answer": "['angus']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Where does Georgia decide to move?", "answer": "['New Zealand with her father']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Where does Georgia apologize to Robbie?", "answer": "['the beach']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "Who does Georgia bring to the pool?", "answer": "['Her little sister']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 10, "vectordb_correct": 10, "hybrid_correct": 10, "recall_accuracy": 
0.8333333333333334, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-422_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Is this a romance movie?", "answer": "['No']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "What is the profession of Julian?", "answer": "['No Anwer']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What is the subject of the movie?", "answer": "['Horror']", "recall": 1, "vectordb": 1, "hybrid": 2}, "q3": {"quesiton": "Who is the director?", "answer": "['Kim Yong-gyun']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "Who convinces Vicky to dance again?", "answer": "['Lermontov']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 5, "hybrid_correct": 4, "recall_accuracy": 0.3, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.4} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-423_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who decides that the best option is to climb the mountains and reach a higher plateau?", "answer": "['Lee']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "Who does Chuck have a son with?", "answer": "['Charlotte']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What does Derna drop in the swamp?", "answer": "['the laser gun']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "Charlotte determines that which plants are particularly poisonous?", "answer": 
"['the berries']", "recall": 0, "vectordb": 1, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 3, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.375, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-427.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "What is Michael's mission?", "answer": "['Provide a friend for Wolfgang']", "recall": "Michael's mission is to keep Wolfgang captive in his soundproof basement and provide him with basic necessities while pretending to be his friend.", "vectordb": "There is no information provided about Michael's mission.\nSOURCES:", "hybrid": "There is no information provided about Michael's mission.\nSOURCES:"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-427_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "What is Michael's mission?", "answer": "['Provide a friend for Wolfgang']", "recall": 1, "vectordb": 0, "hybrid": 0}, "recall_correct": 1, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.5, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-431_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "How is the virus transmitted?", "answer": "['join a political organization opposing bioweapons. 
His refusal proves the correct response,']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What is the virus referred to as?", "answer": "['Satan Bug']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 2, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-432.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "What does Lane challenge the new captain to?", "answer": "['A ski race down the K-']", "recall": "Lane challenges the new captain to a ski race down the K-12 slope.", "vectordb": "Lane challenges the new captain to a ski race down the K-12.\n", "hybrid": "Lane challenges the new captain to a ski race down the K-12 slope."}, "q1": {"question": "Who does Lane's girlfriend dump him for?", "answer": "['Roy Stalin']", "recall": "Lane's girlfriend, Beth, dumps him for the captain of the ski team, Roy Stalin.", "vectordb": "Lane's girlfriend dumps him for Roy Stalin.\n", "hybrid": "Lane's girlfriend, Beth, dumps him for the captain of the ski team, Roy Stalin."}, "q2": {"question": "Who plays Lane Meyers?", "answer": "['John Cusack']", "recall": "The statement does not provide information about who plays Lane Meyers.", "vectordb": "John Cusack plays Lane Meyers.\n", "hybrid": "John Cusack plays Lane Meyers.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-432_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What does Lane challenge the new captain to?", "answer": "['A ski race down the K-']", "recall": 
2, "vectordb": 1, "hybrid": 2}, "q1": {"quesiton": "Who does Lane's girlfriend dump him for?", "answer": "['Roy Stalin']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who plays Lane Meyers?", "answer": "['John Cusack']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 5, "hybrid_correct": 6, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-433.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Who is James Wormold's secretary?", "answer": "['Beatrice Severn', 'Beatrice']", "recall": "James Wormold's secretary is Beatrice Severn.", "vectordb": "The secretary's name is Beatrice Severn.\n", "hybrid": "James Wormold's secretary is Beatrice Severn."}, "q1": {"question": "What country does Wormold live in?", "answer": "['Havana', 'Cuba']", "recall": "Wormold lives in Havana, Cuba.", "vectordb": "Wormold lives in Cuba.\n", "hybrid": "Wormold lives in Havana, Cuba."}, "q2": {"question": "What do Wormold's commanders bestow on him?", "answer": "['Honors']", "recall": "Wormold's commanders bestow on him an O.B.E. (Order of the British Empire) and offer him a teaching position in London.", "vectordb": "Wormold's commanders bestow on him an O.B.E. (Order of the British Empire) and offer him a position teaching espionage classes in London.\n", "hybrid": "Wormold's commanders bestow on him an O.B.E. 
(Order of the British Empire) and offer him a teaching position in London."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-433_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who is James Wormold's secretary?", "answer": "['Beatrice Severn', 'Beatrice']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What country does Wormold live in?", "answer": "['Havana', 'Cuba']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What do Wormold's commanders bestow on him?", "answer": "['Honors']", "recall": 1, "vectordb": 1, "hybrid": 1}, "recall_correct": 5, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 0.8333333333333334, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-436.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "What is Zach doing while Rex is taking pictures of him?", "answer": "['Watching football']", "recall": "The statement is asking about Zach's activity while Rex is photographing him.", "vectordb": "There is no information provided about what Zach is doing while Rex is taking pictures of him.\n", "hybrid": "There is no information provided about what Zach is doing while Rex is taking pictures of him.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-436_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, 
"graded_questions": 3, "q0": {"quesiton": "What is Zach doing while Rex is taking pictures of him?", "answer": "['Watching football']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-437_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What does Daniel HIllard convince law enforcement of ?", "answer": "['Their plan to storm the residence is too risky for his family']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "Who will send the package to aid the three fugitives in their escape?", "answer": "[\"Glenn's girlfriend\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who is murdered?", "answer": "['Mr. 
Patterson', 'No one']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "How many people do Glenn Griffin and the escaped convicts hold hostage?", "answer": "['Four', '2', 'Three']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q4": {"quesiton": "What family is being tormented?", "answer": "[\"The Hilliard's\", 'The Hilliards']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 5, "vectordb_correct": 4, "hybrid_correct": 5, "recall_accuracy": 0.5, "vectordb_accuracy": 0.4, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-443_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What is Geoffrey Radcliffe convicted for?", "answer": "['The murder of his brother Michael']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q1": {"quesiton": "Who created the invisibility serum?", "answer": "['John Griffin']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q2": {"quesiton": "Who did Radcliffe kidnapped in order to uncover the truth?", "answer": "['Willie Spears']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who created the invisibility serum that is injected into Radcliffe?", "answer": "['Frank Griffin']", "recall": 1, "vectordb": 2, "hybrid": 1}, "q4": {"quesiton": "Who does Radcliffe kidnap?", "answer": "['Willie Spears']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q5": {"quesiton": "Who is Geoffrey Radcliffe's fiance?", "answer": "['no fiance']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 9, "vectordb_correct": 8, "hybrid_correct": 9, "recall_accuracy": 0.75, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-445_score_gpt.json: 
-------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who does the story revolve around?", "answer": "['the mob']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "The name of the movie is A ____ Tale?", "answer": "['Bronx']", "recall": 1, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "In a lot of ways, what does the movie resemble?", "answer": "['living life next to the mob']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "Who directed \"A Bronx Tale\"?", "answer": "['Robert Dinero']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 0.5, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.75} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-452.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Who gave away Otto's college fund?", "answer": "['His parents']", "recall": "Otto's parents gave away his college fund.", "vectordb": "Otto's parents gave away his college fund to a crooked televangelist.\n", "hybrid": "Otto's parents gave away his college fund to a crooked televangelist.\n"}, "q1": {"question": "What is Bud's job?", "answer": "['Repo man']", "recall": "Bud's job is a repossession agent, or \"repo man\".", "vectordb": "Bud's job is a repossession agent, or \"repo man\".\n", "hybrid": "Bud's job is a repossession agent, or \"repo man\"."}, "q2": {"question": "In what city does Otto live?", "answer": "['Los Angeles']", "recall": "The city in which Otto lives is Los Angeles (LA).", "vectordb": "The city in which Otto lives is not mentioned in the provided information.\nSOURCES:", "hybrid": "The city in which Otto lives is not 
mentioned in the provided information.\nSOURCES:"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-452_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who gave away Otto's college fund?", "answer": "['His parents']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What is Bud's job?", "answer": "['Repo man']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "In what city does Otto live?", "answer": "['Los Angeles']", "recall": 2, "vectordb": 0, "hybrid": 0}, "recall_correct": 6, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 1.0, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-457_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "In what year does the story occur?", "answer": "['117 AD']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Who is king of the Picts?", "answer": "['Gorlacon']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Who betrays the Roman legion to Gorlacon?", "answer": "['Etain']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who kills Etain?", "answer": "['Dias']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who do Dias and his followers befriend in the forest?", "answer": "['Arianne']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 8, "hybrid_correct": 8, "recall_accuracy": 0.6, "vectordb_accuracy": 0.8, "hybrid_accuracy": 0.8} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-458_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "who also played a prominent role in the movie?", "answer": "['Jacqueline', 'Dudley']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "what was the name of group of high school pranksters?", "answer": "['the Knights', 'Knights']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "what was Newbomb Turk wearing?", "answer": "['His camera', \"a rented magician's cape\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-459_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Who does Christiane reach out to?", "answer": "['Jacques Vernon']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What does the woman pull out of her car?", "answer": "['A corpse']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What does Louise cover her scar with?", "answer": "['A pearl choker']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who does the Professor identify the dead woman as?", "answer": "['his missing daughter, Christiane Genessier']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who is Louise's employer?", "answer": "['Dr. 
Genessier']", "recall": 1, "vectordb": 2, "hybrid": 2}, "recall_correct": 8, "vectordb_correct": 9, "hybrid_correct": 9, "recall_accuracy": 0.8, "vectordb_accuracy": 0.9, "hybrid_accuracy": 0.9} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-462_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What is the name of Tom's daughter?", "answer": "['Gem']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What is Tom's profession?", "answer": "['Former musician']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "What does Tom suffer from?", "answer": "['Multi-farct dementia']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q3": {"quesiton": "What genre is Imaginaerum?", "answer": "['Fantasy']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "How does Gem discover the path she must follow to find her father again?", "answer": "['Notes he left behind']", "recall": 1, "vectordb": 2, "hybrid": 2}, "recall_correct": 5, "vectordb_correct": 9, "hybrid_correct": 7, "recall_accuracy": 0.5, "vectordb_accuracy": 0.9, "hybrid_accuracy": 0.7} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-472.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Who files a claim against God ?", "answer": "['Steve']", "recall": "Steve files a claim against God.", "vectordb": "Steve files a claim against God.\n", "hybrid": "Steve files a claim against God.\n"}, "q1": {"question": "What is Steve Myers profession ?", "answer": "['lawyer']", "recall": "Steve Myers is an advocate/lawyer.", "vectordb": "Steve Myers is a lawyer.\n", "hybrid": "Steve 
Myers is an advocate/lawyer."}, "q2": {"question": "What is Steve Myers fed up with ?", "answer": "['corruption within the judicial system']", "recall": "Steve Myers is fed up with the perceived corruption within the judicial system.", "vectordb": "Steve Myers is fed up with the perceived corruption within the judicial system.\n", "hybrid": "Steve Myers is fed up with the perceived corruption within the judicial system."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-472_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who files a claim against God ?", "answer": "['Steve']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What is Steve Myers profession ?", "answer": "['lawyer']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What is Steve Myers fed up with ?", "answer": "['corruption within the judicial system']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 1.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-47_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What is the relationship between Fernand Naudin and \"The Mexican\"?", "answer": "['old friends', 'Childhood friend']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What is \"The Mexican\" the Boss of?", "answer": "['illegal gambling', 'Gangster organisation']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "What does \"The Mexican\" want Fernand to do 
for him?", "answer": "['take care of his daughter and \"business\"', 'Guardian of his teenage daughter']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.0, "vectordb_accuracy": 0.16666666666666666, "hybrid_accuracy": 0.16666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-480.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "where did the entire film happen", "answer": "['in a corporate meeting room']", "recall": "The statement is asking about the location where the entire film took place.", "vectordb": "The entire film takes place in Madrid, Spain.\n", "hybrid": "The entire film takes place in Madrid, Spain.\n"}, "q1": {"question": "Where are the Dekia offices located?", "answer": "['In Madrid']", "recall": "I'm sorry, but I don't have access to personal data about individuals or specific companies unless it has been shared with me in the course of our conversation.", "vectordb": "The Dekia offices are located in Madrid, Spain.\n", "hybrid": "The Dekia offices are located in Madrid, Spain.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-480_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "where did the entire film happen", "answer": "['in a corporate meeting room']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Where are the Dekia offices located?", "answer": "['In Madrid']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 0, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.0, "vectordb_accuracy": 
0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-482_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What is Ra's greatest adversary?", "answer": "[\"Ra's greatest adversary in his quest is the Overseer\", 'Jimmy', 'Overseer']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What planet is destroyed?", "answer": "['Earth', 'Earth is destroyed in its wake.']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who is The Overseer's minion?", "answer": "['two women', 'Jimmy Fey']", "recall": 1, "vectordb": 0, "hybrid": 1}, "q3": {"quesiton": "What government agents are presumably pursuing Ra?", "answer": "['aerospace technicians from NASA']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "What is the name of Sun Ra's crew?", "answer": "['the Arkestra']", "recall": 1, "vectordb": 2, "hybrid": 1}, "recall_correct": 7, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 0.7, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-488.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Where does David take Jenny as a birthday gift?", "answer": "['Paris']", "recall": "David takes Jenny to Paris as a birthday gift.", "vectordb": "David takes Jenny to Paris as a birthday gift.\n", "hybrid": "David takes Jenny to Paris as a birthday gift.\n"}, "q1": {"question": "What does David Goldman drive?", "answer": "['Bristol']", "recall": "The statements do not provide any information about what David Goldman drives.", "vectordb": "There is 
no information provided about what David Goldman drives.\nSOURCES:", "hybrid": "The statements do not provide any information about what David Goldman drives."}, "q2": {"question": "How old is Jenny Mellor?", "answer": "['17']", "recall": "Jenny Mellor is 17 years old.", "vectordb": "Jenny Mellor is 17 years old.\n", "hybrid": "Jenny Mellor is 17 years old."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-488_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Where does David take Jenny as a birthday gift?", "answer": "['Paris']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What does David Goldman drive?", "answer": "['Bristol']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "How old is Jenny Mellor?", "answer": "['17']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 4, "hybrid_correct": 4, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-492.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "Who is not plagued by the mysterious nightmares?", "answer": "['The narrator']", "recall": "There is no definitive answer to this statement as it is impossible to determine who is not plagued by mysterious nightmares. 
Nightmares can affect individuals differently, and some people may not experience them at all.", "vectordb": "It is not specified who is not plagued by the mysterious nightmares.\n", "hybrid": "There is no definitive answer to this statement as it is impossible to determine who is not plagued by mysterious nightmares. Nightmares can affect individuals differently, and some people may not experience them at all."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-492_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "Who is not plagued by the mysterious nightmares?", "answer": "['The narrator']", "recall": 2, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 1.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-494.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "WHo is Goldmoon's companion?", "answer": "['Riverwind']", "recall": "Goldmoon's companion is Riverwind.", "vectordb": "Riverwind is Goldmoon's companion.\n", "hybrid": "Goldmoon's companion is Riverwind."}, "q1": {"question": "What is the first name of the dwarf?", "answer": "['Flint Fireforge']", "recall": "The statement is incomplete and does not provide enough information to determine the first name of the dwarf.", "vectordb": "The first name of the dwarf is Bupu.\n", "hybrid": "The statement is incomplete and does not provide enough information to determine the first name of the dwarf."}}} -------------------------------------------------------------------------------- 
/recallm/benchmarks/duoRC_50_percent/test-494_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "WHo is Goldmoon's companion?", "answer": "['Riverwind']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What is the first name of the dwarf?", "answer": "['Flint Fireforge']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.5, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-498_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "How is Sonny related to Jewel?", "answer": "[\"Jewel is Sonny's mother\"]", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "who is forced to return to working ?", "answer": "['Sonny']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q2": {"quesiton": "Who does Carol accept a marriage proposal?", "answer": "['Troy', 'Sonny']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q3": {"quesiton": "Who does Carol hope to go away with?", "answer": "['Sonny']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who is Sonny staying with?", "answer": "['Jewel']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q5": {"quesiton": "what is the name of person carol in love?", "answer": "['Sonny']", "recall": 2, "vectordb": 0, "hybrid": 0}, "recall_correct": 9, "vectordb_correct": 8, "hybrid_correct": 7, "recall_accuracy": 0.75, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.5833333333333334} -------------------------------------------------------------------------------- 
/recallm/benchmarks/duoRC_50_percent/test-500.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "What did Fiske steal?", "answer": "['300,000 in payroll']", "recall": "Fiske stole the payroll.", "vectordb": "Fiske stole the payroll.\n", "hybrid": "Fiske stole the payroll."}, "q1": {"question": "Where did Captain Blake confront Duke Halliday?", "answer": "['an isolated house in the desert']", "recall": "Captain Blake confronted Duke Halliday at his office.", "vectordb": "Captain Blake confronted Duke Halliday at an isolated house in the desert.\n", "hybrid": "Captain Blake confronted Duke Halliday at an isolated house in the desert.\n"}, "q2": {"question": "What is the name of the man being searched by Halliday and Joan?", "answer": "['Fiske']", "recall": "The man being searched by Halliday and Joan is Fiske.", "vectordb": "The name of the man being searched by Halliday and Joan is Jim Fiske.\n", "hybrid": "The man being searched by Halliday and Joan is Fiske."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-500_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What did Fiske steal?", "answer": "['300,000 in payroll']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Where did Captain Blake confront Duke Halliday?", "answer": "['an isolated house in the desert']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What is the name of the man being searched by Halliday and Joan?", "answer": "['Fiske']", "recall": 2, "vectordb": 1, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 5, "hybrid_correct": 6, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.8333333333333334, 
"hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-504_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Who is Bugs Bunny trying to flee from?", "answer": "['cartoons']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What does Elmer manage to concoct and successfully test?", "answer": "['toxic berry']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What is the Saber-tooth rabbit munching on?", "answer": "['albeit covered by a rock']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "What is the name of the hunter in the film Bugs has found?", "answer": "['Elmer Fuddstone']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "What does Bugs find inside the horn?", "answer": "['giant powder']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q5": {"quesiton": "Who does Elmer shoot?", "answer": "['Bugs']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 3, "hybrid_correct": 2, "recall_accuracy": 0.25, "vectordb_accuracy": 0.25, "hybrid_accuracy": 0.16666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-505_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "What nationality are the triads?", "answer": "['Chinese']", "recall": 1, "vectordb": 0, "hybrid": 1}, "recall_correct": 1, "vectordb_correct": 0, "hybrid_correct": 1, "recall_accuracy": 0.5, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- 
/recallm/benchmarks/duoRC_50_percent/test-509.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Who summons Marilyn Gregor to the police station?", "answer": "['Dr. Gregor']", "recall": "Inspector Johns summons Marilyn Gregor to the police station.", "vectordb": "Inspector Johns and Lt. Bob Lawrence summon Marilyn Gregor to the police station.\n", "hybrid": "Inspector Johns summons Marilyn Gregor to the police station."}, "q1": {"question": "Who is Don's criminal friend?", "answer": "['Vic Brady', 'brady']", "recall": "Don's criminal friend is Brady.", "vectordb": "The document does not mention the name of Don's criminal friend.\nSOURCES:", "hybrid": "The document does not mention the name of Don's criminal friend.\nSOURCES:"}, "q2": {"question": "What is Marilyne Gregor's brother's name?", "answer": "['Don Gregor']", "recall": "Marilyne Gregor's brother's name is Don Gregor.", "vectordb": "The name of Marilyne Gregor's brother is Don Gregor.\n", "hybrid": "Marilyne Gregor's brother's name is Don Gregor."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-509_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Who summons Marilyn Gregor to the police station?", "answer": "['Dr. 
Gregor']", "recall": 1, "vectordb": 0, "hybrid": 1}, "q1": {"quesiton": "Who is Don's criminal friend?", "answer": "['Vic Brady', 'brady']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What is Marilyne Gregor's brother's name?", "answer": "['Don Gregor']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 5, "vectordb_correct": 2, "hybrid_correct": 3, "recall_accuracy": 0.8333333333333334, "vectordb_accuracy": 0.3333333333333333, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-511_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Who does Veronica befriend ?", "answer": "['Flavia']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who does Veronica live with?", "answer": "['invalid grandmother and her nanny']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What type of family does Flavia come from ?", "answer": "['a wealthy family']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "What does Veronica demand from Flavia ?", "answer": "['surrender her beloved pet dog']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "What does Veronica plan to make for the fairies ?", "answer": "['to make a poison']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 1, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.1, "vectordb_accuracy": 0.1, "hybrid_accuracy": 0.1} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-513_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "what is Skip 
Cuddy hired to do?", "answer": "['Work for Lydia at her mansion.']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What does skip stumble across?", "answer": "['Skip finds the baby left in the garage.']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 4, "hybrid_correct": 2, "recall_accuracy": 0.5, "vectordb_accuracy": 1.0, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-519_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who is mccaleb's physician?", "answer": "[\"Dr. Fox (Angelica Huston) was McCaleb's physician.\"]", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "Who has buddy kidnapped?", "answer": "['Graciella and her nephew']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "Who is McCaleb's neighbor?", "answer": "[\"McCaleb's neighbor is not mentioned in this plot.\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "What was the name of Gloria's sister?", "answer": "['Gloria was not mentioned in this plot.']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 2, "vectordb_correct": 5, "hybrid_correct": 3, "recall_accuracy": 0.25, "vectordb_accuracy": 0.625, "hybrid_accuracy": 0.375} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-521_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Who takes away Riff's ticket?", "answer": "['Principal Togar']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "Why does Vince Lombardi High School keep losing principals 
?", "answer": "['The students keep driving them crazy, causing them to go to the mental hospital.', 'to nervous breakdowns']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who are made honorary students?", "answer": "['Ramones', 'Riff']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "What year is the movie set in ?", "answer": "[\"Sometime in the 2000's. Post 1960, but in a time with noise-cancelling headphones.\", '1980']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 5, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.625, "vectordb_accuracy": 0.375, "hybrid_accuracy": 0.375} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-523_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What city does Stevie get a job in?", "answer": "['London']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "What type of site does Stevie work at?", "answer": "['building site']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who's handbag does Stevie find?", "answer": "['Susan']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q3": {"quesiton": "the music for the film is scored by whom?", "answer": "['Stewart Copeland']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q4": {"quesiton": "who is the owner of the handbag that stevie finds?", "answer": "['Susan']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "What is Susan's job", "answer": "['Irish actress and singer']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 6, "vectordb_correct": 6, "hybrid_correct": 6, "recall_accuracy": 0.5, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- 
/recallm/benchmarks/duoRC_50_percent/test-525_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "What planet was blocked during the solar eclipse?", "answer": "['Saturn']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who is Debbie's sister?", "answer": "['Beverly']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who does Debbie blame the scrapbook on?", "answer": "['Curtis']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who does Joyce find a note from?", "answer": "['Timmy']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who finds the Ms. Davis's body?", "answer": "['Joyce']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q5": {"quesiton": "Who kills Ms. Davis?", "answer": "['Curtis']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 8, "vectordb_correct": 8, "hybrid_correct": 6, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.6666666666666666, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-526.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "how many children does Betty Ann Waters have?", "answer": "['2']", "recall": "The statements do not provide information about the number of children Betty Ann Waters has.", "vectordb": "Betty Ann Waters has two sons.\n", "hybrid": "The statements do not provide information about the number of children Betty Ann Waters has."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-526_score_gpt.json: -------------------------------------------------------------------------------- 1 | 
{"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "how many children does Betty Ann Waters have?", "answer": "['2']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 2, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-529.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "In what state did Dale S. Rogers live?", "answer": "['Texas']", "recall": "Dale S. Rogers lived in Texas.", "vectordb": "Dale S. Rogers lived in Sublime, Texas.\n", "hybrid": "Dale S. Rogers lived in Sublime, Texas.\n"}, "q1": {"question": "What type of job did Dale S. Rogers ave and lose?", "answer": "['welding']", "recall": "Dale S. Rogers was a welder and he unexpectedly lost his job as a welder.", "vectordb": "Dale S. Rogers had a welding job and lost it.\n", "hybrid": "Dale S. Rogers was a welder and he unexpectedly lost his job as a welder."}, "q2": {"question": "What apparatus does Dale's wife Jean use?", "answer": "['wheelchair']", "recall": "Dale's wife Jean is severely invalid and uses a wheelchair.", "vectordb": "Dale's wife Jean is wheelchair-bound.\n", "hybrid": "Dale's wife Jean is severely invalid and uses a wheelchair."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-529_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "In what state did Dale S. Rogers live?", "answer": "['Texas']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What type of job did Dale S. 
Rogers ave and lose?", "answer": "['welding']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "What apparatus does Dale's wife Jean use?", "answer": "['wheelchair']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 1.0, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-530_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "What does Ed Okin encounter at the airport ?", "answer": "['A jewel smuggler named Diana']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "Who plays Ed Okin ?", "answer": "['Jeff Goldblum']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who's wife is cheating on him ?", "answer": "[\"Ed Okin's wife\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 3, "vectordb_correct": 3, "hybrid_correct": 3, "recall_accuracy": 0.5, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-532_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who said killing the newborn would not be good for the kingdom?", "answer": "['Chinnavadayaan']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "How is Sagilimayan related to the king' wife?", "answer": "['Siblings']", "recall": 2, "vectordb": 0, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 2, "hybrid_correct": 4, "recall_accuracy": 1.0, "vectordb_accuracy": 0.5, "hybrid_accuracy": 
1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-535_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Who is Danny's first love?", "answer": "['Carter']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q1": {"quesiton": "What triggers Danny's memories of his first love?", "answer": "['History of seizures and the painful relationship he has with his unsympathetic, recovering alcoholic father']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q2": {"quesiton": "Who is the young artist?", "answer": "['Tye Olson']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "Who is Danny's best friend?", "answer": "['Carter']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "What was Danny's profession?", "answer": "['Artist']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q5": {"quesiton": "Who was Danny's first love?", "answer": "['Carter']", "recall": 2, "vectordb": 0, "hybrid": 2}, "recall_correct": 8, "vectordb_correct": 5, "hybrid_correct": 8, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.4166666666666667, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-540.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "Who is promoted to Sergeant?", "answer": "['Danny']", "recall": "The specific individuals who are promoted to the rank of Sergeant can vary depending on the organization or context in question. In general, a Sergeant is a non-commissioned officer (NCO) rank in the military or law enforcement agencies. 
The promotion to Sergeant is typically based on a combination of factors such as experience, performance, leadership abilities, and meeting specific criteria set by the organization.", "vectordb": "Danny is promoted to Sergeant.\n", "hybrid": "Danny is promoted to Sergeant.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-540_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "Who is promoted to Sergeant?", "answer": "['Danny']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 0, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.0, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-541.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who stops the ghost from its killing spree ?", "answer": "['Laura']", "recall": "The ghost is stopped by a team of paranormal investigators who use their knowledge and expertise to find a way to banish or neutralize the ghost, putting an end to its killing spree.", "vectordb": "The ghost is stopped by Zane.\n", "hybrid": "The ghost is stopped by a team of paranormal investigators who use their knowledge and expertise to find a way to banish or neutralize the ghost, putting an end to its killing spree."}, "q1": {"question": "How are Rene and Zane related ?", "answer": "['Cousins']", "recall": "The statements do not provide any information about the relationship between Rene and Zane.", "vectordb": "Rene and Zane are cousins.\n", "hybrid": "The statements do not provide any information about the relationship between Rene and Zane."}}} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-541_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who stops the ghost from its killing spree ?", "answer": "['Laura']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "How are Rene and Zane related ?", "answer": "['Cousins']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 2, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-547_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Who hits Barry on his face?", "answer": "['Wesley']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What is Wesley shown?", "answer": "[\"Cross' apartment\"]", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What does Wesley discovery when he wakes up one morning?", "answer": "['He discovers that his bank account now contains several million dollars']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "Who is the group's leader?", "answer": "['Sloan']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who awakens in the headquarters of the Fraternity?", "answer": "['Wesley']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q5": {"quesiton": "Does Sloan escape?", "answer": "['Wesley']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 4, "vectordb_correct": 5, "hybrid_correct": 5, "recall_accuracy": 0.3333333333333333, "vectordb_accuracy": 0.4166666666666667, "hybrid_accuracy": 
0.4166666666666667} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-548_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What does David teach?", "answer": "['Romantic poets']", "recall": 1, "vectordb": 2, "hybrid": 1}, "q1": {"quesiton": "Where does Lucy own a farm?", "answer": "['The bush']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who is Lucy's farm manager?", "answer": "['Petrus']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Where is David a professor?", "answer": "['South Africa']", "recall": 2, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "How many men attack David and Lucy?", "answer": "['3']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 9, "vectordb_correct": 8, "hybrid_correct": 7, "recall_accuracy": 0.9, "vectordb_accuracy": 0.8, "hybrid_accuracy": 0.7} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-552_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Did society encourage love and freedom beteen the characters?", "answer": "['No']", "recall": 2, "vectordb": 0, "hybrid": 2}, "q1": {"quesiton": "What hppaned toChandni and Amit?", "answer": "['They dont get married to each other']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Did their love last a long time?", "answer": "['Yes.']", "recall": 1, "vectordb": 2, "hybrid": 1}, "q3": {"quesiton": "What were the names of the three human beings bound by love?", "answer": "['Shashi Kapoor, Amitabh Bachchan, Jaya Bachchan']", "recall": 0, "vectordb": 1, "hybrid": 1}, 
"recall_correct": 3, "vectordb_correct": 3, "hybrid_correct": 4, "recall_accuracy": 0.375, "vectordb_accuracy": 0.375, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-553.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who attempts to take over the factory?", "answer": "['Ravidge']", "recall": "The statement does not provide enough information to determine who attempts to take over the factory.", "vectordb": "The goons attempt to take over the factory.\n", "hybrid": "The goons attempt to take over the factory.\n"}, "q1": {"question": "What war is Alan a veteran of?", "answer": "['Korean War']", "recall": "The statement does not provide any information about Alan's war veteran status.", "vectordb": "Alan is a veteran of an unknown war.\n", "hybrid": "The statement does not provide any information about Alan's war veteran status."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-553_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who attempts to take over the factory?", "answer": "['Ravidge']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "What war is Alan a veteran of?", "answer": "['Korean War']", "recall": 0, "vectordb": 1, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 2, "hybrid_correct": 1, "recall_accuracy": 0.0, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.25} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-556_score_gpt.json: -------------------------------------------------------------------------------- 1 | 
{"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "who play a game with suction cup dart toy guns?", "answer": "['a group of students', 'A group of college students']", "recall": 0, "vectordb": 0, "hybrid": 1}, "q1": {"quesiton": "what is the name of the student?", "answer": "['Alex, Susan', 'Loren Gersh']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "Who does Gersh transform?", "answer": "['He transforms himself from a avid player to a cold blooded killer', 'James Bond']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q3": {"quesiton": "dart gun hits whom?", "answer": "['Gersh', \"Duane's hits Loren\", 'victims']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q4": {"quesiton": "Gersh kills whom?", "answer": "['victims', 'Duane Swanson', 'Nancy']", "recall": 1, "vectordb": 0, "hybrid": 0}, "recall_correct": 1, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.1, "vectordb_accuracy": 0.2, "hybrid_accuracy": 0.2} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-55_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Davey is captured on his way to rescue who?", "answer": "['Gloria']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Where does the train ticket Davey buys go to?", "answer": "['Seattle']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who does Davey see attacking Gloria?", "answer": "['Rapallos men']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q3": {"quesiton": "Who kidnaps Gloria?", "answer": "['Rapallo']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who does Davey gaze upon ?", "answer": "['Gloria']", "recall": 0, "vectordb": 2, "hybrid": 0}, "recall_correct": 6, "vectordb_correct": 9, 
"hybrid_correct": 8, "recall_accuracy": 0.6, "vectordb_accuracy": 0.9, "hybrid_accuracy": 0.8} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-563.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "Who works with Ismael?", "answer": "['Alice']", "recall": "The statement is a question and it is asking about the people who work with Ismael.", "vectordb": "Ismael works with Alice.\n", "hybrid": "The statement is a question and it is asking about the people who work with Ismael."}, "q1": {"question": "Who does Ismael live with?", "answer": "['He lives with Julie until she dies.']", "recall": "The statement is a question and it is asking about the person or people that Ismael lives with.", "vectordb": "Ismael lives with Jeanne.\n", "hybrid": "Ismael lives with Jeanne.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-563_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who works with Ismael?", "answer": "['Alice']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "Who does Ismael live with?", "answer": "['He lives with Julie until she dies.']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 0, "vectordb_correct": 3, "hybrid_correct": 1, "recall_accuracy": 0.0, "vectordb_accuracy": 0.75, "hybrid_accuracy": 0.25} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-567.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "Where did the live 
for decades?", "answer": "['New York City']", "recall": "This statement is incomplete and does not provide enough information to determine where someone lived for decades.", "vectordb": "The Beale women lived at their Long Island summer home/estate Grey Gardens for decades.\n", "hybrid": "The Beale women lived at their Long Island summer home/estate Grey Gardens for decades.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-567_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "Where did the live for decades?", "answer": "['New York City']", "recall": 0, "vectordb": 1, "hybrid": 1}, "recall_correct": 0, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.0, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-568_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "Who gives Poe a letter from the killer?", "answer": "['The newspaper editor']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "To whom Ivan Reynolds offer the drink?", "answer": "['Poe']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "Where is Ivan when he gets off of the train?", "answer": "['Paris, France']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q3": {"quesiton": "Where does he go to die?", "answer": "['a park bench']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q4": {"quesiton": "Who is real killer?", "answer": "['Ivan']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 2, "hybrid_correct": 0, "recall_accuracy": 
0.0, "vectordb_accuracy": 0.2, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-569.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 1, "sample_0": {"q0": {"question": "Where does the plane crash?", "answer": "['The waters around the Bahamas']", "recall": "The plane crashes underwater.", "vectordb": "The plane crash occurs in the waters around the Bahamas.\n", "hybrid": "The plane crash occurs in the waters around the Bahamas.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-569_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 1, "graded_questions": 3, "q0": {"quesiton": "Where does the plane crash?", "answer": "['The waters around the Bahamas']", "recall": 1, "vectordb": 2, "hybrid": 2}, "recall_correct": 1, "vectordb_correct": 2, "hybrid_correct": 2, "recall_accuracy": 0.5, "vectordb_accuracy": 1.0, "hybrid_accuracy": 1.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-58_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Undercover who spent the last seven years?", "answer": "['Nick', 'Special Agent Nick']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "what was the ailing kwan makes?", "answer": "['Nick his prot\u00e9g\u00e9']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q2": {"quesiton": "What is Officer Nick's issue?", "answer": "['power and money corrupt him', 'sway before money and power']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q3": 
{"quesiton": "What drug is the addict hooked on?", "answer": "['heroin']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.25, "vectordb_accuracy": 0.125, "hybrid_accuracy": 0.125} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-60.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "What is a twitch?", "answer": "['Witch']", "recall": "A twitch is a sudden, involuntary muscle contraction or spasm. It is typically brief and can occur in any muscle group in the body. Twitches can be caused by various factors such as stress, fatigue, caffeine, or certain medical conditions.", "vectordb": "I don't know what a twitch is based on the given information.\n", "hybrid": "A twitch is a sudden, involuntary muscle contraction or spasm. It is typically brief and can occur in any muscle group in the body. 
Twitches can be caused by various factors such as stress, fatigue, caffeine, or certain medical conditions."}, "q1": {"question": "Which twin character wants a more \"normal\" life involving college?", "answer": "['Alex']", "recall": "It is not specified which twin character wants a more \"normal\" life involving college.", "vectordb": "Camryn is the twin character who wants a more \"normal\" life involving college.\n", "hybrid": "Camryn is the twin character who wants a more \"normal\" life involving college.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-60_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "What is a twitch?", "answer": "['Witch']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Which twin character wants a more \"normal\" life involving college?", "answer": "['Alex']", "recall": 0, "vectordb": 0, "hybrid": 0}, "recall_correct": 0, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.0, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-61_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What was George's symptom of impending explosion?", "answer": "['Dreams']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q1": {"quesiton": "Where did George work?", "answer": "['Jeffers Corporation']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Where does George work?", "answer": "['Jeffers Corporation']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "What time of film is Visioneers?", 
"answer": "['Science Fiction']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q4": {"quesiton": "What did George become in one of his dreams?", "answer": "['is the first president of the United States']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 8, "vectordb_correct": 8, "hybrid_correct": 8, "recall_accuracy": 0.8, "vectordb_accuracy": 0.8, "hybrid_accuracy": 0.8} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-62_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Where does the story take place?", "answer": "['Ukraine']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q1": {"quesiton": "What is Marian's medical condition?", "answer": "['brain tumour']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "Where are Henry and Igor?", "answer": "['Ukraine']", "recall": 1, "vectordb": 2, "hybrid": 2}, "recall_correct": 3, "vectordb_correct": 5, "hybrid_correct": 3, "recall_accuracy": 0.5, "vectordb_accuracy": 0.8333333333333334, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-67_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "Who is a mob lawyer?", "answer": "['Charlie Arglist']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who has stolen a duffel bag full of money?", "answer": "['Charlie', 'Charlie Arglist & Vic Cavanaugh']", "recall": 1, "vectordb": 1, "hybrid": 1}, "recall_correct": 1, "vectordb_correct": 1, "hybrid_correct": 1, "recall_accuracy": 0.25, "vectordb_accuracy": 0.25, "hybrid_accuracy": 0.25} 
-------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-74_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 4, "graded_questions": 12, "q0": {"quesiton": "Where does Makoto's aunt work?", "answer": "['Tokyo National Museum']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q1": {"quesiton": "What does Makoto fall upon ?", "answer": "['a walnut-shaped object', 'Guard rail']", "recall": 0, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "What does Kazuko explain to Makoto ?", "answer": "['that she has the power to \"time-leap\", to literally leap through time']", "recall": 2, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "What is Makoto hit by ?", "answer": "['train', 'Guard rail']", "recall": 2, "vectordb": 0, "hybrid": 0}, "recall_correct": 6, "vectordb_correct": 3, "hybrid_correct": 4, "recall_accuracy": 0.75, "vectordb_accuracy": 0.375, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-82_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 5, "graded_questions": 15, "q0": {"quesiton": "What year is Thoms born?", "answer": "['1986']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "What year did Julieta die?", "answer": "['2008']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q2": {"quesiton": "Where does Julieta work?", "answer": "['working in a hospital emergency department.']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q3": {"quesiton": "Who is Thom's brother?", "answer": "['Francisco']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "Who is Thoms's brother?", "answer": "['Francisco']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 5, 
"vectordb_correct": 7, "hybrid_correct": 5, "recall_accuracy": 0.5, "vectordb_accuracy": 0.7, "hybrid_accuracy": 0.5} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-83_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 6, "graded_questions": 18, "q0": {"quesiton": "Where did Akari move?", "answer": "['Tochigi']", "recall": 2, "vectordb": 1, "hybrid": 2}, "q1": {"quesiton": "What is Takaki's job in the year 2008?", "answer": "['computer programmer']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q2": {"quesiton": "Who has feelings for Takaki?", "answer": "['Kanae Sumida']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q3": {"quesiton": "Who was Takaki Tono's close friend?", "answer": "['Akari']", "recall": 0, "vectordb": 2, "hybrid": 2}, "q4": {"quesiton": "In 2008, who is preparing to get married?", "answer": "['Akari']", "recall": 0, "vectordb": 2, "hybrid": 0}, "q5": {"quesiton": "Where did Takaki attend junior high?", "answer": "['Tanegashima']", "recall": 0, "vectordb": 2, "hybrid": 2}, "recall_correct": 6, "vectordb_correct": 11, "hybrid_correct": 10, "recall_accuracy": 0.5, "vectordb_accuracy": 0.9166666666666666, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-85.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 3, "sample_0": {"q0": {"question": "Where does the movie take place?", "answer": "['Lands end', 'gulls island', 'an isolated island']", "recall": "The movie takes place in a fishing town called Land's End.", "vectordb": "The movie takes place on an isolated island.\n", "hybrid": "The movie takes place on an isolated island.\n"}, "q1": {"question": "Where has a similar attack 
of birds been seen 30 years before?", "answer": "[\"Helen's nephew\", 'a fishing town']", "recall": "A similar attack of birds has been seen on the island 30 years before.", "vectordb": "A similar attack of birds was seen 30 years before in the seaside.\n", "hybrid": "A similar attack of birds was seen 30 years before in the seaside.\n"}, "q2": {"question": "How many daughters do the Hocken's have", "answer": "['two daughters Jill and Joanna', 'two', 'Three girls mentioned']", "recall": "The Hocken's have two daughters.", "vectordb": "The document does not mention the number of daughters the Hocken's have.\nSOURCES:", "hybrid": "The Hocken's have two daughters."}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-85_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Where does the movie take place?", "answer": "['Lands end', 'gulls island', 'an isolated island']", "recall": 2, "vectordb": 2, "hybrid": 2}, "q1": {"quesiton": "Where has a similar attack of birds been seen 30 years before?", "answer": "[\"Helen's nephew\", 'a fishing town']", "recall": 1, "vectordb": 1, "hybrid": 1}, "q2": {"quesiton": "How many daughters do the Hocken's have", "answer": "['two daughters Jill and Joanna', 'two', 'Three girls mentioned']", "recall": 2, "vectordb": 0, "hybrid": 2}, "recall_correct": 5, "vectordb_correct": 3, "hybrid_correct": 5, "recall_accuracy": 0.8333333333333334, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.8333333333333334} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-92.json: -------------------------------------------------------------------------------- 1 | {"question_samples": 1, "num_questions": 2, "sample_0": {"q0": {"question": "What city are Zack, 
Jack and Bob arrested in?", "answer": "['Italy']", "recall": "The statements do not provide information about the city where Zack, Jack, and Bob are arrested.", "vectordb": "The city where Zack, Jack, and Bob are arrested is not mentioned in the provided content.\nSOURCES:", "hybrid": "The statements do not provide information about the city where Zack, Jack, and Bob are arrested."}, "q1": {"question": "Who escapes the prison with Zack and Jack?", "answer": "['Roberto']", "recall": "Roberto escapes the prison with Zack and Jack.", "vectordb": "The information provided does not specify who escapes the prison with Zack and Jack.\n", "hybrid": "The information provided does not specify who escapes the prison with Zack and Jack.\n"}}} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-92_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 2, "graded_questions": 6, "q0": {"quesiton": "What city are Zack, Jack and Bob arrested in?", "answer": "['Italy']", "recall": 0, "vectordb": 0, "hybrid": 0}, "q1": {"quesiton": "Who escapes the prison with Zack and Jack?", "answer": "['Roberto']", "recall": 2, "vectordb": 0, "hybrid": 0}, "recall_correct": 2, "vectordb_correct": 0, "hybrid_correct": 0, "recall_accuracy": 0.5, "vectordb_accuracy": 0.0, "hybrid_accuracy": 0.0} -------------------------------------------------------------------------------- /recallm/benchmarks/duoRC_50_percent/test-9_score_gpt.json: -------------------------------------------------------------------------------- 1 | {"grader_name": "gpt", "question_samples": 1, "num_questions": 3, "graded_questions": 9, "q0": {"quesiton": "Frederick Douglas escaped from Ireland to America in what year?", "answer": "['1840']", "recall": 0, "vectordb": 1, "hybrid": 0}, "q1": {"quesiton": "Who was Frederick Douglas's friend?", "answer": 
"[\"Daniel O'Connell\"]", "recall": 2, "vectordb": 0, "hybrid": 2}, "q2": {"quesiton": "How did Douglas buy freedom in America?", "answer": "['With money raised in Ireland and Britain']", "recall": 2, "vectordb": 2, "hybrid": 2}, "recall_correct": 4, "vectordb_correct": 3, "hybrid_correct": 4, "recall_accuracy": 0.6666666666666666, "vectordb_accuracy": 0.5, "hybrid_accuracy": 0.6666666666666666} -------------------------------------------------------------------------------- /recallm/config.py: -------------------------------------------------------------------------------- 1 | verbose = False 2 | show_revisions = False 3 | compare = False -------------------------------------------------------------------------------- /recallm/datasets/duorc/test-106.txt: -------------------------------------------------------------------------------- 1 | PARAGRAPH 2 | Two friends, Balagopalan (Jayaram) and Jayakumar (Jagathy Sreekumar), own a call service called "We Help" to help people with the assistance of lawyer Madhavan Thampi (Madhu). Balu gets a call from Indu R. Nair, who is the daughter of a businessman. Later, Indu asks Balagoapan to be her bodyguard. But when Jayakumar and Balagopalan visit her home, they discover that Indu has been murdered. Officer Thomas Mathew takes charge of the case. Madhavan tries to find her killer to save Balagopalan and Jayakumar, before it is too late. 3 | 4 | 5 | QUESTIONS 6 | q: Who sees the first murder? 7 | a: ['Jayakumar and Balagopalan'] 8 | 9 | q: Who got killed? 10 | a: ['Indu'] 11 | 12 | q: What is the name of the policeman assigned to the case? 13 | a: ['Thomas Mathew'] 14 | 15 | q: Is Rachel amish? 
16 | a: ['no'] 17 | 18 | -------------------------------------------------------------------------------- /recallm/datasets/duorc/test-113.txt: -------------------------------------------------------------------------------- 1 | PARAGRAPH 2 | Set inside a "Quake" like video game, one of the game's cannon-fodder grunts falls for the Lara Croft-inspired heroine and, in a constantly looping game level, tries time and again to catch her attention before she can "chain gun" him. 3 | 4 | 5 | QUESTIONS 6 | q: How are Carlo and Sara killed? 7 | a: ['Walter'] 8 | 9 | q: What starts a fire in the Countess' apartment? 10 | a: ['explosion of an electrical appliance'] 11 | 12 | q: Who is Rose's brother? 13 | a: ['Matthew, Mark, Luke, and Bing.'] 14 | 15 | q: What actress plays Sara 16 | a: ['Sarah Wayne Callies'] 17 | 18 | q: What is the architect's name? 19 | a: ['FRIBA'] 20 | 21 | -------------------------------------------------------------------------------- /recallm/datasets/duorc/test-304.txt: -------------------------------------------------------------------------------- 1 | PARAGRAPH 2 | When renowned Broadway actress Valerie Stanton (Rosalind Russell) decides to leave her ex-lover producer Gordon Dunning (Leon Ames) to do serious drama with a new producer Dunning threatens her with slanderous actions. Dunning has been the producer of all the big plays Valerie has appeared in for the past 10 years and threatens to poison her relationship with her current beau, successful architect Michael Morrell (Leo Genn). In a fit of rage Valerie fatally strikes Dunning with a bronze statuette, and just by chance fellow actress and competitor Marian Webster (Claire Trevor) is discovered with the body and is held for the murder. Valerie is in shock over her own actions, and when police Capt. Danbury (Sydney Greenstreet) investigates the tension begins to mount as the actresss conscience begins to erode her nerves and a game of cat and mouse with the police ensues. 
3 | 4 | 5 | QUESTIONS 6 | q: Who is Valerie's new beau? 7 | a: ['Michael Morrell'] 8 | 9 | q: Who does Valerie Stanton kill? 10 | a: ['Gordon Dunning'] 11 | 12 | -------------------------------------------------------------------------------- /recallm/datasets/duorc/test-352.txt: -------------------------------------------------------------------------------- 1 | PARAGRAPH 2 | Aist is a middle-aged bachelor who leads a lonely life in the northern town of Neya. Like many of his neighbours, he identifies himself as a Meryan and strives to keep alive the ancient traditions of his people. One day his boss, Miron, informs Aist of the death of his wife Tanya. Later, the pair spend quite some time washing her body and putting coloured threads in her pubic hair. (In their culture, the same ritual is performed on brides-to-be). The two men take her body to Gorbatov (the smallest town in Russia), in order to perform cremation rites on the banks of the Oka River. In the car, they carry with them two Bunting birds. On their way back to Neya, they get lost and are approached by two prostitutes, with whom they have sex. Later on, while crossing "the great Meryan river" (The Volga), on the Kineshma Bridge, the Buntings fly around the car, causing it to crash into the river. Both men drown. 3 | 4 | 5 | QUESTIONS 6 | q: Who is Miron's wife? 7 | a: ['Tanya'] 8 | 9 | q: What animals are brought on the roadtrip? 10 | a: ['Bunting Birds'] 11 | 12 | -------------------------------------------------------------------------------- /recallm/datasets/duorc/test-436.txt: -------------------------------------------------------------------------------- 1 | PARAGRAPH 2 | Franck Bordoni (Patrick Timsit) loses job as a night watchman when he finds himself inadvertently on the front cover of a popular magazine. The photograph was taken while he was enjoying a football match instead of working. 
Deciding to punish the photographer, Franck visits the magazine’s offices and finds Michel Verdier (Vincent Lindon), a member of the paparazzi pack. Franck clings to Michel, and fascinated by work and eager to serve as an apprentice. Franck immerses himself in exciting new life (and Isabelle Adjani’s dustbins), however he hardly notices former life (including wife and son) disappear. 3 | 4 | 5 | QUESTIONS 6 | q: What is Zach doing while Rex is taking pictures of him? 7 | a: ['Watching football'] 8 | 9 | -------------------------------------------------------------------------------- /recallm/datasets/duorc/test-505.txt: -------------------------------------------------------------------------------- 1 | PARAGRAPH 2 | Oscar Bonsetter tells a dying prisoner that he will take revenge on the sadistic guard who killed him. In exchange, Oscar is told of a stash of money. Oscar is eventually released from prison but when he goes to get his revenge, he gets sidetracked by the now-handicapped guard and his alluring wife, Rose. The tension builds as Oscar becomes more and more attracted to Rose. 3 | 4 | 5 | QUESTIONS 6 | q: What nationality are the triads? 
7 | a: ['Chinese'] 8 | 9 | -------------------------------------------------------------------------------- /recallm/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '1.0' 2 | services: 3 | neo4j: 4 | image: neo4j:latest 5 | ports: 6 | - 7474:7474 7 | - 7687:7687 8 | volumes: 9 | - /neo4j_data:/data 10 | environment: 11 | NEO4J_AUTH: neo4j/password 12 | NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes" 13 | NEO4J_PLUGINS: '["apoc"]' -------------------------------------------------------------------------------- /recallm/docs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cisco-open/DeepVision/e110a192346be0a5e164d28e3dbce58bc97381f0/recallm/docs/architecture.png -------------------------------------------------------------------------------- /recallm/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.157 2 | neo4j 3 | chromadb==0.3.21 4 | transformers 5 | sentence_transformers 6 | openai 7 | deprecated 8 | bs4 9 | readability-lxml 10 | stanza 11 | nltk 12 | datasets 13 | 14 | # For MPT7B 15 | Xformers 16 | accelerate 17 | einops -------------------------------------------------------------------------------- /recallm/utils.py: -------------------------------------------------------------------------------- 1 | class TextColor: 2 | BLACK = '\033[30m' 3 | RED = '\033[31m' 4 | GREEN = '\033[32m' 5 | YELLOW = '\033[33m' 6 | BLUE = '\033[34m' 7 | MAGENTA = '\033[35m' 8 | CYAN = '\033[36m' 9 | WHITE = '\033[37m' 10 | RESET = '\033[0m' -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Cisco Systems, Inc. 
and its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | 17 | numpy 18 | jinja2==3.1.1 19 | opencv-python>=4.0.1.23 20 | redis 21 | 22 | torch 23 | flask 24 | Pillow==9.3.0 25 | seaborn 26 | redistimeseries 27 | 28 | 29 | -------------------------------------------------------------------------------- /tracking/requirements.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Cisco Systems, Inc. and its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | 17 | redis -------------------------------------------------------------------------------- /tracklet/tsconversion.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Cisco Systems, Inc. 
and its affiliates 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # SPDX-License-Identifier: Apache-2.0 16 | 17 | # Different algos to convert tracker tracking info to timeseries 18 | def aspect_ratio(tl_x, tl_y, br_x, br_y): 19 | width = br_x - tl_x 20 | height = br_y - tl_y 21 | 22 | return round(height / width) 23 | 24 | 25 | def perspective_invariant(): 26 | pass 27 | -------------------------------------------------------------------------------- /utils/RedisStreamManager.py: -------------------------------------------------------------------------------- 1 | import redis 2 | import json 3 | 4 | class RedisStreamManager: 5 | def __init__(self, stream_name: str): 6 | self.stream_name = stream_name 7 | self.redis_conn = redis.Redis(host='redis', port=6379, db=0) 8 | self.clear_stream() 9 | 10 | def write_message(self, message: dict) -> None: 11 | self.redis_conn.xadd(self.stream_name, message) 12 | 13 | def _read_latest_message(self) -> dict: 14 | messages = self.redis_conn.xrevrange(self.stream_name, count=1) 15 | if messages: 16 | _, message = messages[0] 17 | return message 18 | else: 19 | return None 20 | 21 | def clear_stream(self) -> None: 22 | self.redis_conn.delete(self.stream_name) -------------------------------------------------------------------------------- /utils/Utility.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | def 
convert_redis_entry_id_to_mls(entry_id): 5 | return int(entry_id.split("-")[0]) 6 | 7 | 8 | def diff_since_epoch_mls_with_current(timestamp): 9 | current_time_ms = int(time.time() * 1000) 10 | return (current_time_ms - timestamp) / 1000 11 | 12 | 13 | def is_lt_eq_threshold(entry_id, threshold): 14 | if not entry_id: 15 | return False 16 | id_mls = convert_redis_entry_id_to_mls(entry_id.decode()) 17 | diff = diff_since_epoch_mls_with_current(id_mls) 18 | return diff < threshold 19 | -------------------------------------------------------------------------------- /utils/constants.py: -------------------------------------------------------------------------------- 1 | # Tracking module constants 2 | REDISTIMESERIES = "redistimeseries" 3 | REDISTIMESERIES_PORT = 6379 4 | MODEL_RUN_LATENCY = "model_run_latency" 5 | BOUNDING_BOXES_LATENCY = "bounding_boxes_latency" 6 | GPU_METRICS_CMD = "nvidia-smi --query-gpu=memory.used,memory.total,utilization.gpu,utilization.memory,temperature.gpu --format=csv,noheader,nounits" 7 | MEMORY_USED = "memory_used" 8 | MEMORY_TOTAL = "memory_total" 9 | GPU_UTILIZATION = "gpu_utilization" 10 | MEMORY_UTILIZATION = "memory_utilization" 11 | GPU_TEMP = "gpu_temp" 12 | FRAMERATE = "framerate" 13 | 14 | # Action recognition module constants 15 | TOP_LABEL_COUNT = 3 16 | INPUT_FORMAT = "array" 17 | LABELS_PREFIX = "labels/label_map_" 18 | LABELS_SUFFIX = ".txt" 19 | 20 | # Producing module constants 21 | PROFILE_NAME_VALUE_HAI = "haidetection" 22 | -------------------------------------------------------------------------------- /utils/env_linux: -------------------------------------------------------------------------------- 1 | export DOCKER_BUILDKIT=1 2 | -------------------------------------------------------------------------------- /utils/remote_dashboard.txt: -------------------------------------------------------------------------------- 1 | ssh -L3000:localhost:3000 -L5002:localhost:5002 hostname 2 | localhost:3000 3 | 
--------------------------------------------------------------------------------