├── requirements.txt ├── Exams.xlsx ├── .~lock.FTFStudents.xlsx# ├── .~lock.OStudents.xlsx# ├── OStudents.xlsx ├── FTFStudents.xlsx ├── README.md ├── fraud_detection.py └── data_generator.py /requirements.txt: -------------------------------------------------------------------------------- 1 | persian-names 2 | openpyxl 3 | radar -------------------------------------------------------------------------------- /Exams.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Revisto/kanoon-fraud-detection/master/Exams.xlsx -------------------------------------------------------------------------------- /.~lock.FTFStudents.xlsx#: -------------------------------------------------------------------------------- 1 | ,rev,pop-os,12.07.2023 10:14,file:///home/rev/.config/libreoffice/4; -------------------------------------------------------------------------------- /.~lock.OStudents.xlsx#: -------------------------------------------------------------------------------- 1 | ,rev,pop-os,12.07.2023 10:52,file:///home/rev/.config/libreoffice/4; -------------------------------------------------------------------------------- /OStudents.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Revisto/kanoon-fraud-detection/master/OStudents.xlsx -------------------------------------------------------------------------------- /FTFStudents.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Revisto/kanoon-fraud-detection/master/FTFStudents.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Exam Fraud Detection 2 | ## _Set of multiple algorithms to detect fraud and cheating in online and face to face exams_ 3 | 4 | MTB is a fast, customizable, useful telegram 
bot for your Minecraft server. 5 | 6 | ![](https://cdn.dribbble.com/users/923409/screenshots/4791490/media/3756a7c5ec57b6699be5bf70a7244374.jpg?compress=1&resize=800x600) 7 | 8 | ## ✨ Methods (so far) 9 | 10 | - Similarity to other answer sheets (analyze ranges as well) 11 | - Check if an IP address has been used multiple times 12 | 13 | ## ⚙️ How To Use 14 | 15 | Install dependencies. 16 | 17 | ```sh 18 | pip3 install -r requirements.txt 19 | ``` 20 | 21 | Now clone the repo: 22 | ```sh 23 | git clone https://github.com/revisto/fraud-detection 24 | cd fraud-detection 25 | ``` 26 | 27 | Let's take care of xlsx files... 28 | 29 | ```sh 30 | python3 data_generator.py 31 | ``` 32 | Now our xlsx files are updated: FTFStudents.xlsx, OStudents.xlsx, Exams.xlsx 33 | 34 | Now we can run fraud_detection.py to see the cheating suspects. 35 | ``` 36 | python3 fraud_detection.py 37 | ``` 38 | 39 | ![](https://ketabchi.com/blog/wp-content/uploads/2020/12/35-scaled.jpg) 40 | 41 | ## Show your support 42 | 43 | Please ⭐️ this repository if this project helped you! 
44 | 45 | 46 | ## 📝 License 47 | 48 | GNUv2 49 | 50 | **Free Software, Hell Yeah!** -------------------------------------------------------------------------------- /fraud_detection.py: -------------------------------------------------------------------------------- 1 | import openpyxl 2 | import json 3 | from pprint import pprint 4 | 5 | class FetchData: 6 | def __init__(self, exam_file_path="Exams.xlsx", face_to_face_students_file_path="FTFStudents.xlsx", online_students_file_path="OStudents.xlsx"): 7 | self.exams_workbook = openpyxl.load_workbook(exam_file_path) 8 | self.face_to_face_students_answers_workbook = openpyxl.load_workbook(face_to_face_students_file_path) 9 | self.online_students_answers_workbook = openpyxl.load_workbook(online_students_file_path) 10 | 11 | def read_exams_answersheets(self): 12 | sheet = self.exams_workbook.worksheets[0] 13 | exams_answer_sheet = dict() 14 | for row in sheet: 15 | exam_name = row[0].value 16 | exam_answer_sheet = row[1].value 17 | exams_answer_sheet[exam_name] = json.loads(exam_answer_sheet) 18 | return exams_answer_sheet 19 | 20 | 21 | def read_face_to_face_students_answers(self): 22 | sheet = self.face_to_face_students_answers_workbook.worksheets[0] 23 | students_answers_to_exams = dict() 24 | for row in sheet: 25 | exam_name = row[0].value 26 | if exam_name not in students_answers_to_exams: 27 | students_answers_to_exams[exam_name] = list() 28 | 29 | student_answers = { 30 | "Exam": row[0].value, 31 | "Name": row[1].value, 32 | "Sheet": json.loads(row[2].value), 33 | "Date": row[3].value, 34 | "Duration": row[4].value 35 | } 36 | students_answers_to_exams[exam_name].append(student_answers) 37 | return students_answers_to_exams 38 | 39 | def read_online_students_answers(self): 40 | sheet = self.online_students_answers_workbook.worksheets[0] 41 | students_answers_to_exams = dict() 42 | for row in sheet: 43 | exam_name = row[0].value 44 | if exam_name not in students_answers_to_exams: 45 | 
class FraudDetection:
    """Run cheating heuristics over exam records and accumulate red flags.

    Each ``find_*`` method inspects the records of one exam (a list of dicts),
    returns the set of flagged student names and also records a flag label for
    each of them in ``self.suspects``.
    """

    def __init__(self):
        # Maps a student name to the list of red-flag labels raised so far.
        self.suspects = dict()

    def add_red_flag(self, suspects, flag):
        """Append ``flag`` to the flag list of every name in ``suspects``.

        Args:
            suspects: Iterable of student names.
            flag: Label to record (any value; the callers here pass strings).
        """
        for suspect in suspects:
            self.suspects.setdefault(suspect, []).append(flag)

    def find_similarity(self, sheet1, sheet2):
        """Score how suspiciously alike two answer sheets are.

        The sheets are compared position by position. Every maximal run of
        consecutive identical answers contributes the square of its length;
        the score is the square root of that sum divided by ``len(sheet1)``.
        Long unbroken runs therefore weigh more than scattered matches.

        Args:
            sheet1: Sequence of answers; its length normalises the score.
            sheet2: Sequence of answers to compare against. Positions that
                exist in only one of the sheets never count as matches.

        Returns:
            A float in ``[0.0, 1.0]``; ``0.0`` when ``sheet1`` is empty.
        """
        question_count = len(sheet1)
        if question_count == 0:
            # Nothing to compare; avoids dividing by zero below.
            return 0.0

        squared_runs = 0
        run_length = 0
        # zip stops at the shorter sheet, so a short sheet2 cannot raise
        # IndexError; the missing tail simply adds no matching run.
        for first_answer, second_answer in zip(sheet1, sheet2):
            if first_answer == second_answer:
                run_length += 1
            else:
                squared_runs += run_length ** 2
                run_length = 0
        squared_runs += run_length ** 2  # close the run that reaches the end

        return squared_runs ** 0.5 / question_count

    def find_similar_sheets(self, exam_dict, threshold=0.08):
        """Flag every student whose sheet is too similar to another student's.

        Args:
            exam_dict: List of student records for one exam; each record
                needs the keys ``"Name"`` and ``"Sheet"``.
            threshold: A pair is flagged when its ``find_similarity`` score
                is strictly greater than this value.

        Returns:
            The set of flagged names. Each of them also receives the
            ``"SIMILAR_ANSWER_SHEETS"`` flag in ``self.suspects``.
        """
        cheaters = set()
        for index, suspect in enumerate(exam_dict):
            # Only look at later records so each unordered pair is scored once.
            for second_suspect in exam_dict[index + 1:]:
                similarity = self.find_similarity(
                    suspect["Sheet"], second_suspect["Sheet"]
                )
                if similarity > threshold:
                    cheaters.add(suspect["Name"])
                    cheaters.add(second_suspect["Name"])
        self.add_red_flag(cheaters, "SIMILAR_ANSWER_SHEETS")
        return cheaters

    def find_same_ip_for_multiple_users(self, exam_dict):
        """Flag students who share at least one IP address with another record.

        Args:
            exam_dict: Iterable of student records for one exam; each record
                needs the keys ``"Name"`` and ``"IPs"``, where ``"IPs"`` is a
                JSON-encoded list of address strings. An empty or ``None``
                value is treated as "no addresses recorded".

        Returns:
            The set of flagged names. Each of them also receives the
            ``"SAME IP"`` flag in ``self.suspects``.
        """
        parsed_records = []
        ip_owner_count = {}
        for student in exam_dict:
            raw_ips = student["IPs"]
            # De-duplicate per student so repeated use of one address by the
            # same student does not look like sharing.
            unique_ips = set(json.loads(raw_ips)) if raw_ips else set()
            parsed_records.append((student, unique_ips))
            for ip in unique_ips:
                ip_owner_count[ip] = ip_owner_count.get(ip, 0) + 1

        cheaters = set()
        for student, unique_ips in parsed_records:
            if any(ip_owner_count[ip] > 1 for ip in unique_ips):
                cheaters.add(student["Name"])

        self.add_red_flag(cheaters, "SAME IP")
        return cheaters
class Exam:
    """Build randomly generated spreadsheet rows for exams.

    Every method is static and obtains its random values from ``Sheet``.
    Student rows are plain lists whose column order is:

    * face-to-face: exam name, student name, answer sheet, date, duration
    * online: exam name, student name, answer sheet, date,
      per-question durations, IPs
    """

    @staticmethod
    def _resolve_exam(exam_name, exam_date):
        """Return ``(exam_name, exam_date)``, generating whichever is ``None``."""
        if exam_name is None:
            exam_name = Sheet().generate_random_exam_name()
        if exam_date is None:
            exam_date = Sheet().generate_random_date()
        return exam_name, exam_date

    @staticmethod
    def _generate_exam(row_factory, students_count, exam_name, exam_date):
        """Build ``students_count`` rows with ``row_factory`` plus the answer-key row."""
        exam_name, exam_date = Exam._resolve_exam(exam_name, exam_date)
        # The answer key is generated before the student rows, matching the
        # order in which the random generator was consumed originally.
        exam_answer_row = [exam_name, Sheet().generate_random_answer_sheet()]
        student_rows = [
            row_factory(exam_name, exam_date) for _ in range(students_count)
        ]
        return student_rows, exam_answer_row

    @staticmethod
    def generate_random_face_to_face_student_exam(exam_name=None, exam_date=None):
        """Return one random face-to-face student row.

        Args:
            exam_name: Exam identifier; a random one is generated when ``None``.
            exam_date: Exam date; a random one is generated when ``None``.

        Returns:
            ``[exam_name, student_name, answer_sheet, exam_date, duration]``.
        """
        exam_name, exam_date = Exam._resolve_exam(exam_name, exam_date)
        sheet = Sheet()
        return [
            exam_name,
            sheet.generate_random_name(),
            sheet.generate_random_answer_sheet(),
            exam_date,
            sheet.generate_random_duration(),
        ]

    @staticmethod
    def generate_face_to_face_exam(students_count=20, exam_name=None, exam_date=None):
        """Return the rows of one random face-to-face exam.

        Args:
            students_count: Number of student rows to generate.
            exam_name: Exam identifier; a random one is generated when ``None``.
            exam_date: Exam date; a random one is generated when ``None``.

        Returns:
            A tuple ``(student_rows, exam_answer_row)`` where
            ``exam_answer_row`` is ``[exam_name, answer_sheet]``.
        """
        return Exam._generate_exam(
            Exam.generate_random_face_to_face_student_exam,
            students_count,
            exam_name,
            exam_date,
        )

    @staticmethod
    def generate_random_online_student_exam(exam_name=None, exam_date=None):
        """Return one random online student row.

        Args:
            exam_name: Exam identifier; a random one is generated when ``None``.
            exam_date: Exam date; a random one is generated when ``None``.

        Returns:
            ``[exam_name, student_name, answer_sheet, exam_date,
            per_question_durations, ips]``.
        """
        exam_name, exam_date = Exam._resolve_exam(exam_name, exam_date)
        sheet = Sheet()
        return [
            exam_name,
            sheet.generate_random_name(),
            sheet.generate_random_answer_sheet(),
            exam_date,
            sheet.generate_online_question_solve_duration(),
            sheet.generate_random_list_of_ips(),
        ]

    @staticmethod
    def generate_online_exam(students_count=20, exam_name=None, exam_date=None):
        """Return the rows of one random online exam.

        Args:
            students_count: Number of student rows to generate.
            exam_name: Exam identifier; a random one is generated when ``None``.
            exam_date: Exam date; a random one is generated when ``None``.

        Returns:
            A tuple ``(student_rows, exam_answer_row)`` where
            ``exam_answer_row`` is ``[exam_name, answer_sheet]``.
        """
        return Exam._generate_exam(
            Exam.generate_random_online_student_exam,
            students_count,
            exam_name,
            exam_date,
        )
all_face_to_face_student_rows = list() 142 | all_online_student_rows = list() 143 | all_exam_rows = list() 144 | for i in range(exam_count): 145 | exam_name = Sheet().generate_random_exam_name() 146 | exam_date = Sheet().generate_random_date() 147 | student_rows, exam_answer_row = Exam.generate_face_to_face_exam(students_count, exam_name, exam_date) 148 | all_face_to_face_student_rows += student_rows 149 | 150 | student_rows, exam_answer_row = Exam.generate_online_exam(students_count, exam_name, exam_date) 151 | all_online_student_rows += student_rows 152 | 153 | all_exam_rows.append(exam_answer_row) 154 | 155 | 156 | face_to_face_student_excel.insert_rows(all_face_to_face_student_rows) 157 | online_student_excel.insert_rows(all_online_student_rows) 158 | exam_answers_excel.insert_rows(all_exam_rows) 159 | face_to_face_student_excel.save_file("FTFStudents.xlsx") 160 | online_student_excel.save_file("OStudents.xlsx") 161 | exam_answers_excel.save_file("Exams.xlsx") 162 | 163 | 164 | generate_database() --------------------------------------------------------------------------------