├── .gitignore ├── LICENSE ├── README.md ├── nectaapi ├── __init__.py ├── comparison.py ├── schools.py ├── student.py ├── student_name.py ├── students.py └── summary.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Custom made files 2 | test.py 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended 
to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 108 | __pypackages__/ 109 | 110 | # Celery stuff 111 | celerybeat-schedule 112 | celerybeat.pid 113 | 114 | # SageMath parsed files 115 | *.sage.py 116 | 117 | # Environments 118 | .env 119 | .venv 120 | env/ 121 | venv/ 122 | ENV/ 123 | env.bak/ 124 | venv.bak/ 125 | 126 | # Spyder project settings 127 | .spyderproject 128 | .spyproject 129 | 130 | # Rope project settings 131 | .ropeproject 132 | 133 | # mkdocs documentation 134 | /site 135 | 136 | # mypy 137 | .mypy_cache/ 138 | .dmypy.json 139 | dmypy.json 140 | 141 | # Pyre type checker 142 | .pyre/ 143 | 144 | # pytype static type analyzer 145 | .pytype/ 146 | 147 | # Cython debug symbols 148 | cython_debug/ 149 | 150 | # PyCharm 151 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 152 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 153 | # and can be added to the global gitignore or merged into this file. For a more nuclear 154 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
155 | #.idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 vincent laizer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Necta-API 2 | 3 | Get a formated data of examination results scrapped from necta results website. 
4 | 5 | Note this is not an official [NECTA](https://necta.go.tz/) API and is still in development 6 | 7 | Current version is `Beta 2.0.6` 8 | 9 | This version comes with a more modular structure compared to the previous ones 10 | 11 | Developed by [**Tanzania Programmers**](https://tanzaniaprogrammers.com/), written *by Vincent Laizer.* 12 | 13 | --- 14 | 15 | --- 16 | 17 | ## Usage 18 | 19 | - [x] Get the package via pip 20 | 21 | ```bash 22 | pip install nectaapi 23 | ``` 24 | 25 | - In any return value **None** indicates that no data could be scraped 26 | 27 | - [x] Get a list of all schools in a given year and exam type. 28 | 29 | exam type can be **acsee** or **csee** (for now, more to be added) 30 | 31 | ```python 32 | from nectaapi import schools 33 | 34 | data = schools.schools(2017, 'csee') 35 | ``` 36 | 37 | The function returns a dictionary in the form 38 | 39 | ```python 40 | { 41 | "exam_type": "examination type", 42 | "year_of_exam": "year of examination", 43 | "number_of_schools": "number of schools in this exam and year", 44 | "schools": [ 45 | { 46 | "school_name": "school name 1", 47 | "school_number":"school number 1" 48 | }, 49 | { 50 | "school_name": "school name 2", 51 | "school_number":"school number 2" 52 | }, 53 | ...] 
54 | } 55 | ``` 56 | 57 | - [x] Get a highlight of a school's overall results 58 | 59 | ```python 60 | from nectaapi import summary 61 | 62 | data = summary.summary(year, examType, schoolNumber) 63 | 64 | # schoolNumber is the school's registration number, i.e. s3881 or s1268 65 | ``` 66 | 67 | The function returns a dictionary in the form 68 | 69 | ```python 70 | { 71 | "school_name": "name of school", 72 | "school_number": "school_number", 73 | "exam_type": "exam_type", 74 | "year_of_exam": "year", 75 | "school_category":"category based on number of students", 76 | "number_of_students": "total number of students", 77 | "school_region":"regional location of the school", 78 | "male_students": "number of male students", 79 | "female_students": "number of female students", 80 | "absentees": "number of students who missed the exam", 81 | "division_one": "number of division one", 82 | "division_two": "number of division two", 83 | "division_three": "number of division three", 84 | "division_four": "number of division four", 85 | "division_zero":"number of division zero", 86 | "national_position": "school's national position", 87 | "regional_position": "school's regional position", 88 | "total_national_schools":"number of schools national wise", 89 | "total_regional_schools":"number of schools regional wise", 90 | "gpa": "school's GPA" 91 | } 92 | ``` 93 | 94 | - [x] Get a single student's results 95 | 96 | ```python 97 | from nectaapi import student 98 | 99 | results = student.student(year, examType, schoolNumber, studentNumber) 100 | 101 | # student number is the student's part of their examination number, e.g. 0040 or 0553 102 | ``` 103 | 104 | The 'student' function returns a dictionary of this form 105 | 106 | ```python 107 | { 108 | "examination_number":"student's examination number", 109 | "year_of_exam":"year", 110 | "exam_type":"exam type", 111 | "school_name":"name of student's school", 112 | "gender":"student's gender", 113 | "division":"student's division", 114 | 
"points":"grade points", 115 | "subjects": 116 | { 117 | "subject1":"score1", 118 | "subject2":"score2", 119 | ... 120 | } 121 | } 122 | ``` 123 | 124 | - [x] Compare schools' performance over a range of years, or track just a single school 125 | 126 | _not present in previous versions_ 127 | 128 | The parameters of the function are the start year and end year of the comparison, the exam type and a list of schools to compare. The start year must not be greater than the end year; if they are equal, a single-year comparison is returned 129 | 130 | ```python 131 | from nectaapi import comparison 132 | data = comparison.comparison(startYear, endYear, examType, ["school_number1", "school_number2", ...]) 133 | ``` 134 | 135 | It then returns a dictionary of comparable school data (gpa, national_position and number_of_students) in the form 136 | 137 | ```python 138 | { 139 | "year1":{ 140 | "school_number1":{ 141 | "gpa":"", 142 | "national_position":"", 143 | "number_of_students":"" 144 | }, 145 | "school_number2":{ 146 | "gpa":"", 147 | "national_position":"", 148 | "number_of_students":"" 149 | }, 150 | ... 151 | }, 152 | "year2":{ 153 | "school_number1":{ 154 | "gpa":"", 155 | "national_position":"", 156 | "number_of_students":"" 157 | }, 158 | "school_number2":{ 159 | "gpa":"", 160 | "national_position":"", 161 | "number_of_students":"" 162 | }, 163 | ... 164 | } 165 | ... 166 | } 167 | ``` 168 | 169 | As one of my teachers said, **"Academics is one of the 3 areas in life where competition is allowed"** *Mr. H. Masegense*, so don't mind comparing performance of schools over the years 170 | 171 | + The comparison module comes with a bonus function to check if a school participated in national examinations of a given type and year. 
Returns a boolean value 172 | 173 | ```python 174 | from nectaapi import comparison 175 | isPresent = comparison.schoolPresent(year, exam_type, school_number) 176 | ``` 177 | 178 | ## What's New 179 | 180 | ## Version 2.0.6 181 | 182 | - Compatibility with 2023 **CSEE** results format 183 | - Compatibility with 2023 **ACSEE** results format 184 | - Minor bug fixes 185 | 186 | ## Version 2.0.5 187 | 188 | - Minor bug fixes 189 | 190 | ## Version 2.0.4 191 | 192 | - Compatibility with 2022 **ACSEE** results format 193 | 194 | ## Version 2.0.3 195 | 196 | - Compatibility with 2021 **CSEE** results format 197 | 198 | ## Version 2.0.0 199 | 200 | - Bug fixes on the school summary function 201 | - proper handling of the year 2015 where GPA system was used. 202 | - note, in this year, distinction is counted as division one, merit as division two, credit as division three, pass as division four and fail as division zero. 203 | - school comparison function 204 | - code modularity improvement 205 | 206 | --- 207 | 208 | check out video tutorial on [YouTube](https://www.youtube.com/channel/UCuMUw-djxHqOHrvnnFGYtZA) for demos. 209 | 210 | --- 211 | 212 | ### contributions are awaited for **GitHub repo [NECTA-API](https://github.com/vincent-laizer/NECTA-API)** 213 | -------------------------------------------------------------------------------- /nectaapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vincent-laizer/NECTA-API/aa8ee08fbe9f618b970c50effcd663951f7bde10/nectaapi/__init__.py -------------------------------------------------------------------------------- /nectaapi/comparison.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Comparison of different schools' performance across years 3 | takes parameters exam_type and a list of school numbers. 4 | returns a dictionary of comparable data btn the school(s) in the given years range. 
def comparison(start_year: int, end_year: int, exam_type: str,
               school_list: List[str]) -> Dict[str, Union[str, None]]:
    """Compare schools' performance across an inclusive range of years.

    Args:
        start_year: first examination year to include.
        end_year: last examination year to include.  When it is smaller
            than ``start_year`` the result is an empty dictionary.
        exam_type: examination type, passed on unchanged to
            ``schools.schools`` and ``summary.summary``.
        school_list: school numbers to compare.

    Returns:
        A dictionary keyed by the year as a string.  Each value maps every
        entry of ``school_list`` to a dictionary with the keys
        ``national_position``, ``number_of_students`` and ``gpa``.  All
        three are ``None`` when the school is not listed for that year.
        NOTE(review): the return annotation is kept as it was for
        compatibility, although it does not describe this nested shape.

    Raises:
        Exception: whatever ``schools.schools`` or ``summary.summary``
            raise (e.g. unreachable results page, invalid exam type).
    """
    data = {}

    for year in range(start_year, end_year + 1):
        year_key = str(year)
        data[year_key] = {}
        if not school_list:
            # nothing to look up, so do not download the listing at all
            continue

        # The listing is the same for every school of this year: download it
        # once instead of once per school, and test membership through a set.
        listed = {
            entry["school_number"].lower()
            for entry in schools.schools(year, exam_type)["schools"]
        }

        for school in school_list:
            if school.lower() in listed:
                school_data = summary.summary(year, exam_type, school)
                data[year_key][school] = {
                    "national_position": school_data["national_position"],
                    "number_of_students": school_data["number_of_students"],
                    "gpa": school_data["gpa"],
                }
            else:
                # school not present in this year, the data returned is None
                data[year_key][school] = {
                    "national_position": None,
                    "number_of_students": None,
                    "gpa": None,
                }
    return data


# function to check if a school participated in a national exam
def schoolPresent(year: int, exam_type: str, school_number: str) -> bool:
    """Check if a school participated in a national exam.

    Args:
        year: examination year.
        exam_type: examination type, passed on to ``schools.schools``.
        school_number: school number to look for (compared case-insensitively).

    Returns:
        True if the number appears in the listing returned by
        ``schools.schools(year, exam_type)``, otherwise False.
    """
    wanted = school_number.lower()
    listing = schools.schools(year, exam_type)["schools"]
    return any(wanted == entry["school_number"].lower() for entry in listing)
def schools(year: int, exam_type: str) -> Dict[str, Any]:
    """Get all schools and centres listed for an examination year.

    Args:
        year: examination year (any value ``int()`` accepts).
        exam_type: ``"csee"`` or ``"acsee"``, case-insensitive.

    Returns:
        A dictionary with the keys ``exam_type`` and ``year_of_exam`` (both
        echoed as passed in), ``number_of_schools``, ``description`` and
        ``schools``.  ``schools`` is a list of
        ``{"school_name": ..., "school_number": ...}`` dictionaries, one per
        link found on the listing page.

    Raises:
        Exception: if ``exam_type`` is not supported, or if the listing page
            does not answer with HTTP 200.
    """
    exam = exam_type.lower()
    if exam not in ("csee", "acsee"):
        # invalid exam type
        raise Exception(f"Invalid Exam Type {exam_type}")

    exam_year = int(year)

    # The listing page name changed over the years, and newer pages start
    # with a run of links (the letters in the home page) that are not
    # schools; ``skip`` is the number of leading links to discard.
    if exam == "csee":
        page = "index.htm" if exam_year in (2016, 2022, 2023) else "csee.htm"
        skip = 28 if exam_year > 2014 else 0
    else:
        if 2020 <= exam_year <= 2023:
            page = "index.htm"
        elif exam_year == 2014:
            page = ""  # 2014 is served from the bare directory URL
        else:
            page = "acsee.htm"
        skip = 28 if exam_year > 2015 else 0

    url = f"https://onlinesys.necta.go.tz/results/{exam_year}/{exam}/{page}"

    data = requests.get(url)
    if data.status_code != 200:
        # upon error raise an exception
        raise Exception(f"Failed to access {url}\nResponse code: {data.status_code}")

    soup = BeautifulSoup(data.text, 'html.parser')

    # a list of dictionaries holding each school's registration number and name
    found = []
    for font in soup.find_all('font'):
        for a in font.find_all('a'):
            # link text is "<number> <name ...>": the first word is the
            # number, the remainder the name (stripped, so the name no
            # longer carries a leading space)
            number, _, name = a.text.strip('\n\r').partition(' ')
            found.append({"school_name": name.strip(), "school_number": number})

    # eliminate initial dirt, the first letters that were extracted as schools
    found = found[skip:]

    return {
        "exam_type": exam_type,
        "year_of_exam": year,
        "number_of_schools": len(found),
        "description": f"a list of all schools and centers that participated in {exam_type} in {year}",
        "schools": found,
    }
def student(year: int, exam_type: str, school_number: str, student_number: int) -> Dict[str, Any]:
    """Get the results of a single student.

    Args:
        year: examination year (any value ``int()`` accepts).
        exam_type: ``"csee"`` or ``"acsee"``, case-insensitive.
        school_number: school or centre number, e.g. ``"s3881"``.
        student_number: the student's part of the examination number.  It is
            inserted verbatim into ``"<SCHOOL>/<student_number>"``, so
            leading zeros are only kept when a string is passed.

    Returns:
        A dictionary with the keys ``examination_number``, ``year_of_exam``,
        ``exam_type``, ``gender``, ``school_name``, ``division``, ``points``
        and ``subjects``.  When ``student_names`` finds the candidate it also
        has ``firstname``, ``middlename``, ``lastname`` and ``sex``.

    Raises:
        Exception: for an unsupported exam type, a non-200 response, or when
            no row of the results table carries the examination number.
    """
    exam_type = exam_type.lower()
    school_number = school_number.lower()
    year = int(year)

    # ``index`` is the position of the candidates' table among the page's
    # <table> elements; it is 2 on the newer layouts and 0 on the older ones.
    if exam_type == "acsee":
        if year == 2023:
            url = f"https://matokeo.necta.go.tz/results/2023/acsee/results/{school_number}.htm"
        else:
            url = f"https://onlinesys.necta.go.tz/results/{year}/acsee/results/{school_number}.htm"

        if school_number.startswith("p"):
            index = 2 if year > 2019 else 0
        else:
            index = 2 if year >= 2019 else 0

    elif exam_type == "csee":
        if year == 2023:
            url = f"https://matokeo.necta.go.tz/results/2023/csee/CSEE2023/results/{school_number}.htm"
        elif year > 2014:
            url = f"https://onlinesys.necta.go.tz/results/{year}/csee/results/{school_number}.htm"
        else:
            url = f"https://onlinesys.necta.go.tz/results/{year}/csee/{school_number}.htm"

        index = 2 if year > 2018 else 0

    else:
        # fail early instead of sending a request to an empty URL
        raise Exception(f"Invalid Exam Type {exam_type}")

    data = requests.get(url)
    if data.status_code != 200:
        raise Exception(f"failed to connect to server\nError code {data.status_code}")
    soup = BeautifulSoup(data.text, 'html.parser')

    s = summary.summary(year, exam_type, school_number)

    student_data = {
        "examination_number": f"{school_number.upper()}/{student_number}",
        "year_of_exam": year,
        "exam_type": exam_type,
        "gender": "*",
        "school_name": s["school_name"],
        "division": "*",
        "points": "*",
        "subjects": {}
    }

    found = False
    for tr in soup.find_all("table")[index].find_all("tr"):
        # row[reg_no, sex, points, division, subjects]
        row = [td.text.strip('\n') for td in tr.find_all("td")]

        # rows without the five expected cells cannot be a candidate row
        if len(row) < 5 or row[0] != student_data["examination_number"]:
            continue

        student_data["gender"] = row[1]
        student_data["division"] = row[3]
        student_data["points"] = row[2]
        student_data["subjects"] = splitAfter(row[4])
        found = True
        break

    if not found:
        raise Exception(f"Wrong Examination Number {student_data['examination_number']}")

    # get student names (best effort, only available for csee)
    names = student_names(student_number, school_number, year, exam_type)
    if names is not None:
        student_data["firstname"] = names["firstname"]
        student_data["middlename"] = names["middlename"]
        student_data["lastname"] = names["lastname"]
        student_data["sex"] = names["sex"]

    return student_data


def student_names(student_number, school_number, year, exam_type):
    """Look up a student's names through the HESLB applicant search API.

    Args:
        student_number: the student's part of the examination number.
        school_number: school number; sent as ``"<school>-<student>"``.
        year: examination year, sent as ``exam_year``.
        exam_type: only the exact string ``"csee"`` is looked up.

    Returns:
        A dictionary with the keys ``firstname``, ``middlename``,
        ``lastname`` and ``sex``, or ``None`` when ``exam_type`` is not
        ``"csee"``, the applicant is missing from the response, or the
        request fails for any reason.
    """
    if exam_type != "csee":
        return None

    headersList = {
        "Content-Type": "application/json"
    }
    url = "https://olas.heslb.go.tz/appli/api/application/search-applicant/"
    payload = json.dumps({
        "index_no": f"{school_number}-{student_number}",
        "app_year": "",
        "exam_year": f"{year}",
        "applicant_type": "necta"
    })

    try:
        # NOTE(review): verify=False turns off TLS certificate verification
        # for this request.  Kept as in the original; confirm whether it is
        # still required for this host.
        response = requests.request("POST", url, data=payload, headers=headersList, verify=False)
        details = (response.json().get("data") or {}).get("applicant")
    except Exception as e:
        # the lookup is best effort: report the problem and carry on
        print(e)
        return None

    if not isinstance(details, dict):
        # no applicant record in the answer
        return None

    return {
        "firstname": details.get("first_name"),
        "middlename": details.get("middle_name"),
        "lastname": details.get("last_name"),
        "sex": details.get("sex")
    }
def students(year: int, exam_type: str, school_number: str) -> Dict[str, Any]:
    """Get all students with their performance in a particular school or center.

    Args:
        year: examination year (any value ``int()`` accepts).
        exam_type: ``"csee"`` or ``"acsee"``, case-insensitive.
        school_number: school or centre number, e.g. ``"s3881"``.

    Returns:
        A dictionary with the keys ``school_number``, ``school_name``,
        ``year_of_exam``, ``exam_type``, ``number_of_students`` (the last
        and the name are taken from ``summary.summary``) and ``students``,
        the list produced by ``scrapStudents``.

    Raises:
        Exception: for an unsupported exam type or a non-200 response.
    """
    exam_type = exam_type.lower()
    school_number = school_number.lower()
    year = int(year)

    # ``index`` is the position of the candidates' table among the page's
    # <table> elements; it is 2 on the newer layouts and 0 on the older ones.
    if exam_type == "acsee":
        if year == 2023:
            url = f"https://matokeo.necta.go.tz/results/2023/acsee/results/{school_number}.htm"
        else:
            url = f"https://onlinesys.necta.go.tz/results/{year}/acsee/results/{school_number}.htm"

        if school_number.startswith("p"):
            # NOTE(review): centres use the newer layout only after 2019,
            # matching student() and summary.handleCenter (this function
            # previously used >= 2019 here) - confirm against a 2019 page.
            index = 2 if year > 2019 else 0
        else:
            index = 2 if year >= 2019 else 0

    elif exam_type == "csee":
        if year == 2023:
            url = f"https://matokeo.necta.go.tz/results/2023/csee/CSEE2023/results/{school_number}.htm"
        elif year > 2014:
            url = f"https://onlinesys.necta.go.tz/results/{year}/csee/results/{school_number}.htm"
        else:
            url = f"https://onlinesys.necta.go.tz/results/{year}/csee/{school_number}.htm"

        index = 2 if year > 2018 else 0

    else:
        # fail early instead of sending a request to an empty URL
        raise Exception(f"Invalid Exam Type {exam_type}")

    data = requests.get(url)
    if data.status_code != 200:
        raise Exception(f"failed to connect to server\nError code {data.status_code}")
    soup = BeautifulSoup(data.text, 'html.parser')

    # get some data from summary function
    school_summary = summary.summary(year, exam_type, school_number)

    return {
        "school_number": school_number,
        "school_name": school_summary["school_name"],
        "year_of_exam": year,
        "exam_type": exam_type,
        "number_of_students": school_summary["number_of_students"],
        "students": scrapStudents(soup, index),
    }


def scrapStudents(soup, index) -> List[Dict[str, Any]]:
    """Collect every candidate row of the ``index``-th table of ``soup``.

    Args:
        soup: parsed results page; must offer ``find_all("table")``.
        index: position of the candidates' table among the page's tables.

    Returns:
        One dictionary per row with the keys ``examination_number``,
        ``gender``, ``division``, ``points`` and ``subjects``.  The first
        row (column titles) and rows with fewer than five cells are left out.
    """
    studentsTable = soup.find_all("table")[index]
    data = []

    # [1:] -> eliminate the first row containing titles
    for tr in studentsTable.find_all("tr")[1:]:
        # row[reg_no, sex, points, division, subjects]
        row = [td.text.strip('\n') for td in tr.find_all("td")]
        if len(row) < 5:
            # not a candidate row; indexing it would raise IndexError
            continue

        data.append({
            "examination_number": row[0],
            "gender": row[1],
            "division": row[3],
            "points": row[2],
            "subjects": splitAfter(row[4]),
        })

    return data


# assisting function in obtaining a dictionary of candidates subjects and grades
def splitAfter(text) -> Dict[str, str]:
    """Turn a subjects cell such as ``"CIV - 'C' HIST - 'D'"`` into a dict.

    An entry ends at a closing quote that is followed by a space.  The text
    is right-stripped and given one trailing space first, so the last entry
    is kept even when the cell does not end in a space.

    Args:
        text: raw text of the subjects cell.

    Returns:
        Mapping of subject to grade, e.g. ``{"CIV": "C", "HIST": "D"}``.
        Entries without a ``-`` separator are ignored.  Text left over after
        the last closing quote is ignored as well.
    """
    subjects = {}  # a dictionary of subject grade pairs

    # Every piece except the last was terminated by "' "; the last piece is
    # whatever follows the final terminator and is not a complete entry.
    pieces = (text.rstrip() + " ").split("' ")[:-1]

    for piece in pieces:
        subject, dash, rest = piece.partition('-')
        if not dash:
            continue
        # the grade is the text up to the next dash, without its quotes
        grade = rest.partition('-')[0].strip().strip('\'')
        subjects[subject.strip()] = grade

    return subjects
def summary(year: int, exam_type: str, school_number: str):
    """Summarise a school's (or centre's) performance.

    Args:
        year: examination year (any value ``int()`` accepts; a string such
            as ``"2023"`` is treated exactly like ``2023``).
        exam_type: ``"csee"`` or ``"acsee"``, case-insensitive.
        school_number: school or centre number, e.g. ``"s3881"``.  Numbers
            starting with ``"p"`` are handled as centres.

    Returns:
        A dictionary with the keys school_name, school_number, exam_type,
        year_of_exam, school_category, number_of_students, school_region,
        male_students, female_students, absentees, division_one,
        division_two, division_three, division_four, division_zero,
        national_position, regional_position, total_national_schools,
        total_regional_schools and gpa.  Values that are not scraped stay
        ``"*"``; ``year_of_exam`` echoes ``year`` as it was passed in.

    Raises:
        Exception: for an unsupported exam type or a non-200 response.
    """
    exam_type = exam_type.lower()
    school_number = school_number.lower()
    # Compare on an int so that "2023" and 2023 select the same page and the
    # same scraping branch.
    exam_year = int(year)

    if exam_type == "acsee":
        if exam_year == 2023:
            url = f"https://matokeo.necta.go.tz/results/2023/acsee/results/{school_number}.htm"
        else:
            url = f"https://onlinesys.necta.go.tz/results/{exam_year}/acsee/results/{school_number}.htm"

    elif exam_type == "csee":
        if exam_year == 2023:
            url = f"https://matokeo.necta.go.tz/results/2023/csee/CSEE2023/results/{school_number}.htm"
        elif exam_year > 2014:
            # e.g. https://onlinesys.necta.go.tz/results/2015/csee/results/s3881.htm
            url = f"https://onlinesys.necta.go.tz/results/{exam_year}/csee/results/{school_number}.htm"
        else:
            # e.g. https://onlinesys.necta.go.tz/results/2014/csee/s1674.htm
            url = f"https://onlinesys.necta.go.tz/results/{exam_year}/csee/{school_number}.htm"

    else:
        # fail early instead of sending a request to an empty URL
        raise Exception(f"Invalid Exam Type {exam_type}")

    data = requests.get(url)
    if data.status_code != 200:
        # failed to fetch data, raise exception
        raise Exception(f"Failed to access {url}\nResponse Code {data.status_code}")
    soup = BeautifulSoup(data.text, 'html.parser')

    result = {
        "school_name": "*",
        "school_number": school_number,
        "exam_type": exam_type,
        "year_of_exam": year,
        "school_category": "*",
        "number_of_students": "*",
        "school_region": "*",
        "male_students": "*",
        "female_students": "*",
        "absentees": "*",
        "division_one": "*",
        "division_two": "*",
        "division_three": "*",
        "division_four": "*",
        "division_zero": "*",
        "national_position": "*",
        "regional_position": "*",
        "total_national_schools": "*",
        "total_regional_schools": "*",
        "gpa": "*"
    }

    # scrap school name: drop the first word of the first <h3> and keep the
    # following words up to the first one that contains a line break
    name = ""
    for word in soup.find_all('h3')[0].text.split(' ')[1:]:
        if '\n' in word:
            word = word.split('\r')[0]
            word = word.split('\n')[0]
            name = f"{name} {word}"
            break
        name = f"{name} {word}"
    result["school_name"] = name.strip()

    result = set_zero(result)
    result["absentees"] = 0  # not modified in set_zero function

    # centers dont have a bottom summary table and so should not provide
    # these data
    if school_number.startswith("p"):
        # handle center
        result = handleCenter(result, soup)
    elif exam_year != 2015 or exam_type == "acsee":
        # handle a school; 2015 has no bottom performance analysis table
        # in csee
        result = handleSchool(result, soup)

    return result
# Reset the counters that scrapManual and scrapTopTable both fill in.
# Called between the two so that students and divisions are not counted twice.
def set_zero(summary):
    """Set the division and student counters of ``summary`` to 0.

    ``absentees`` is deliberately left alone.  The same dictionary is
    modified in place and returned.
    """
    for counter in (
        "division_one",
        "division_two",
        "division_three",
        "division_four",
        "division_zero",
        "female_students",
        "male_students",
        "number_of_students",
    ):
        summary[counter] = 0

    return summary


# Scrap the data of a center.
# Centers dont have a bottom performance analysis table.
def handleCenter(summary, soup):
    """Fill ``summary`` for a center from the parsed page ``soup``.

    Pages with a top summary table (acsee after 2019, csee after 2018) are
    read with scrapManual on table 2 (for the absentees), reset, then read
    with scrapTopTable.  Older pages are counted with scrapManual on table 0.
    ``number_of_students`` is always the sum of female and male students.

    Raises:
        Exception: if the exam type is neither "acsee" nor "csee".
    """
    exam_year = int(summary["year_of_exam"])
    exam = summary["exam_type"].lower()

    if exam == "acsee":
        top_table_present = exam_year > 2019
    elif exam == "csee":
        top_table_present = exam_year > 2018
    else:
        raise Exception(f"Invalid Exam Type {exam}")

    if not top_table_present:
        # no top summary table: count divisions and genders row by row
        summary = scrapManual(soup, summary, 0)
    else:
        summary = scrapManual(soup, summary, 2)  # just for absentees
        summary = set_zero(summary)
        summary = scrapTopTable(soup, summary)

    summary["number_of_students"] = summary["female_students"] + summary["male_students"]

    return summary


# Scrap the data of a school.
def handleSchool(summary, soup):
    """Fill ``summary`` for a school from the parsed page ``soup``.

    Pages with a top analysis table (acsee from 2019, csee after 2018) are
    read with scrapManual on table 2 (for the absentees), reset, read with
    scrapTopTable and finished with scrapBottomPerformance on table 4.
    Older pages use scrapManual on table 0 and scrapBottomPerformance on
    table 2.

    Raises:
        Exception: if the exam type is neither "acsee" nor "csee".
    """
    exam_year = int(summary["year_of_exam"])
    exam = summary["exam_type"].lower()

    if exam == "acsee":
        top_table_present = exam_year >= 2019
    elif exam == "csee":
        top_table_present = exam_year > 2018
    else:
        raise Exception(f"Invalid Exam Type {exam}")

    if not top_table_present:
        # no top analysis table: get the data manually
        summary = scrapManual(soup, summary, 0)
        return scrapBottomPerformance(soup, summary, 2)

    summary = scrapManual(soup, summary, 2)  # just for absentees
    summary = set_zero(summary)
    summary = scrapTopTable(soup, summary)
    return scrapBottomPerformance(soup, summary, 4)
summary["exam_type"].lower() 169 | 170 | if exam_type == "acsee": 171 | if year >= 2019: 172 | # has a top analysis table 173 | summary = scrapManual(soup, summary, 2) # just for absentees 174 | summary = set_zero(summary) 175 | summary = scrapTopTable(soup, summary) 176 | summary = scrapBottomPerformance(soup, summary, 4) 177 | else: 178 | # has no top analysis table get data manually 179 | summary = scrapManual(soup, summary, 0) 180 | summary = scrapBottomPerformance(soup, summary, 2) 181 | 182 | elif exam_type == "csee": 183 | if year > 2018: 184 | # has a top analysis table 185 | summary = scrapManual(soup, summary, 2) # just for absentees 186 | summary = set_zero(summary) 187 | summary = scrapTopTable(soup, summary) 188 | summary = scrapBottomPerformance(soup, summary, 4) 189 | else: 190 | # has no top analysis table get data manually 191 | summary = scrapManual(soup, summary, 0) 192 | summary = scrapBottomPerformance(soup, summary, 2) 193 | 194 | else: 195 | raise Exception(f"Invalid Exam Type {exam_type}") 196 | 197 | return summary 198 | 199 | # count divisions and student gender manually 200 | # this is done by scrapping all student data and increementing specific values avaialble 201 | def scrapManual(soup, summary, index): 202 | tables = soup.find_all('table') 203 | for tr in tables[index].find_all("tr"): 204 | # row[reg_no, sex, points, division, subjects] 205 | row = [] 206 | for td in tr.find_all("td"): 207 | row.append(td.text.strip('\n')) 208 | 209 | if row[1].lower() == "f": 210 | summary["female_students"] += 1 211 | else: 212 | summary["male_students"] += 1 213 | 214 | if row[3] == "I" or "DISTINCTION" in row[3]: 215 | summary["division_one"] += 1 216 | elif row[3] == "II" or "MERIT" in row[3]: 217 | summary["division_two"] += 1 218 | elif row[3] == "III" or "CREDIT" in row[3]: 219 | summary["division_three"] += 1 220 | elif row[3] == "IV" or "PASS" in row[3]: 221 | summary["division_four"] += 1 222 | elif row[3] == "0" or "FAIL" in row[3]: 223 | 
summary["division_zero"] += 1 224 | elif "ABS" in row[3]: 225 | summary["absentees"] += 1 226 | 227 | return summary 228 | 229 | # scrap the top table that has gender based performance analysis 230 | def scrapTopTable(soup, summary): 231 | tables = soup.find_all('table') 232 | rows = [] 233 | 234 | for tr in tables[0].find_all('tr'): 235 | r = [] 236 | for td in tr.find_all('td'): 237 | r.append(td.text.strip('\n')) 238 | rows.append(r) 239 | 240 | summary["division_one"] = int(rows[3][1].strip()) 241 | summary["division_two"] = int(rows[3][2].strip()) 242 | summary["division_three"] = int(rows[3][3].strip()) 243 | summary["division_four"] = int(rows[3][4].strip()) 244 | summary["division_zero"] = int(rows[3][5].strip()) 245 | 246 | # total number of males and females 247 | for i in range(1, 6): 248 | summary["female_students"] += int(rows[1][i]) 249 | summary["male_students"] += int(rows[2][i]) 250 | 251 | return summary 252 | 253 | # scrap school performance analaysis table at the bottom 254 | def scrapBottomPerformance(soup, summary, index): 255 | tables = soup.find_all('table') 256 | tds = tables[index].find_all('td') 257 | for i in range(0, int(len(tds)/2)): 258 | if "NATIONWIDE" in tds[2*i].text or "NATIONWISE" in tds[2*i].text: 259 | position = tds[2*i+1].text.strip().split('/') 260 | summary["national_position"] = position[0] 261 | summary["total_national_schools"] = position[1] 262 | elif "REGIONWIDE" in tds[2*i].text or "REGIONWISE" in tds[2*i].text: 263 | position = tds[2*i+1].text.strip().split('/') 264 | summary["regional_position"] = position[0] 265 | summary["total_regional_schools"] = position[1] 266 | elif "GPA" in tds[2*i].text: 267 | summary["gpa"] = tds[2*i+1].text.strip() 268 | elif "CATEGORY" in tds[2*i].text: 269 | summary["school_category"] = tds[2*i+1].text.strip() 270 | elif "TOTAL" in tds[2*i].text: 271 | summary["number_of_students"] = tds[2*i+1].text.strip() 272 | elif "REGION" in tds[2*i].text: 273 | summary["school_region"] = 
tds[2*i+1].text.strip() 274 | 275 | return summary -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | VERSION = '2.0.6' 4 | DESCRIPTION = 'Fetch results of various national examinations done in Tanzania' 5 | LONG_DESCRIPTION = "" 6 | with open('README.md') as rm: 7 | LONG_DESCRIPTION = rm.read() 8 | 9 | # Setting up 10 | setup( 11 | name="nectaapi", 12 | version=VERSION, 13 | author="Tanzania Programmers (Vincent Laizer)", 14 | author_email="", 15 | url="https://github.com/vincent-laizer/NECTA-API", 16 | description=DESCRIPTION, 17 | long_description_content_type="text/markdown", 18 | long_description=LONG_DESCRIPTION, 19 | packages=find_packages(), 20 | install_requires=[ 21 | 'requests', 22 | 'beautifulsoup4' 23 | ], 24 | keywords=['python', 'necta', 'api', 'necta api', 'necta tanzania', 'tanzania programmers'], 25 | classifiers=[ 26 | "Development Status :: 4 - Beta", 27 | "Intended Audience :: Developers", 28 | "Programming Language :: Python :: 3", 29 | "Operating System :: Unix", 30 | "Operating System :: MacOS :: MacOS X", 31 | "Operating System :: Microsoft :: Windows", 32 | ] 33 | ) --------------------------------------------------------------------------------