├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── MANIFEST.in ├── README.md ├── examples ├── demo.gif ├── main.py └── stalker.py ├── requirements.txt ├── setup.py ├── sigpy ├── __init__.py ├── cache.py ├── classes │ ├── __init__.py │ ├── model.py │ ├── picture.py │ └── timetable.py ├── faculties │ ├── __init__.py │ ├── feup │ │ ├── __init__.py │ │ ├── classes.json │ │ ├── course.json │ │ ├── room.json │ │ ├── student.json │ │ ├── study_plan.json │ │ ├── subject.json │ │ └── teacher.json │ └── fmup │ │ ├── __init__.py │ │ ├── classes.json │ │ ├── course.json │ │ ├── room.json │ │ ├── student.json │ │ ├── study_plan.json │ │ ├── subject.json │ │ └── teacher.json ├── interface.py ├── parser.py └── utils.py └── test ├── __init__.py ├── classes ├── __init__.py └── test_model.py └── faculties ├── __init__.py └── feup ├── __init__.py └── test_feup.py /.gitignore: -------------------------------------------------------------------------------- 1 | */__pycache__/* 2 | */*/__pycache__/* 3 | */*/*/__pycache__/* 4 | __pycache__/utils.cpython-36.pyc 5 | images/* 6 | */images/* 7 | ignore/* 8 | \.coverage 9 | *_cache.json 10 | main.py 11 | htmlcov 12 | env/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | cache: pip 5 | install: 6 | - pip install -r requirements.txt 7 | - pip install python-coveralls 8 | script: 9 | - coverage run --include="sigpy/*" -m unittest 10 | - coverage report 11 | notifications: 12 | email: false 13 | after_success: 14 | - coveralls 15 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | How you can help: 3 | 1. Write examples using this tool for others to have a better work basis 4 | 1. 
Improve the completeness of the tool (write or extend JSON classes, see how [below](#how-the-parsing-magic-works)) 5 | 1. (Extend the tool to other faculties, mostly copy-paste and testing) 6 | 1. Write tests for the existing code and also for your JSON contributions 7 | 8 | 9 | 10 | ## Project Organization 11 | Before contributing, let's see how sigpy is organized. 12 | 13 | Inside the sigpy folder you will find: 14 | * 📁 classes 15 | * [model.py](sigpy/classes/model.py) (wrapper class for parsed objects) 16 | * [picture.py](sigpy/classes/picture.py) (class for picture handling) 17 | * 📁 faculties (contains one folder per faculty) 18 | * 📁 feup (contains `.json` files specifying parsing rules) 19 | * [__init__.py](sigpy/faculties/feup/__init__.py) (simple subclass creation script, can be copy-pasted to other faculties) 20 | * course.json 21 | * room.json 22 | * student.json 23 | * ... 24 | * 📁 fcup ... 25 | * __init__.py 26 | * ... 27 | * ... 28 | 29 | ## How the Parsing Magic Works 30 | Each faculty has its `.json` files that specify the "parseable" classes of that faculty. To add more fields or create new classes you just need to edit or create new `.json` files. The interface script takes care of finding them and creating the magical `get_CLASSNAME` from the `CLASSNAME.json` files! 
31 | 32 | Each of the `CLASSNAME.json` files has the following format: 33 | ```json 34 | { 35 | "url": "the url for the page to parse, maybe with ?get_parameters=1&...", 36 | "help": "This URL requires: (tuple of GET parameters needed)", 37 | "picture": "picture (this is optional and usually just for people)", 38 | "attributes": { 39 | "attr1": {"css": "some css selector"}, 40 | "attr2": {"regex": "some regex expression"}, 41 | "attr3": {"xpath": "some xpath selector"}, 42 | "attr4": {"derivate": "string for format, eg: up%s@fe.up.pt for email", "from": ["attr1"]}, 43 | "attr5": {"css|regex|xpath": "if we only care about if it was empty or not, 44 | regex must include a capture group", "boolean": "True"}, 45 | "attr6": { 46 | "list": "True", 47 | "model": "a list of what?", 48 | "css": "to find the first element, ", 49 | "attributes": { 50 | ... 51 | } 52 | } 53 | ... 54 | } 55 | } 56 | ``` 57 | Some notes: 58 | * The attributes can be extracted through css selectors, regex expressions and xpath selectors, which is enough for most tasks (so far nothing has been impossible to parse with them). 59 | * The attributes can be lists, for instance a student can have a list of courses it has enrolled in, etc... 60 | * Notice that it is recursive and one attribute can be a model and have some other attribute that is also a model, _ad infinitum_. 
61 | 62 | 63 | Here is a complete example of a `faculties/feup/student.json` file with useful comments: 64 | ```python 65 | { 66 | "url": "https://sigarra.up.pt/feup/pt/fest_geral.cursos_list?pv_num_unico=%s", 67 | "help": "This URL requires: (student_id)", 68 | # url for getting pictures 69 | "picture": "https://sigarra.up.pt/feup/pt/fotografias_service.foto?pct_cod=%s", 70 | "attributes": { # attributes a student can have 71 | # the name is filtered through a css selector 72 | "name": {"css": "div.estudante-info-nome"}, 73 | 74 | # the same goes for id 75 | "id": {"css": "div.estudante-info-numero a"}, 76 | 77 | # derivate means it will be formatted using another attribute after loading 78 | # using python formatting features like: student.email = "up%s@fe.up.pt" % student.id 79 | # this is a rare attribute type 80 | "email": {"derivate": "up%s@fe.up.pt", "from": ["id"]}, 81 | 82 | "courses": { # if the attribute is a list instead of a value 83 | "model": "course", # model works as class 84 | "list": "True", # omission means single, so this is a list of "course" 85 | 86 | # how to find each element of the list to iterate 87 | "css": "div.estudante-lista-curso-activo", 88 | # this is just as the student model, its just inside another model, recursivity!! 
89 | "attributes": { 90 | "name": {"css": "div.estudante-lista-curso-nome"}, 91 | "institution": {"css": "div.estudante-lista-curso-instit"}, 92 | 93 | # in this case REGEX is used to search the HTML for 94 | # the attribute id (must be in a REGEX capture group) 95 | "id": {"regex": ".*pv_curso_id=(\d+).*"}, 96 | 97 | # if CSS and REGEX are not enough, you can get all the power of XPATH 98 | "enrolled": {"xpath": ".//td[text()='Ano da primeira inscrição:']/following::td[1]"}, 99 | "year": {"xpath": ".//td[text()='Ano curricular atual:']/following::td[1]"}, 100 | "state": {"xpath": ".//td[text()='Estado atual:']/following::td[1]"} 101 | } 102 | }, 103 | "inactive_courses": { # Another attribute that is a list 104 | "model": "course", 105 | "list": "True", 106 | "css": "div.tabela-longa", 107 | "attributes": { 108 | "name": {"css": "td.t.k"}, 109 | "id": {"regex": ".*pv_curso_id=(\d+).*"}, 110 | "institution": {"xpath": ".//tr[@class='i']/td[2]/a/@title"}, 111 | "old_id": {"css": "td.l"}, 112 | "type": {"css": "td.t", "index": 2}, 113 | "started": {"css": "td.l", "index": 1} 114 | } 115 | } 116 | } 117 | ``` 118 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include sigpy/README.md 2 | include sigpy/faculties/*/*.json -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sigpy 🔨 2 | 3 | [![Build Status](https://travis-ci.org/msramalho/sigpy.svg)](https://travis-ci.org/msramalho/sigpy) 4 | [![Coverage Status](https://coveralls.io/repos/github/msramalho/sigpy/badge.svg?branch=master)](https://coveralls.io/github/msramalho/sigpy?branch=master) 5 | 6 | This is a Sigarra Python API based on Recursive Web Scraping Parser (wtf). 
Essentially, it performs requests as needed (cached by default) and parses the html information into objects you can use. 7 | 8 | The parser and interpreters are already there. All scraping rules are specified in `.json` files that are automatically found, processed and magically made code-accessible, so if you extend it you barely have to do anything other than editing `.json` files!! 9 | 10 | The concept behind this tool can be extended to other websites and one could ponder upon the interest of building something more general, like... "scraping for APIs"... _\*cough\*LDSO\*cough\*_ 11 | 12 | > In the end, this is yet another Sigarra-based project that I wished existed before I needed something like it. 13 | 14 | # Demo 15 | 16 |

17 | 18 | # Installation 19 | ```bash 20 | pip install git+https://github.com/msramalho/sigpy 21 | ``` 22 | 23 | # Examples 24 | (Each example will hide all the code of the previous examples. The complete code can be found in [examples/main.py](examples/main.py)) 25 | 26 | For all the examples below, you need to start by importing sigpy: 27 | 28 | ```python 29 | from sigpy import get_faculty, get_school_year 30 | ``` 31 | 32 | ### Login to your account 33 | Give your id (with or without `up`) and either hardcode your password or wait for prompt: 34 | 35 | ```python 36 | # get faculty object and login 37 | fac = get_faculty("feup") 38 | 39 | # login is optional but gives access to more information 40 | fac.login("201403027", "youWish") 41 | 42 | # if no password is given, secret prompt will appear 43 | fac.login("up201403027") 44 | ``` 45 | 46 | ### Extract Student Information 47 | 48 | ```python 49 | # access student data from their id 50 | # "up" is case insensitive and also optional 51 | msramalho = fac.get_student("UP201403027") 52 | 53 | # print a complete JSON view of student information 54 | print(msramalho) 55 | 56 | # or simply use the attribute you need 57 | # a complete list is available in the JSON view 58 | message = "Nice to meet you, %s" % msramalho.name 59 | ``` 60 | 61 | ### Access Course Data 62 | 63 | ```python 64 | # load mieic from the student (we know it is at index 1) 65 | # the course url receives the id and the current school year, 66 | # get_school_year returns the current school year, but could be hardcoded 67 | mieic = fac.get_course((msramalho.courses[1].id, get_school_year())) 68 | 69 | # print a complete JSON view of course information 70 | print(mieic) 71 | ``` 72 | 73 | ### Extract Teacher Information 74 | 75 | ```python 76 | # use the id of the course director to access teacher data! 
77 | # (the name of the variable may lose meaning in the future) 78 | pascoal = fac.get_teacher(mieic.director.id) 79 | 80 | # print a complete JSON view of teacher information 81 | print(pascoal) 82 | ``` 83 | 84 | ### Get Student, Teacher and Room Pictures 85 | ```python 86 | # use the get_picture method with the object 87 | # (temporarily download picture into a variable) 88 | photo1 = fac.get_picture(msramalho) 89 | 90 | # save the image locally on "./images/ID.jpg" 91 | photo1.save() 92 | # save the image locally on custom dir, absolute or relative 93 | photo1.save("another/dir") 94 | 95 | # you can do the same for teachers (and use chaining) 96 | vidal = fac.get_teacher("206415") 97 | fac.get_picture(vidal).show() 98 | ``` 99 |

100 | 101 | ```python 102 | # and even for room layout pictures 103 | room = fac.get_room(vidal.rooms[0].id) 104 | fac.get_picture(room).show() 105 | ``` 106 |

107 | 108 | ### Get All the Subjects of a Course 109 | ```python 110 | # the study plan is identified by course id and occurrence year 111 | # this will extract the information from the course study plan page 112 | study_plan = fac.get_study_plan((mieic.study_plan.id, mieic.study_plan.year)) 113 | 114 | # to get ALL the information for all the mandatory subjects 115 | # they are grouped by year->semester->subjects 116 | # this will perform one request per subject 117 | mandatory = [fac.get_subject(s.id) for y in study_plan.years 118 | for sm in y.semesters for s in sm.subjects if s.code != ""] 119 | 120 | # to get ALL the information for all the optional subjects, for instance 121 | # this will perform one request per subject 122 | optionals = [fac.get_subject(s.id) for s in study_plan.optionals if s.code != ""] 123 | ``` 124 | 125 | ### Get Subject Data and its Classes (all students for each class of that subject) 126 | ```python 127 | # assuming we have a subject id (could be extracted from study_plan above) 128 | plog = fac.get_subject(420002) 129 | 130 | # get all the classes for this subject (needs course, school year and semester) 131 | # if you miss some parameter you will see a message correcting you, 132 | # with all the values you need to give 133 | # notice that it is ONLY ONE parameter, which is a (tuple) 134 | subject_classes = fac.get_classes((mieic.id, plog.id, get_school_year(), plog.semester)) 135 | 136 | # now you can read all the students grouped by class 137 | for c in subject_classes.classes: 138 | for student in c.students: 139 | print("Hello %s, your email is %s" % (student.name, student.email)) 140 | # or simply list all the students in a given class (1st in this case (0 indexed)) 141 | print([s.name for s in subject_classes.classes[0].students]) 142 | ``` 143 | 144 | ### Get a Subject's Timetable 145 | ```python 146 | # this is actually an instance of the classes/timetable class 147 | # this class can receive any html page with a timetable from 
sigarra and parses it 148 | # this uses a python version of the SigTools parsing algorithm 149 | # events are dicts which have a lot of attributes (from, to, name, room, ...) 150 | plog_tt = fac.get_timetable(plog) 151 | 152 | # to get a json view of the events 153 | print(plog_tt) 154 | 155 | # to get an array of the events for further use 156 | plog_events = plog_tt.events 157 | ``` 158 | 159 | ### Get Student Timetable (aka Stalker Mode) 160 | The isolated code for this can be found in [examples/stalker.py](examples/stalker.py), but essentially: 161 | * load the target student 162 | * get the courses of this student 163 | * for each course 164 | * get the study plan (ids of the subjects) 165 | * load every subject 166 | * get the classes (students in each class) for each subject 167 | * check if the target is in any of those classes 168 | * save the classes the target is in 169 | * produce a url to [TTS](https://ni.fe.up.pt/TTS) with the target's timetable 170 | 171 | Alternatively, one could use the `fac.get_timetable(...)` to retrieve the custom times of the target's timetable, but since TTS made this script a step quicker, I just went for it. 172 | 173 | This script will take some time the first time you run it for each course; after that, the cache makes it quite fast. 174 | 175 | ### Sky is the limit 176 | This tool was built so there was a simple way to automate my endeavours into Sigarra, you can PR your own examples of tools into this section and help me and others get more out of sigpy. 177 | 178 | ## Cache 179 | Since all of this is based on requests to Sigarra, and many requests are usually duplicates (and a URL's content rarely changes), I have implemented a cache system that makes up for the time most requests take. Since, in time, most requests will be duplicates, this can be very helpful, as it greatly diminishes request times for subsequent calls (remember that SIGARRA can be really slow). 
Cache is also convenient if one of your scripts fails mid-execution, because the work done until failure is preserved. 180 | 181 | Anyway, the cache is on by default. To turn it off for the current session: 182 | ```python 183 | # this makes all the operations on fac produce requests 184 | fac = get_faculty("feup", save_cache=False) 185 | 186 | # if you just want to redo some requests (typically for dynamic pages), do 187 | msr = fac.get_student("201403027", use_cache=False) 188 | # this will not READ from cache, but it will UPDATE it 189 | # (unless save_cache is False for the fac variable) 190 | ``` 191 | There is one cache file per faculty, inside the folder `%APPDATA%/sigpy/cache/"FACULTY"/_cache.json`. You can open and edit it manually as it is a JSON mapping of a python dict (url->html), you can also delete it manually and programmatically, as follows: 192 | ```python 193 | # this will remove the file on disk for the current faculty only 194 | fac.cache.delete() 195 | ``` 196 | 197 | Note: Pictures are not cached, only html content. This html is minified but further work can be made into cleaning it further (for instance removing inline scripts, ...) 
198 | 199 | ## Verbosity 200 | By default, nothing is printed. To enable warnings about attributes that were not found in the parsed pages, do: 201 | ```python 202 | # obviously, this can be toggled with either True or False 203 | fac.set_verbose(True) # default False 204 | ``` 205 | 206 | # Testing 207 | Tests require a valid user account, to run them do: 208 | ```bash 209 | python -m unittest 210 | ``` 211 | And to get the coverage: 212 | ```bash 213 | coverage run --include="sigpy/*" -m unittest 214 | coverage report 215 | ``` 216 | And to get the html report: 217 | ```bash 218 | coverage run -m unittest 219 | coverage html 220 | ``` 221 | 222 | # Contributing 223 | If you happen to use this tool you may need to extend the parsed parameters or add JSON classes or even extend it to new faculties, if you make them into pull-requests that would be awesome. 224 | 225 | Also, if you just want to keep on building this tool, check the [contributing page](CONTRIBUTING.md)! 226 | -------------------------------------------------------------------------------- /examples/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msramalho/sigpy/0c8470a5d350715d837851f9173a07f3ca5f33b4/examples/demo.gif -------------------------------------------------------------------------------- /examples/main.py: -------------------------------------------------------------------------------- 1 | 2 | from sigpy import get_faculty, get_school_year 3 | 4 | ### Login to your account 5 | 6 | # get faculty object and login 7 | fac = get_faculty("feup") 8 | 9 | # login is optional but gives access to more information 10 | fac.login("201403027", "YOUR_PASSWORD_HERE_OR_JUST_USE_PROMPT_VERSION_BY_REMOVING_PARAMETER") 11 | 12 | ### Extract Student Information 13 | 14 | # access student data from their id 15 | # "up" is case insensitive and also optional 16 | msramalho = fac.get_student("UP201403027") 17 | 18 | # print a 
complete JSON view of student information 19 | print(msramalho) 20 | 21 | # or simply use the attribute you need 22 | # a complete list is available in the JSON view 23 | message = "Nice to meet you, %s" % msramalho.name 24 | 25 | ### Access Course Data 26 | 27 | # load mieic from the student (we know it is at index 1) 28 | # the course url receives the id and the current school year, 29 | # get_school_year returns the current school year, but could be hardcoded 30 | mieic = fac.get_course((msramalho.courses[1].id, get_school_year())) 31 | 32 | # print a complete JSON view of course information 33 | print(mieic) 34 | 35 | ### Extract Teacher Information 36 | 37 | # use the id of the course director to access teacher data! 38 | # (the name of the variable may lose meaning in the future) 39 | pascoal = fac.get_teacher(mieic.director.id) 40 | 41 | # print a complete JSON view of teacher information 42 | print(pascoal) 43 | 44 | ### Get Student, Teacher and Room Pictures 45 | # use the get_picture method with the object 46 | # (temporarily download picture into a variable) 47 | photo1 = fac.get_picture(msramalho) 48 | 49 | # save the image locally on "./images/ID.jpg" 50 | photo1.save() 51 | # save the image locally on custom dir, absolute or relative 52 | photo1.save("another/dir") 53 | 54 | # you can do the same for teachers (and use chaining) 55 | vidal = fac.get_teacher("206415") 56 | fac.get_picture(vidal).show() 57 | 58 | # and even for room layout pictures 59 | room = fac.get_room(vidal.rooms[0].id) 60 | fac.get_picture(room).show() 61 | 62 | ### Get All the Subjects of a Course 63 | # the study plan is identified by course id and occurrence year 64 | # this will extract the information from the course study plan page 65 | study_plan = fac.get_study_plan((mieic.study_plan.id, mieic.study_plan.year)) 66 | 67 | # to get ALL the information for all the mandatory subjects 68 | # they are grouped by year->semester->subjects 69 | # this will perform one request per 
subject 70 | mandatory = [fac.get_subject(s.id) for y in study_plan.years 71 | for sm in y.semesters for s in sm.subjects if s.code != ""] 72 | 73 | # to get ALL the information for all the optional subjects, for instance 74 | # this will perform one request per subject 75 | optionals = [fac.get_subject(s.id) for s in study_plan.optionals if s.code != ""] 76 | 77 | ### Get Subject Data and its Classes (all students for each class of that subject) 78 | # assuming we have a subject id (could be extracted from study_plan above) 79 | plog = fac.get_subject(420002) 80 | 81 | # get all the classes for this subject (needs course, school year and semester) 82 | # if you miss some parameter you will see a message correcting you, 83 | # with all the values you need to give 84 | # notice that it is ONLY ONE parameter, which is a (tuple) 85 | subject_classes = fac.get_classes((mieic.id, plog.id, get_school_year(), plog.semester)) 86 | 87 | # now you can read all the students grouped by class 88 | for c in subject_classes.classes: 89 | for student in c.students: 90 | print("Hello %s, your email is %s)" % (student.name, student.email)) 91 | # or simply list all the students in a given class (1st in this case (0 indexed)) 92 | print([s.name for s in classes.classes[0].students]) 93 | 94 | ### Get a Subject's Timetable 95 | # this is actually an instance of the classes/timetable class 96 | # this class can receive any html page with a timetable from sigarra and parses it 97 | # this uses a python version of the SigTools parsing algorithm 98 | # events are dicts which have a lot of attributes (from, to, name, room, ...) 
99 | plog_tt = fac.get_timetable(plog) 100 | 101 | # to get a json view of the events 102 | print(plog_tt) 103 | 104 | # to get an array of the events for further use 105 | plog_events = plog_tt.events 106 | -------------------------------------------------------------------------------- /examples/stalker.py: -------------------------------------------------------------------------------- 1 | from sigpy import get_faculty, get_school_year 2 | 3 | target = "201403027" 4 | stalker = "201403027" 5 | p = "" 6 | 7 | fac = get_faculty("feup") 8 | fac.login(stalker, p) 9 | target = fac.get_student(target) 10 | 11 | for c in target.courses: # iterate the target courses 12 | if c.id: # if it has a valid id 13 | target_classes = [] 14 | 15 | # load the course 16 | course = fac.get_course((c.id, get_school_year())) 17 | print(course.name) 18 | 19 | # load the study plan, so we can get the subjects taught in course 20 | study_plan = fac.get_study_plan((course.study_plan.id, course.study_plan.year)) 21 | # merge valid mandatory and optional courses and get their ids 22 | subjects = [s.id for y in study_plan.years for sm in y.semesters for s in sm.subjects if s.code != ""] + [s.id for s in study_plan.optionals if s.code != ""] 23 | 24 | # remove None values and duplicates 25 | subjects = list(set(filter(lambda x: x, subjects))) 26 | 27 | # load all the subjects (if this is not in cache will take some time) 28 | # use the following to provide feedback of the progress instead of the silent one 29 | # subjects = [[fac.get_subject(s), print(s.name)][0] for s in subjects] 30 | subjects = [fac.get_subject(s) for s in subjects] 31 | 32 | # now that we have every subject for this course, we will iterate them 33 | for s in subjects: 34 | print(s.name) 35 | # load the classes (one subject has many classes, like 1MIEIC01, ...) 
36 | classes = fac.get_classes((course.id, s.id, get_school_year(), s.semester)) 37 | for i, cl in enumerate(classes.classes): # iterate these classes to look for our target 38 | for st in cl.students: # iterate each student of each class 39 | if st.name == target.name: # if it is our target, we hve another piece of the puzzle!! 40 | target_classes.append("%s.%s" % (s.initials, classes.class_names[i].name)) # save it 41 | 42 | print(target_classes) 43 | 44 | # semester = subjects[0].semester # assume it is the same semester for all 45 | semester = 1 # or hardcode because it sometimes fails 46 | 47 | # get the TTS link, if there is a class with a wrong name 48 | # that is a problem on TTS' side! 49 | print("https://ni.fe.up.pt/TTS/#%s!%s!%s-%s~%s" % (get_school_year(), semester, fac.name.upper(), course.initials, "~".join(target_classes))) 50 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | certifi==2019.3.9 3 | chardet==3.0.4 4 | coverage==4.5.3 5 | cssselect==1.0.3 6 | htmlmin==0.1.12 7 | idna==2.7 8 | lxml==4.6.3 9 | Pillow>=7.1.0 10 | requests==2.20.1 11 | urllib3==1.24.2 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | with open('requirements.txt') as f: 5 | required = f.read().splitlines() 6 | 7 | setup(name='sigpy', 8 | version='1.0', 9 | description='Sigarra Python API based on Recursive Web Scraping Parser (wtf)', 10 | url='https://github.com/msramalho/sigpy', 11 | author='msramalho', 12 | license='MIT', 13 | packages=['sigpy', 'sigpy.classes', 'sigpy.faculties', 'sigpy.faculties.feup', 'sigpy.faculties.fmup'], 14 | install_requires=required, 15 | include_package_data=True, # Include the files in MANIFEST.in 16 | 
zip_safe=False) 17 | -------------------------------------------------------------------------------- /sigpy/__init__.py: -------------------------------------------------------------------------------- 1 | from . interface import get_faculty 2 | from . utils import get_school_year -------------------------------------------------------------------------------- /sigpy/cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import htmlmin 4 | from appdirs import * 5 | from sigpy.utils import vprint 6 | 7 | print() 8 | 9 | SAVE_TO = user_data_dir('sigpy', False) + "/cache/%s/" # cache folder in each faculty 10 | 11 | 12 | # this class defines all the variables and methods that the faculty class should implement 13 | class cache: 14 | 15 | def __init__(self, faculty, save_cache): 16 | self.path = SAVE_TO % faculty 17 | self.filepath = self.path + "_cache.json" 18 | self.save_cache = save_cache 19 | self._cache = self._load_requests() 20 | 21 | # reads the previously saved cache from disk 22 | def _load_requests(self): 23 | if os.path.exists(self.path) and self.save_cache: 24 | with open(self.filepath) as infile: 25 | return json.load(infile) 26 | return {} 27 | 28 | # performs a GET request, if necessary, and returns the HMTL response 29 | def get(self, session, url, use_cache=True): 30 | if self.save_cache and use_cache and url in self._cache: # value is stored in cache 31 | return self._cache[url] 32 | else: # a new request is needed 33 | req = session.get(url) # perform the request 34 | if req.status_code != 200: # if request fails, display the error code (404, ...) 
35 | vprint("[-] [%s] status code on:\n %s" % (req.status_code, url)) 36 | return "" 37 | self.save(url, req.text) 38 | return req.text 39 | 40 | # adds to the inner representation of the memory and also saves to disk 41 | def save(self, url, html): 42 | if not self.save_cache: 43 | return 44 | # create the cache folder if it does not exist 45 | if not os.path.exists(self.path): 46 | os.makedirs(self.path) 47 | self._cache[url] = minified = htmlmin.minify(html, remove_empty_space=True) 48 | with open(self.filepath, 'w') as outfile: 49 | json.dump(self._cache, outfile) 50 | 51 | # completely deletes the cache 52 | def delete(self): 53 | os.unlink(self.path) 54 | -------------------------------------------------------------------------------- /sigpy/classes/__init__.py: -------------------------------------------------------------------------------- 1 | from . model import model 2 | from . picture import picture 3 | from . timetable import timetable -------------------------------------------------------------------------------- /sigpy/classes/model.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class JsonClassEncoder(json.JSONEncoder): 5 | # needed to print classes inside other classes (that inherit from model) using json.dumps 6 | def default(self, obj): 7 | if hasattr(obj, 'json'): 8 | return obj.json() 9 | else: 10 | return json.JSONEncoder.default(self, obj) 11 | 12 | 13 | class model: 14 | def __init__(self, name, dictionary): 15 | self.class_name = name 16 | for k, v in dictionary.items(): 17 | setattr(self, k, v) 18 | 19 | def json(self): 20 | return self.__dict__ 21 | 22 | def __str__(self): 23 | return json.dumps(self.__dict__, ensure_ascii=False, cls=JsonClassEncoder, indent=2) 24 | 25 | def __repr__(self): 26 | return self.__str__() 27 | 28 | def __getitem__(self, name): 29 | return self.__dict__[name] 30 | 31 | def __iter__(self): 32 | for k, v in self.__dict__.items(): 33 | yield k, v 34 
# --- sigpy/classes/picture.py ---

SAVE_TO = "images/"


class picture:
    """An image fetched from the web, with helpers to show, save and delete it."""

    def __init__(self, path, raw):
        """Open the image.

        :param path: file name used when the picture is written to disk
        :param raw: passed unchanged to ``Image.open``
        """
        self.img = Image.open(raw)
        self.path = path
        # absolute location of the last successful save(), None until then
        self.saved_path = None

    def show(self):
        """Display the image through ``self.img.show()`` and return ``self``."""
        self.img.show()
        return self

    def save(self, save_to=SAVE_TO):
        """Write the image to ``save_to/<path>``, creating the folder if needed.

        :param save_to: destination folder (created when missing)
        :return: ``self`` so calls can be chained
        """
        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(save_to, exist_ok=True)
        target = os.path.abspath(os.path.join(save_to, self.path))
        self.img.save(target)
        self.saved_path = target
        return self

    def delete(self):
        """Remove the file written by ``save()`` and return ``self``.

        Falls back to ``self.path`` when the picture was never saved, which is
        what this method always removed before the saved location was tracked.
        """
        os.unlink(getattr(self, "saved_path", None) or self.path)
        return self


# --- sigpy/classes/timetable.py ---

class timetable:
    """Events extracted from a timetable HTML page.

    ``events`` is a list of dicts (see ``_get_event`` and
    ``_get_overlapping_event`` for the keys).
    """

    # height given to every column of the matrix built by _parse_to_matrix
    _MAX_ROWS = 50

    def __init__(self, html):
        """Parse ``html`` and collect its events into ``self.events``."""
        self.tree = fromstring(html)
        self.events = self.get_events()

    def get_events(self):
        """Return the events in this timetable page (empty list if none)."""
        events = []
        table = self.tree.cssselect("table.horario")
        if len(table):
            self.table = table[0]
            matrix = self._parse_to_matrix(False, True)
            events = self._parse_events(matrix)
            # NOTE(review): assumed to run only when a timetable table was
            # found; both nestings give the same result when one exists — confirm
            events += self._parse_overlapping_events()
        return events

    def _parse_to_matrix(self, dupCols=False, dupRows=False, textMode=False):
        """Convert ``self.table`` into a matrix with no rowspan and colspan.

        The result is indexed ``matrix[column][row]``. A cell that stretches
        over several rows/columns has its content duplicated into the extra
        rows when ``dupRows`` is true and into the extra columns when
        ``dupCols`` is true; otherwise those positions hold ``""``.

        :param textMode: unused, kept for interface compatibility
        :return: list of columns; a column that was never written stays None
        """
        columns = [None] * len(self.table.xpath("./tr/th"))  # every column is None
        row_idx = 0

        for row in self.table.xpath("./tr"):
            col_idx = 0
            for cell in row.xpath("./td|./th"):
                rowspan = int(cell.get("rowspan", 1))
                colspan = int(cell.get("colspan", 1))
                content = "".join(tostring(child, encoding="unicode") for child in cell.iterchildren())
                if not content:
                    # a completely empty cell has text None, which used to crash below
                    content = cell.text or ""
                content = content.replace(u'\xa0', u' ').strip()

                for dx in range(rowspan):
                    for dy in range(colspan):

                        if not columns[col_idx + dy]:
                            columns[col_idx + dy] = [None] * self._MAX_ROWS

                        # skip positions already taken by a span coming from a previous row
                        while columns[col_idx + dy][row_idx + dx]:
                            col_idx += 1
                            if not columns[col_idx + dy]:
                                columns[col_idx + dy] = [None] * self._MAX_ROWS

                        if (dx == 0 or dupRows) and (dy == 0 or dupCols):
                            columns[col_idx + dy][row_idx + dx] = content
                        else:
                            columns[col_idx + dy][row_idx + dx] = ""
                col_idx += 1
            row_idx += 1
        return columns

    def _parse_events(self, matrix):
        """Turn the matrix from ``_parse_to_matrix`` into a list of event dicts.

        Column 0 supplies the "from"/"to" values; every other column ``i`` is
        scanned top to bottom and each run of identical, non-empty cells
        becomes one event whose "day" is ``i``.
        """
        events = []
        for i in range(1, len(matrix)):  # iterate over the table columns
            day = matrix[i]
            if not day:  # column never filled by _parse_to_matrix
                continue
            counter = 1  # count the number of blocks this class takes
            # ignore the first row with day names, start at two to look at the previous cell
            for j in range(2, len(day)):
                previous = day[j - 1]
                run_ends = day[j] != previous or j == len(day) - 1
                if run_ends and previous is not None and len(previous) > 0:
                    events.append(timetable._get_event(previous, i, matrix[0][j - counter], matrix[0][j - 1]))
                    counter = 1
                elif day[j] == previous:
                    counter += 1
                else:
                    counter = 1
        return events

    @staticmethod
    def _get_event(html, day, start, end):
        """Extract all the info of one timetable cell into a dict.

        :param html: serialized content of the cell
        :param day: stored under "day"
        :param start: stored under "from"
        :param end: stored under "to"
        :raises IndexError: when an expected element is missing from ``html``
        """
        # NOTE(review): the wrapper markup around %s is not legible in the
        # reviewed copy of this file; a single <div> root is assumed — confirm
        # against version control.
        tree = fromstring("<div>%s</div>" % html)
        class_a = tree.cssselect("span.textopequenoc a")[0]
        room_td = tree.cssselect("table.formatar td")[0]
        teacher_a = tree.cssselect("table.formatar td.textod")[0]

        # because sometimes teachers do not have url
        teacher_links = teacher_a.cssselect("a")
        if teacher_links:
            teacher_a = teacher_links[0]
            teacher = {
                "name": teacher_a.get("title", ""),
                "acronym": teacher_a.text,
                "url": teacher_a.get("href", ""),
            }
        else:
            teacher = {"name": teacher_a.text}

        return {
            "from": start,
            "to": end,
            "day": day,
            "name": tree.cssselect("b acronym")[0].get("title", ""),
            "acronym": tree.cssselect("b a")[0].text,
            "type": re.findall(r'\((.+)\)', tree.xpath(".//b/text()")[0])[0],
            "class": {
                "name": class_a.text,
                "url": class_a.cssselect("a")[0].get("href", "")
            },
            "room": {
                "name": room_td.text,
                "url": room_td.cssselect("a")[0].get("href", ""),
            },
            "teacher": teacher
        }

    def _parse_overlapping_events(self):
        """Extract the overlapping events listed in ``table.dados`` rows."""
        return [self._get_overlapping_event(row) for row in self.tree.cssselect("table.dados tr.d")]

    def _get_overlapping_event(self, tree):
        """Parse one overlapping-class row into a dict with all its info.

        :raises IndexError: when an expected cell or link is missing
        """
        class_a = tree.cssselect('[headers="t6"] a')[0]
        room_a = tree.cssselect('[headers="t4"] a')[0]
        teacher_a = tree.cssselect('[headers="t5"] a')[0]

        return {
            "from": tree.cssselect('[headers="t3"]')[0].text,
            "to": None,  # unable to retrieve from this table
            "day": tree.cssselect('[headers="t2"]')[0].text,
            "name": tree.cssselect('[headers="t1"] acronym')[0].get("title", ""),
            "acronym": tree.cssselect('[headers="t1"] a')[0].text,
            # the cell is searched in its str(bytes) form, exactly as before
            "type": re.findall(r'\((.+)\)', str(tostring(tree.cssselect('[headers="t1"]')[0])))[0],
            "class": {
                "name": class_a.text,
                "url": class_a.cssselect("a")[0].get("href", "")
            },
            "room": {
                "name": room_a.text,
                "url": room_a.cssselect("a")[0].get("href", ""),
            },
            "teacher": {
                "acronym": teacher_a.text,
                "url": teacher_a.get("href", ""),
            }
        }

    def __str__(self):
        """Return the events as indented JSON."""
        return json.dumps(self.events, ensure_ascii=False, indent=2)

    def __repr__(self):
        return self.__str__()
utils import get_school_year 3 | 4 | 5 | class faculty(interface): 6 | def __init__(self, name, save_cache): 7 | super(faculty, self).__init__(name, save_cache) 8 | -------------------------------------------------------------------------------- /sigpy/faculties/feup/classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/feup/pt/it_listagem.lista_turma_disciplina?pv_curso_id=%s&pv_ocorrencia_id=%s&pv_ano_lectivo=%s&pv_periodo_id=%s", 3 | "help": "This URL requires: (course_id, subject_id aka occurrence_id, school_year, semester)", 4 | "attributes": { 5 | "class_names": { 6 | "model": "class_name", 7 | "list": "True", 8 | "xpath": ".//h3[1]/following-sibling::table[@class='tabela'][1]/tr[1]/th", 9 | "attributes": { 10 | "name":{ 11 | "xpath": "text()" 12 | } 13 | } 14 | }, 15 | "classes": { 16 | "model": "class", 17 | "list": "True", 18 | "xpath": ".//h3[contains(text(),'Turma: ')]//following-sibling::table[@class='tabela']", 19 | "attributes": { 20 | "name": { 21 | "regex": "Turma: (\\S+)" 22 | }, 23 | "students": { 24 | "model": "student", 25 | "list": "True", 26 | "xpath": "./tr[@class='i']|./tr[@class='p']", 27 | "attributes": { 28 | "name": { 29 | "xpath": ".//td[1]" 30 | }, 31 | "id": { 32 | "xpath": ".//td[2]" 33 | }, 34 | "email": { 35 | "xpath": ".//td[3]" 36 | }, 37 | "allocation_date": { 38 | "xpath": ".//td[4]" 39 | }, 40 | "enrolled": { 41 | "regex": "/feup/pt/imagens/Visto()", 42 | "boolean": "True" 43 | } 44 | } 45 | } 46 | } 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /sigpy/faculties/feup/course.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/feup/pt/cur_geral.cur_view?pv_curso_id=%s&pv_ano_lectivo=%d", 3 | "help": "This URL requires: (course_id, school_year)", 4 | "attributes": { 5 | "name": { 6 | "regex": "FEUP - (.+)<\/title>" 7 | }, 
8 | "cod": { 9 | "xpath": ".//table[@class='formulario']/tr/td//text()[contains(., 'Código Oficial')]/following::td[1]" 10 | }, 11 | "initials": { 12 | "xpath": ".//table[@class='formulario']/tr/td//text()[contains(., 'Sigla:')]/following::td[1]" 13 | }, 14 | "study_plan": { 15 | "model": "study_plan", 16 | "xpath": ".", 17 | "attributes": { 18 | "id": { 19 | "regex": "cur_planos_estudos_view\\?pv_plano_id=(\\d+)&" 20 | }, 21 | "year": { 22 | "regex": "cur_planos_estudos_view\\?pv_plano_id=.*pv_ano_lectivo=(\\d+)" 23 | } 24 | } 25 | }, 26 | "director": { 27 | "model": "teacher", 28 | "xpath": ".//table[@class='formulario']/tr[td[text()[contains(., 'Diretor:')]]]|.//table[@class='formulario']/tr[td[text()[contains(., 'Docente Responsável:')]]]", 29 | "attributes": { 30 | "name": { 31 | "xpath": ".//td[2]" 32 | }, 33 | "id": { 34 | "regex": "pct_codigo=(.+?)\"" 35 | } 36 | } 37 | }, 38 | "assistant_director": { 39 | "model": "teacher", 40 | "xpath": ".//table[@class='formulario']/tr[td[text()[contains(., 'Diretor Adjunto:')]]]", 41 | "attributes": { 42 | "name": { 43 | "xpath": ".//td[2]" 44 | }, 45 | "id": { 46 | "regex": "pct_codigo=(.+?)\"" 47 | } 48 | } 49 | } 50 | } 51 | } -------------------------------------------------------------------------------- /sigpy/faculties/feup/room.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/feup/pt/instal_geral.espaco_view?pv_id=%s", 3 | "help": "This URL requires: (room_id)", 4 | "picture": "https://sigarra.up.pt/feup/pt/instal_geral2.get_mapa?pv_id=%s", 5 | "attributes": { 6 | "id": { 7 | "regex": "href=\".*pv_id=(.*?)\"" 8 | }, 9 | "picture_id": { 10 | "regex": "Mapa.*?src=\".*get_mapa\\?pv_id=(.*?)\"" 11 | }, 12 | "name": { 13 | "regex": "<h1>(.+)</h1>" 14 | }, 15 | "building": { 16 | "xpath": ".//div[text()='Edifício:']/following::div[contains(@class, 'form-campo')]" 17 | }, 18 | "floor": { 19 | "xpath": 
".//select[@id='pisos']/option[@selected='selected'][2]" 20 | }, 21 | "purpose": { 22 | "xpath": ".//div[text()='Utilização:']/following::div[contains(@class, 'form-campo')]" 23 | }, 24 | "area": { 25 | "xpath": ".//div[contains(text(), 'Área')]/following::div[contains(@class, 'form-campo')]" 26 | }, 27 | "phone": { 28 | "xpath": ".//div[text()='Telefone:']/following::div[contains(@class, 'form-campo')]" 29 | }, 30 | "managers": { 31 | "model": "teacher", 32 | "list": "True", 33 | "xpath": ".//div[text()='Responsáveis:']/following::div[contains(@class, 'form-campo')]/ul", 34 | "attributes": { 35 | "name": { 36 | "css": "li a" 37 | }, 38 | "id": { 39 | "regex": "href=\".*p_codigo=(.*?)\"" 40 | } 41 | } 42 | }, 43 | "occupants": { 44 | "model": "teacher", 45 | "list": "True", 46 | "xpath": ".//div[text()='Ocupante:']/following::div[contains(@class, 'form-campo')]/ul", 47 | "attributes": { 48 | "name": { 49 | "css": "li a" 50 | }, 51 | "id": { 52 | "regex": "href=\".*p_codigo=(.*?)\"" 53 | } 54 | } 55 | } 56 | } 57 | } -------------------------------------------------------------------------------- /sigpy/faculties/feup/student.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/feup/pt/fest_geral.cursos_list?pv_num_unico=%s", 3 | "picture": "https://sigarra.up.pt/feup/pt/fotografias_service.foto?pct_cod=%s", 4 | "help": "This URL requires: (student_id)", 5 | "attributes": { 6 | "name": { 7 | "css": "div.estudante-info-nome" 8 | }, 9 | "id": { 10 | "css": "div.estudante-info-numero a" 11 | }, 12 | "email": { 13 | "derivate": "up%s@fe.up.pt", 14 | "from": ["id"] 15 | }, 16 | "orcid": { 17 | "regex": "href=\"http://orcid\\.org/(.*?)\"" 18 | }, 19 | "homepage":{ 20 | "css": "div.pagina-pessoal a" 21 | }, 22 | "courses": { 23 | "model": "course", 24 | "list": "True", 25 | "css": "div.estudante-lista-curso-activo", 26 | "attributes": { 27 | "name": { 28 | "css": "div.estudante-lista-curso-nome" 29 | 
}, 30 | "institution": { 31 | "css": "div.estudante-lista-curso-instit" 32 | }, 33 | "id": { 34 | "regex": ".*pv_curso_id=(\\d+).*" 35 | }, 36 | "enrolled": { 37 | "xpath": ".//td[text()='Ano da primeira inscrição:']/following::td[1]" 38 | }, 39 | "year": { 40 | "xpath": ".//td[text()='Ano curricular atual:']/following::td[1]" 41 | }, 42 | "state": { 43 | "xpath": ".//td[text()='Estado atual:']/following::td[1]" 44 | } 45 | } 46 | }, 47 | "inactive_courses": { 48 | "model": "course", 49 | "list": "True", 50 | "css": "div.tabela-longa", 51 | "attributes": { 52 | "name": { 53 | "css": "td.t.k" 54 | }, 55 | "id": { 56 | "regex": ".*pv_curso_id=(\\d+).*" 57 | }, 58 | "institution": { 59 | "xpath": ".//tr[@class='i']/td[2]/a/@title" 60 | }, 61 | "old_id": { 62 | "css": "td.l" 63 | }, 64 | "type": { 65 | "css": "td.t", 66 | "index": 2 67 | }, 68 | "started": { 69 | "css": "td.l", 70 | "index": 1 71 | } 72 | } 73 | } 74 | } 75 | } -------------------------------------------------------------------------------- /sigpy/faculties/feup/study_plan.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/feup/pt/cur_geral.cur_planos_estudos_view?pv_plano_id=%s&pv_ano_lectivo=%s&pv_tipo_cur_sigla=&pv_origem=CUR", 3 | "help": "This URL requires: (study_plan_id, school_year)", 4 | "attributes": { 5 | "years": { 6 | "model": "year", 7 | "list": "True", 8 | "css": "a#bloco_acurr_ShowOrHide + div.caixa", 9 | "attributes": { 10 | "name": { 11 | "regex": "class=\"caixa\" id=\"\\d*ano_(\\d+)\"" 12 | }, 13 | "semesters": { 14 | "model": "semester", 15 | "list": "True", 16 | "xpath": ".", 17 | "attributes": { 18 | "name": { 19 | "regex": "colspan=\"7\">(\\d+).*" 20 | }, 21 | "subjects": { 22 | "model": "subject", 23 | "list": "True", 24 | "css": "tr.i,tr.p", 25 | "attributes": { 26 | "code": { 27 | "xpath": ".//td[1]" 28 | }, 29 | "initials": { 30 | "xpath": ".//td[2]" 31 | }, 32 | "name": { 33 | "xpath": ".//td[3]" 34 
| }, 35 | "observations": { 36 | "xpath": ".//td[4]" 37 | }, 38 | "ects": { 39 | "xpath": ".//td[5]" 40 | }, 41 | "id": { 42 | "regex": "ucurr_geral.ficha_uc_view\\?pv_ocorrencia_id=(\\d+)" 43 | } 44 | } 45 | } 46 | } 47 | } 48 | } 49 | }, 50 | "optionals": { 51 | "model": "subject", 52 | "list": "True", 53 | "css": ".caixa[id^=\"div_id_\"] tr.i,.caixa[id^=\"div_id_\"] tr.p", 54 | "attributes": { 55 | "code": { 56 | "xpath": ".//td[1]" 57 | }, 58 | "initials": { 59 | "xpath": ".//td[2]" 60 | }, 61 | "name": { 62 | "xpath": ".//td[3]" 63 | }, 64 | "ects": { 65 | "xpath": ".//td[4]" 66 | }, 67 | "year": { 68 | "xpath": ".//td[5]" 69 | }, 70 | "semester": { 71 | "xpath": ".//td[6]" 72 | }, 73 | "id": { 74 | "regex": "ucurr_geral.ficha_uc_view\\?pv_ocorrencia_id=(\\d+)" 75 | } 76 | } 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /sigpy/faculties/feup/subject.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/feup/pt/ucurr_geral.ficha_uc_view?pv_ocorrencia_id=%s", 3 | "timetable": "https://sigarra.up.pt/feup/pt/hor_geral.ucurr_view?pv_ocorrencia_id=%s&pv_ano_lectivo=%s", 4 | "help": "This URL requires: (subject_id)\nThe timetable schedule requires: (subject_id, school_year)", 5 | "attributes": { 6 | "name": { 7 | "regex": "<title>FEUP - (.*?)" 8 | }, 9 | "code": { 10 | "xpath": ".//td[text()='Código:']/following::td[1]" 11 | }, 12 | "initials": { 13 | "xpath": ".//td[text()='Sigla:']/following::td[1]" 14 | }, 15 | "moodle": { 16 | "regex": "moodle_portal()", 17 | "boolean": "True" 18 | }, 19 | "year":{ 20 | "regex": "(\\d+)" 21 | }, 22 | "semester": { 23 | "regex": "Ocorrência: \\d+/\\d+ - (\\d+)S" 24 | }, 25 | "department":{ 26 | "model": "department", 27 | "xpath": ".//h2[contains(text(),'Ocorrência:')]//following-sibling::table[@class='formulario']", 28 | "attributes":{ 29 | "id": { 30 | "regex": 
"uni_geral\\.unidade_view\\?pv_unidade=(\\d+)" 31 | } 32 | } 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /sigpy/faculties/feup/teacher.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/feup/pt/func_geral.formview?p_codigo=%s", 3 | "help": "This URL requires: (teacher_id)", 4 | "picture": "https://sigarra.up.pt/feup/pt/fotografias_service.foto?pct_cod=%s", 5 | "timetable": "https://sigarra.up.pt/feup/pt/hor_geral.docentes_view?pv_doc_codigo=%s&pv_ano_lectivo=%s", 6 | "attributes": { 7 | "name": { 8 | "regex": "FEUP - (.*?)" 9 | }, 10 | "initials": { 11 | "xpath": ".//td[text()='Sigla:']/following::td[1]" 12 | }, 13 | "state": { 14 | "xpath": ".//td[text()='Estado:']/following::td[1]" 15 | }, 16 | "orcid": { 17 | "regex": "href=\"http://orcid.org/(.*?)\"" 18 | }, 19 | "alias": { 20 | "regex": "':(.+)'\\+secure\\+'fe\\.up\\.pt'" 21 | }, 22 | "email": { 23 | "derivate": "%s@fe.up.pt", 24 | "from": ["alias"] 25 | }, 26 | "phone": { 27 | "xpath": ".//td[text()='Telefone:']/following::td[1]" 28 | }, 29 | "alternative_phone": { 30 | "xpath": ".//td[text()='Telf.Alt.:']/following::td[1]" 31 | }, 32 | "voip": { 33 | "xpath": ".//td[text()='Voip:']/following::td[1]" 34 | }, 35 | "category": { 36 | "xpath": ".//div[contains(@class, 'informacao-pessoal-funcoes')]//td[text()='Categoria:']/following::td[1]" 37 | }, 38 | "presentation": { 39 | "css": "div.informacao-pessoal-apresentacao span" 40 | }, 41 | "rooms": { 42 | "model": "room", 43 | "list": "True", 44 | "xpath": ".//td[text()='Salas: ']/following::td[1]", 45 | "attributes": { 46 | "name": { 47 | "css": "a.normal" 48 | }, 49 | "id": { 50 | "regex": "pv_id=(.*?)\"" 51 | } 52 | } 53 | }, 54 | "positions": { 55 | "model": "position", 56 | "list": "True", 57 | "css": "tr.k.d", 58 | "attributes": { 59 | "name": { 60 | "css": "td.k" 61 | }, 62 | "date": { 63 | "css": "td.l" 64 | }, 65 | "id": { 
66 | "regex": "href=\".*=(.*?)\"" 67 | } 68 | } 69 | } 70 | } 71 | } -------------------------------------------------------------------------------- /sigpy/faculties/fmup/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from ... interface import interface 3 | 4 | 5 | class faculty(interface): 6 | def __init__(self, name, save_cache): 7 | super(faculty, self).__init__(name, save_cache) 8 | -------------------------------------------------------------------------------- /sigpy/faculties/fmup/classes.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/fmup/pt/it_listagem.lista_turma_disciplina?pv_curso_id=%s&pv_ocorrencia_id=%s&pv_ano_lectivo=%s&pv_periodo_id=%s", 3 | "help": "This URL requires: (course_id, subject_id aka occurrence_id, school_year, semester)", 4 | "attributes": { 5 | "class_names": { 6 | "model": "class_name", 7 | "list": "True", 8 | "xpath": ".//h3[1]/following-sibling::table[@class='tabela'][1]/tr[1]/th", 9 | "attributes": { 10 | "name":{ 11 | "xpath": "text()" 12 | } 13 | } 14 | }, 15 | "classes": { 16 | "model": "class", 17 | "list": "True", 18 | "xpath": ".//h3[contains(text(),'Turma: ')]//following-sibling::table[@class='tabela']", 19 | "attributes": { 20 | "name": { 21 | "regex": "Turma: (\\S+)" 22 | }, 23 | "students": { 24 | "model": "student", 25 | "list": "True", 26 | "xpath": "./tr[@class='i']|./tr[@class='p']", 27 | "attributes": { 28 | "name": { 29 | "xpath": ".//td[1]" 30 | }, 31 | "id": { 32 | "xpath": ".//td[2]" 33 | }, 34 | "email": { 35 | "xpath": ".//td[3]" 36 | }, 37 | "allocation_date": { 38 | "xpath": ".//td[4]" 39 | }, 40 | "enrolled": { 41 | "regex": "/fmup/pt/imagens/Visto()", 42 | "boolean": "True" 43 | } 44 | } 45 | } 46 | } 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /sigpy/faculties/fmup/course.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/fmup/pt/cur_geral.cur_view?pv_curso_id=%s&pv_ano_lectivo=%d", 3 | "help": "This URL requires: (course_id, school_year)", 4 | "attributes": { 5 | "name": { 6 | "regex": "FMUP - (.+)<\/title>" 7 | }, 8 | "cod": { 9 | "xpath": ".//table[@class='formulario']/tr/td//text()[contains(., 'Código Oficial')]/following::td[1]" 10 | }, 11 | "initials": { 12 | "xpath": ".//table[@class='formulario']/tr/td//text()[contains(., 'Sigla:')]/following::td[1]" 13 | }, 14 | "study_plan": { 15 | "model": "study_plan", 16 | "xpath": ".", 17 | "attributes": { 18 | "id": { 19 | "regex": "cur_planos_estudos_view\\?pv_plano_id=(\\d+)&" 20 | }, 21 | "year": { 22 | "regex": "cur_planos_estudos_view\\?pv_plano_id=.*pv_ano_lectivo=(\\d+)" 23 | } 24 | } 25 | }, 26 | "director": { 27 | "model": "teacher", 28 | "xpath": ".//table[@class='formulario']/tr[td[text()[contains(., 'Diretor:')]]]|.//table[@class='formulario']/tr[td[text()[contains(., 'Docente Responsável:')]]]", 29 | "attributes": { 30 | "name": { 31 | "xpath": ".//td[2]" 32 | }, 33 | "id": { 34 | "regex": "pct_codigo=(.+?)\"" 35 | } 36 | } 37 | }, 38 | "assistant_director": { 39 | "model": "teacher", 40 | "xpath": ".//table[@class='formulario']/tr[td[text()[contains(., 'Diretor Adjunto:')]]]", 41 | "attributes": { 42 | "name": { 43 | "xpath": ".//td[2]" 44 | }, 45 | "id": { 46 | "regex": "pct_codigo=(.+?)\"" 47 | } 48 | } 49 | } 50 | } 51 | } -------------------------------------------------------------------------------- /sigpy/faculties/fmup/room.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/fmup/pt/instal_geral.espaco_view?pv_id=%s", 3 | "help": "This URL requires: (room_id)", 4 | "picture": "https://sigarra.up.pt/fmup/pt/instal_geral2.get_mapa?pv_id=%s", 5 | "attributes": { 6 | "id": { 7 | "regex": "href=\".*pv_id=(.*?)\"" 8 | }, 9 | 
"picture_id": { 10 | "regex": "Mapa.*?src=\".*get_mapa\\?pv_id=(.*?)\"" 11 | }, 12 | "name": { 13 | "regex": "<h1>(.+)</h1>" 14 | }, 15 | "building": { 16 | "xpath": ".//div[text()='Edifício:']/following::div[contains(@class, 'form-campo')]" 17 | }, 18 | "floor": { 19 | "xpath": ".//select[@id='pisos']/option[@selected='selected'][2]" 20 | }, 21 | "purpose": { 22 | "xpath": ".//div[text()='Utilização:']/following::div[contains(@class, 'form-campo')]" 23 | }, 24 | "area": { 25 | "xpath": ".//div[contains(text(), 'Área')]/following::div[contains(@class, 'form-campo')]" 26 | }, 27 | "phone": { 28 | "xpath": ".//div[text()='Telefone:']/following::div[contains(@class, 'form-campo')]" 29 | }, 30 | "managers": { 31 | "model": "teacher", 32 | "list": "True", 33 | "xpath": ".//div[text()='Responsáveis:']/following::div[contains(@class, 'form-campo')]/ul", 34 | "attributes": { 35 | "name": { 36 | "css": "li a" 37 | }, 38 | "id": { 39 | "regex": "href=\".*p_codigo=(.*?)\"" 40 | } 41 | } 42 | }, 43 | "occupants": { 44 | "model": "teacher", 45 | "list": "True", 46 | "xpath": ".//div[text()='Ocupante:']/following::div[contains(@class, 'form-campo')]/ul", 47 | "attributes": { 48 | "name": { 49 | "css": "li a" 50 | }, 51 | "id": { 52 | "regex": "href=\".*p_codigo=(.*?)\"" 53 | } 54 | } 55 | } 56 | } 57 | } -------------------------------------------------------------------------------- /sigpy/faculties/fmup/student.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/fmup/pt/fest_geral.cursos_list?pv_num_unico=%s", 3 | "picture": "https://sigarra.up.pt/fmup/pt/fotografias_service.foto?pct_cod=%s", 4 | "help": "This URL requires: (student_id)", 5 | "attributes": { 6 | "name": { 7 | "css": "div.estudante-info-nome" 8 | }, 9 | "id": { 10 | "css": "div.estudante-info-numero a" 11 | }, 12 | "email": { 13 | "derivate": "up%s@fe.up.pt", 14 | "from": ["id"] 15 | }, 16 | "orcid": { 17 | "regex": 
"href=\"http://orcid\\.org/(.*?)\"" 18 | }, 19 | "homepage":{ 20 | "css": "div.pagina-pessoal a" 21 | }, 22 | "courses": { 23 | "model": "course", 24 | "list": "True", 25 | "css": "div.estudante-lista-curso-activo", 26 | "attributes": { 27 | "name": { 28 | "css": "div.estudante-lista-curso-nome" 29 | }, 30 | "institution": { 31 | "css": "div.estudante-lista-curso-instit" 32 | }, 33 | "id": { 34 | "regex": ".*pv_curso_id=(\\d+).*" 35 | }, 36 | "enrolled": { 37 | "xpath": ".//td[text()='Ano da primeira inscrição:']/following::td[1]" 38 | }, 39 | "year": { 40 | "xpath": ".//td[text()='Ano curricular atual:']/following::td[1]" 41 | }, 42 | "state": { 43 | "xpath": ".//td[text()='Estado atual:']/following::td[1]" 44 | } 45 | } 46 | }, 47 | "inactive_courses": { 48 | "model": "course", 49 | "list": "True", 50 | "css": "div.tabela-longa", 51 | "attributes": { 52 | "name": { 53 | "css": "td.t.k" 54 | }, 55 | "id": { 56 | "regex": ".*pv_curso_id=(\\d+).*" 57 | }, 58 | "institution": { 59 | "xpath": ".//tr[@class='i']/td[2]/a/@title" 60 | }, 61 | "old_id": { 62 | "css": "td.l" 63 | }, 64 | "type": { 65 | "css": "td.t", 66 | "index": 2 67 | }, 68 | "started": { 69 | "css": "td.l", 70 | "index": 1 71 | } 72 | } 73 | } 74 | } 75 | } -------------------------------------------------------------------------------- /sigpy/faculties/fmup/study_plan.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/fmup/pt/cur_geral.cur_planos_estudos_view?pv_plano_id=%s&pv_ano_lectivo=%s&pv_tipo_cur_sigla=&pv_origem=CUR", 3 | "help": "This URL requires: (study_plan_id, school_year)", 4 | "attributes": { 5 | "years": { 6 | "model": "year", 7 | "list": "True", 8 | "css": "a#bloco_acurr_ShowOrHide + div.caixa", 9 | "attributes": { 10 | "name": { 11 | "regex": "class=\"caixa\" id=\"\\d*ano_(\\d+)\"" 12 | }, 13 | "semesters": { 14 | "model": "semester", 15 | "list": "True", 16 | "xpath": ".", 17 | "attributes": { 18 | "name": { 
19 | "regex": "colspan=\"7\">(\\d+).*" 20 | }, 21 | "subjects": { 22 | "model": "subject", 23 | "list": "True", 24 | "css": "tr.i,tr.p", 25 | "attributes": { 26 | "code": { 27 | "xpath": ".//td[1]" 28 | }, 29 | "initials": { 30 | "xpath": ".//td[2]" 31 | }, 32 | "name": { 33 | "xpath": ".//td[3]" 34 | }, 35 | "observations": { 36 | "xpath": ".//td[4]" 37 | }, 38 | "ects": { 39 | "xpath": ".//td[5]" 40 | }, 41 | "id": { 42 | "regex": "ucurr_geral.ficha_uc_view\\?pv_ocorrencia_id=(\\d+)" 43 | } 44 | } 45 | } 46 | } 47 | } 48 | } 49 | }, 50 | "optionals": { 51 | "model": "subject", 52 | "list": "True", 53 | "css": ".caixa[id^=\"div_id_\"] tr.i,.caixa[id^=\"div_id_\"] tr.p", 54 | "attributes": { 55 | "code": { 56 | "xpath": ".//td[1]" 57 | }, 58 | "initials": { 59 | "xpath": ".//td[2]" 60 | }, 61 | "name": { 62 | "xpath": ".//td[3]" 63 | }, 64 | "ects": { 65 | "xpath": ".//td[4]" 66 | }, 67 | "year": { 68 | "xpath": ".//td[5]" 69 | }, 70 | "semester": { 71 | "xpath": ".//td[6]" 72 | }, 73 | "id": { 74 | "regex": "ucurr_geral.ficha_uc_view\\?pv_ocorrencia_id=(\\d+)" 75 | } 76 | } 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /sigpy/faculties/fmup/subject.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/fmup/pt/ucurr_geral.ficha_uc_view?pv_ocorrencia_id=%s", 3 | "timetable": "https://sigarra.up.pt/fmup/pt/hor_geral.ucurr_view?pv_ocorrencia_id=%s&pv_ano_lectivo=%s", 4 | "help": "This URL requires: (subject_id)\nThe timetable schedule requires: (subject_id, school_year)", 5 | "attributes": { 6 | "name": { 7 | "regex": "<title>FMUP - (.*?)" 8 | }, 9 | "code": { 10 | "xpath": ".//td[text()='Código:']/following::td[1]" 11 | }, 12 | "initials": { 13 | "xpath": ".//td[text()='Sigla:']/following::td[1]" 14 | }, 15 | "moodle": { 16 | "regex": "moodle_portal()", 17 | "boolean": "True" 18 | }, 19 | "year":{ 20 | "regex": "(\\d+)" 21 | }, 22 | 
"semester": { 23 | "regex": "Ocorrência: \\d+/\\d+ - (\\d+)S" 24 | }, 25 | "department":{ 26 | "model": "department", 27 | "xpath": ".//h2[contains(text(),'Ocorrência:')]//following-sibling::table[@class='formulario']", 28 | "attributes":{ 29 | "id": { 30 | "regex": "uni_geral\\.unidade_view\\?pv_unidade=(\\d+)" 31 | } 32 | } 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /sigpy/faculties/fmup/teacher.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://sigarra.up.pt/fmup/pt/func_geral.formview?p_codigo=%s", 3 | "help": "This URL requires: (teacher_id)", 4 | "picture": "https://sigarra.up.pt/fmup/pt/fotografias_service.foto?pct_cod=%s", 5 | "timetable": "https://sigarra.up.pt/fmup/pt/hor_geral.docentes_view?pv_doc_codigo=%s&pv_ano_lectivo=%s", 6 | "attributes": { 7 | "name": { 8 | "regex": "FMUP - (.*?)" 9 | }, 10 | "initials": { 11 | "xpath": ".//td[text()='Sigla:']/following::td[1]" 12 | }, 13 | "state": { 14 | "xpath": ".//td[text()='Estado:']/following::td[1]" 15 | }, 16 | "orcid": { 17 | "regex": "href=\"http://orcid.org/(.*?)\"" 18 | }, 19 | "alias": { 20 | "regex": "':(.+)'\\+secure\\+'fe\\.up\\.pt'" 21 | }, 22 | "email": { 23 | "derivate": "%s@fe.up.pt", 24 | "from": ["alias"] 25 | }, 26 | "phone": { 27 | "xpath": "(.//td[text()='Telefone:'])[2]/following::td[1]" 28 | }, 29 | "voip": { 30 | "xpath": ".//td[text()='Voip:']/following::td[1]" 31 | }, 32 | "category": { 33 | "xpath": ".//div[contains(@class, 'informacao-pessoal-funcoes')]//td[text()='Categoria:']/following::td[1]" 34 | }, 35 | "presentation": { 36 | "css": "div.informacao-pessoal-apresentacao span" 37 | }, 38 | "rooms": { 39 | "model": "room", 40 | "list": "True", 41 | "xpath": ".//td[text()='Salas: ']/following::td[1]", 42 | "attributes": { 43 | "name": { 44 | "css": "a.normal" 45 | }, 46 | "id": { 47 | "regex": "pv_id=(.*?)\"" 48 | } 49 | } 50 | }, 51 | "positions": { 52 | 
"model": "position", 53 | "list": "True", 54 | "css": "tr.k.d", 55 | "attributes": { 56 | "name": { 57 | "css": "td.k" 58 | }, 59 | "date": { 60 | "css": "td.l" 61 | }, 62 | "id": { 63 | "regex": "href=\".*=(.*?)\"" 64 | } 65 | } 66 | } 67 | } 68 | } -------------------------------------------------------------------------------- /sigpy/interface.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from os.path import basename, splitext 4 | import re 5 | import requests 6 | import importlib 7 | import json 8 | import glob 9 | 10 | from getpass import getpass 11 | from lxml.html import fromstring, HtmlElement 12 | 13 | from . classes import picture, timetable 14 | from . parser import parse_attributes, get_class_from_dict 15 | from . utils import get_school_year, vprint, set_verbose 16 | from . cache import cache 17 | 18 | 19 | # this class defines all the variables and methods that the faculty class should implement 20 | class interface: 21 | configs = { 22 | "auth": "https://sigarra.up.pt/feup/pt/vld_validacao.validacao", 23 | "auth_failed": "O conjunto utilizador/senha não é válido." 
24 | } 25 | 26 | classes = {} # this is the property set from the JSON files 27 | 28 | def __init__(self, faculty, save_cache): 29 | self.session = requests 30 | self.name = faculty 31 | self.cache = cache(self.name, save_cache) 32 | 33 | def set_verbose(self, verbose): 34 | set_verbose(verbose) 35 | 36 | def get_class(self, class_name, route_tuple, original=None, use_cache=True): 37 | config = interface.classes[class_name] 38 | try: 39 | url = config["url"] % route_tuple # format the url with the given data 40 | except Exception as e: 41 | raise Exception("[-] Error: %s in formatting URL with your tuple %s: \n %s" % (str(e), route_tuple, config["help"])) 42 | tree = fromstring(self.GET(url, use_cache)) 43 | return get_class_from_dict(class_name, parse_attributes(tree, config["attributes"], original)) 44 | 45 | # helper method to perform and debug requests on failure 46 | def GET(self, url, use_cache): 47 | return self.cache.get(self.session, url, use_cache) 48 | 49 | # static method that receives an id and returns the numeric part 50 | def get_id(id): 51 | if isinstance(id, str) and "up" in id.lower(): 52 | return id[2:] 53 | return id 54 | 55 | # reads a picture from the web and returns it, if it exists, m is a model instance 56 | def get_picture(self, m): 57 | if "picture" in interface.classes[m.class_name]: # this instance has picture 58 | route = interface.classes[m.class_name]["picture"] 59 | pid = getattr(m, "picture_id", interface.get_id(m.id)) 60 | r = self.session.get(route % str(pid), stream=True) 61 | if r.status_code == 200: 62 | return picture("%s.jpg" % pid, r.raw) 63 | return False 64 | 65 | # parses a timetable from the web and returns it, if it exists, m is a model instance 66 | def get_timetable(self, m, school_year=get_school_year(), use_cache=True): 67 | if "timetable" in interface.classes[m.class_name]: # this instance has a timetable 68 | route = interface.classes[m.class_name]["timetable"] % (m.id, school_year) 69 | return 
timetable(self.GET(route, use_cache)) 70 | return False 71 | 72 | # log a user in, either receive or prompt for password, tests using configs["auth_failed"] 73 | def login(self, username, password=None): # creates a requests session to access protected pages 74 | if password is None: 75 | password = getpass("Password for %s?\n" % username) 76 | self.session = requests.Session() 77 | payload = {'p_user': username, 'p_pass': password} 78 | r = self.session.post(interface.configs["auth"], params=payload) 79 | if re.search(interface.configs["auth_failed"], r.text): 80 | self.session = requests 81 | return False 82 | return True 83 | 84 | # checks if a valid session exists and exits if not 85 | def logged_in(self): 86 | return self.session != requests 87 | 88 | # add a method get_something(id, original=True) to itself where "something" is a string 89 | def create_dynamic_method(self, name): 90 | def _get_method(id, original=None, use_cache=True): 91 | thing = self.get_class(name, interface.get_id(id), original, use_cache) 92 | thing.id = id if type(id) is not tuple else id[0] 93 | return thing 94 | return _get_method 95 | 96 | 97 | # this function is used to dynamically select the appropriate faculty and load its values from the JSON mappings 98 | def get_faculty(faculty="feup", save_cache=True): 99 | if not os.path.isfile(os.path.join(os.path.dirname(__file__), "faculties/%s/__init__.py" % faculty)): # faculty not implemented 100 | raise Exception("The faculty %s has not been implemented" % faculty) 101 | mod = importlib.import_module("sigpy.faculties.%s" % faculty) # import the correct module 102 | fac = mod.faculty(faculty, save_cache) # create an instance of the correct faculty 103 | 104 | # find all the JSON files inside the faculty folder 105 | file_list = glob.glob(os.path.join(os.path.dirname(__file__), 'faculties/%s/*.json' % faculty)) 106 | for filename in file_list: # iterate over the files 107 | # for each, read contents, parse the JSON and load it into 
# ------------------------------------------------------------------------------
# /sigpy/parser.py
# ------------------------------------------------------------------------------
import sys
import re
from lxml.html import fromstring, HtmlElement
from lxml import cssselect
from lxml.cssselect import CSSSelector as css
from lxml import etree

from sigpy.classes.model import model
from sigpy.utils import vprint


# ROUTE PARSING FUNCTIONS
def get_class_from_dict(class_name, dictionary):
    """Wrap a dict of attribute -> value in a `model` named `class_name`."""
    return model(class_name, dictionary)


def get_index(config, default=0):
    """Return config["index"] when present, otherwise `default`."""
    return config.get("index", default)


def parse_regex(tree, config):
    """Search the serialized `tree` with config["regex"].

    Returns:
        The capture group selected by config["index"] (group 1 when no index
        is configured), or None when the pattern does not match.
    """
    markup = etree.tostring(tree, encoding='utf-8').decode("utf-8")
    res = re.search(config["regex"], markup)
    if res:
        return res.group(get_index(config, 1))
    return None


def parse_css(tree, config):
    """Return the stripped text of the config["css"] match at the configured index."""
    return tree.cssselect(config["css"])[get_index(config)].text_content().strip()


def parse_xpath(tree, config):
    """Return the stripped value of the config["xpath"] result at the configured index.

    HtmlElement results are reduced to their text content first; any other
    result is stripped as returned by xpath.
    """
    el = tree.xpath(config["xpath"])[get_index(config)]
    if isinstance(el, HtmlElement):
        el = el.text_content()
    return el.strip()


def parse_derivate(config, res):
    """Fill the config["derivate"] template with already-parsed values.

    `config["from"]` lists the keys of `res` whose values are interpolated,
    in order.
    """
    return config["derivate"] % tuple(res[t_el] for t_el in config["from"])


def parse_attribute(tree, config, res):
    """Parse one simple attribute according to the first strategy key found.

    Strategies are tried in the order css, regex, xpath, derivate.  Any error
    raised while parsing is reported through vprint() and swallowed, because
    some attributes do not exist when not logged in.

    Returns:
        The parsed value, or None when parsing failed or no strategy key is
        present in `config`.
    """
    try:
        if "css" in config:  # this is an attribute from css
            return parse_css(tree, config)
        elif "regex" in config:  # this is an attribute from regex
            return parse_regex(tree, config)
        elif "xpath" in config:  # this is an attribute from xpath
            return parse_xpath(tree, config)
        elif "derivate" in config:  # this is an attribute derived from others
            return parse_derivate(config, res)
    except Exception as e:  # deliberate best-effort: report and carry on
        vprint("[-] Error: %s when parsing %s... \n Ignoring attribute - may be due to lack of login" % (str(e), config))
    return None


def parse_element(tree, config):
    """Select the lxml element(s) described by a css or xpath `config`.

    Both selectors yield a list.  With "list" in `config` the whole list is
    returned; otherwise only the entry at the configured index is.  The css
    branch follows the same rule as the xpath branch, so a single css-selected
    element is returned as an element rather than as a one-item list.

    Returns:
        The element or list of elements; [] when nothing could be selected
        (errors are reported through vprint() and swallowed) or when `config`
        has neither a "css" nor an "xpath" key.
    """
    try:
        if "css" in config:
            found = tree.cssselect(config["css"])
        elif "xpath" in config:
            found = tree.xpath(config["xpath"])
        else:
            return []
        return found if "list" in config else found[get_index(config)]
    except Exception as e:  # some elements do not exist if not logged in
        vprint("[-] Error: %s when parsing %s... Ignoring element" % (str(e), config))
    return []
# /sigpy/parser.py (continued)
def parse_class(element, config):
    """Parse config["attributes"] out of `element` and wrap them as config["model"]."""
    d = parse_attributes(element, config["attributes"])
    return get_class_from_dict(config["model"], d)


def parse_attributes(tree, attributes, original=None):
    """Recursively parse every attribute described in `attributes`.

    Args:
        tree: lxml tree (or element) to read from.
        attributes: mapping of attribute name -> parsing config.
        original: optional object; attributes it already carries in its
            `__dict__` are copied over instead of being parsed again.

    Returns:
        dict of attribute name -> parsed value.  Configs with a "model" key
        produce a nested object (or a list of them when "list" is present);
        configs with a "boolean" key produce True/False depending on whether
        a value was found.
    """
    known = original.__dict__ if original else {}
    res = {}
    for attr, config in attributes.items():
        # if the original already has the attribute defined then simply load it
        if attr in known:
            res[attr] = known[attr]
            continue
        if "model" not in config:  # simple attribute parsed directly
            value = parse_attribute(tree, config, res)
            # "boolean" attributes only record whether something was found
            res[attr] = (value is not None) if "boolean" in config else value
        else:  # nested class(es)
            element = parse_element(tree, config)
            if "list" in config:
                res[attr] = [parse_class(e, config) for e in element]
            else:
                res[attr] = parse_class(element, config)
    return res


# ------------------------------------------------------------------------------
# /sigpy/utils.py
# ------------------------------------------------------------------------------
import datetime


def get_school_year(now=None):
    """Return the starting year of the school year that contains `now`.

    Months up to and including August belong to the school year that started
    in the previous calendar year.

    Args:
        now: object with `year` and `month` attributes (e.g. a datetime);
            defaults to the current local time.
    """
    if now is None:
        now = datetime.datetime.now()
    if now.month <= 8:
        return now.year - 1
    return now.year


def getIndent(indent):
    """Return two spaces per indentation level (empty for levels <= 0)."""
    return " " * (2 * indent)


def constructStringVariable(indent, key="", value=""):
    """Format a single `key: value` entry (or a bare value when key is "").

    Values whose type string contains "classes" are treated as nested objects
    and expanded with constructString() one level deeper.
    """
    base = getIndent(indent)
    base = base + ("%s%s" if key == "" else "%s: %s")
    if "classes" in str(type(value)):  # this is an object
        return (base + "\n") % (key, constructString(value, indent + 1))
    return getIndent(indent + 1) + base % (key, str(value))


def constructString(obj, indent=-1):
    """Render the non-empty attributes of `obj` as indented text lines.

    Attributes equal to "" or None are skipped.  A non-empty list becomes a
    "<key> (<count>)" line followed by one line per element; everything else
    is formatted by constructStringVariable().
    """
    res = []
    for key, value in obj.__dict__.items():
        if value is None or value == "":
            continue
        if isinstance(value, list) and value:  # list - iterate
            # line saying the number of elements
            res.append("%s%s (%s)" % (getIndent(indent), key, str(len(value))))
            res.extend(constructStringVariable(indent + 1, value=el) for el in value)
        else:
            res.append(constructStringVariable(indent, key, value))
    return "\n".join(item for item in res if item != '')  # drop empty lines


VERBOSE = False


def set_verbose(verbose):
    """Set the module-wide verbosity flag read by vprint()."""
    global VERBOSE
    VERBOSE = verbose


def vprint(message):
    """Print `message` only when the verbose flag is set."""
    if VERBOSE:
        print(message)


# ------------------------------------------------------------------------------
# /test/__init__.py:
#   https://raw.githubusercontent.com/msramalho/sigpy/0c8470a5d350715d837851f9173a07f3ca5f33b4/test/__init__.py
# ------------------------------------------------------------------------------
# /test/classes/__init__.py:
#   https://raw.githubusercontent.com/msramalho/sigpy/0c8470a5d350715d837851f9173a07f3ca5f33b4/test/classes/__init__.py
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# /test/classes/test_model.py
# ------------------------------------------------------------------------------
import unittest
from sigpy.classes import model


model_name = "test_model"
original = {"int": 10, "char": 'a', "string": "ab de", "list": [None, 0, 1, "a", []]}
# NOTE: `model_dict` is the very same dict object as `original`, so the
# "class_name" key added below is visible through both names.
model_dict = original
model_dict['class_name'] = model_name


class SigTest(unittest.TestCase):
    """Checks the `model` wrapper: construction, json, str/repr, item access, iteration."""

    @classmethod
    def setUpClass(cls):
        # one shared instance is enough for every test
        cls.m = model(model_name, original)

    def test_init(self):
        """Every entry of the source dict is exposed as an attribute."""
        expected = (
            ("class_name", "test_model"),
            ("int", 10),
            ("char", 'a'),
            ("string", "ab de"),
            ("list", [None, 0, 1, "a", []]),
        )
        for attr, value in expected:
            self.assertEqual(getattr(self.m, attr), value)

    def test_json(self):
        """json() gives back the dict the model was built from."""
        self.assertIsNotNone(self.m.json)
        self.assertDictEqual(self.m.json(), model_dict)

    def test_str(self):
        self.assertIsNotNone(self.m.__str__)
        rendered = str(self.m)
        self.assertEqual(len(rendered), 138, "should be json with identation 2")

    def test_repr(self):
        self.assertIsNotNone(self.m.__repr__)
        rendered = str([self.m])
        self.assertEqual(len(rendered), 140, "should be json with identation 2 and __repr__ should be implemented")

    def test_get_item(self):
        """Every entry of the source dict is reachable with [] access."""
        self.assertIsNotNone(self.m.__getitem__)
        expected = (
            ("class_name", "test_model"),
            ("int", 10),
            ("char", 'a'),
            ("string", "ab de"),
            ("list", [None, 0, 1, "a", []]),
        )
        for key, value in expected:
            self.assertEqual(self.m[key], value)

    def test_iter(self):
        """Iterating the model yields (key, value) pairs matching the source dict."""
        self.assertIsNotNone(self.m.__iter__)
        for key, value in self.m:
            self.assertEqual(value, original[key])

    def test_inner_clases(self):
        """A model may hold another model and an arbitrary object."""
        class temp:
            def __init__(self, a, b):
                self.a, self.b = a, b

        inner_values = {"inner": self.m, "other_class": temp(-1, 20)}
        self.m2 = model("model_with_inner", inner_values)
        self.assertEqual(len(self.m2.json()), 3)


if __name__ == "__main__":
    unittest.main()

# ------------------------------------------------------------------------------
# /test/faculties/__init__.py:
#   https://raw.githubusercontent.com/msramalho/sigpy/0c8470a5d350715d837851f9173a07f3ca5f33b4/test/faculties/__init__.py
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# /test/faculties/feup/__init__.py:
#   https://raw.githubusercontent.com/msramalho/sigpy/0c8470a5d350715d837851f9173a07f3ca5f33b4/test/faculties/feup/__init__.py
# ------------------------------------------------------------------------------
# /test/faculties/feup/test_feup.py
# ------------------------------------------------------------------------------
import unittest
from sigpy import get_faculty, get_school_year


class SigTest(unittest.TestCase):
    """Checks the values parsed for one FEUP course and one FEUP teacher."""

    @classmethod
    def setUpClass(cls):
        cls.fac = get_faculty("feup")
        # cls.fac.cache.delete()
        # cls.fac.login("201403027")

    def test_get_course(self):
        course = self.fac.get_course((742, get_school_year()))
        expected = (
            ("name", "Mestrado Integrado em Engenharia Informática e Computação"),
            ("cod", "9459"),
            ("id", 742),
        )
        for attr, value in expected:
            self.assertEqual(getattr(course, attr), value)
        # the people in charge change over time, so only their presence is checked
        for attr in ("director", "assistant_director"):
            self.assertIsNotNone(getattr(course, attr))

    def test_get_teacher(self):
        pascoal = self.fac.get_teacher(210006)
        expected = (
            ("name", "João Carlos Pascoal Faria"),
            ("initials", "JPF"),
            ("orcid", "0000-0003-3825-3954"),
            ("phone", "22 508 1523"),
            ("alternative_phone", "225081316"),
            ("voip", "3386"),
        )
        for attr, value in expected:
            self.assertEqual(getattr(pascoal, attr), value)


if __name__ == "__main__":
    unittest.main()