├── main.py ├── rss.py └── iolsucker.py /main.py: -------------------------------------------------------------------------------- 1 | from iolsucker import * 2 | import getpass 3 | 4 | def main(): 5 | 6 | #TODO Verify if dni is correct. 7 | dni = raw_input('Enter your DNI: ') 8 | pwd = getpass.getpass("Enter password for DNI %s: " %dni) 9 | 10 | sucker = PyIOLSucker() 11 | if not sucker.isLogged(): 12 | sucker.doLogin(dni, pwd) 13 | #news = News() 14 | #news.printNews() 15 | #news.deleteAll() 16 | #news.printNews() 17 | subs = getSubjects() 18 | 19 | for subject in subs: 20 | files = files + acidRain(subject.folder) 21 | 22 | 23 | main() -------------------------------------------------------------------------------- /rss.py: -------------------------------------------------------------------------------- 1 | import PyRSS2Gen 2 | from iolsucker import * 3 | 4 | def getFeed(dni, passwd): 5 | files = [] 6 | 7 | sucker = PyIOLSucker() 8 | if not sucker.isLogged(): 9 | sucker.doLogin(dni,passwd) 10 | subs = getSubjects() 11 | 12 | for subject in subs: 13 | files = files + acidRain(subject.folder) 14 | 15 | items = [] 16 | 17 | for i in files: 18 | items.append(PyRSS2Gen.RSSItem( 19 | title = i.name, 20 | link = i.file, 21 | description = i.name, 22 | pubDate = datetime.datetime.now() 23 | )) 24 | 25 | 26 | rss = PyRSS2Gen.RSS2( 27 | title = "ITBA feed", 28 | link = SILVESTRE_PATH, 29 | description = "ITBA Feed", 30 | lastBuildDate = datetime.datetime.now(), 31 | items = items) 32 | 33 | rss.write_xml(open("itba.xml", "w")) 34 | 35 | 36 | if __name__ == '__main__': 37 | #TODO Verify if dni is correct. 38 | dni = raw_input('Enter your DNI: ') 39 | pwd = getpass.getpass("Enter password for DNI %s: " %dni) 40 | getFeed(dni, pwd) -------------------------------------------------------------------------------- /iolsucker.py: -------------------------------------------------------------------------------- 1 | import urllib, urllib2, cookielib 2 | import re 3 | import string 4 | import datetime 5 | from BeautifulSoup import BeautifulSoup 6 | import cPickle 7 | 8 | SILVESTRE_PATH = 'http://silvestre.itba.edu.ar' 9 | BASE_PATH = SILVESTRE_PATH + '/itbaV/' 10 | IOL_LOGIN_PATH = BASE_PATH + 'mynav.asp' 11 | IOL_NAVBAR_PATH = BASE_PATH + 'mynav.asp' 12 | MATERIAL_DIDACTICO_PATH = BASE_PATH + 'newmaterialdid.asp' 13 | IOL_DESKTOP_PATH = BASE_PATH + 'mydesktop.asp' 14 | NEWS_PATH = BASE_PATH + 'novlistall.asp' 15 | 16 | class AbstractIOLPathNode(object): 17 | """ Node belonging to a subjet's tree. """ 18 | 19 | def __init__(self, url, parent=None, name=None): 20 | self.url = url 21 | self.parent = parent 22 | self.name = name 23 | self.buildNode() 24 | 25 | #Hook. 26 | def buildNode(self): 27 | pass 28 | 29 | 30 | class IOLFile(AbstractIOLPathNode): 31 | """ Subjet's file """ 32 | def buildNode(self): 33 | webPage = PyIOLSucker().IOLUrlOpen(self.url) 34 | soup = BeautifulSoup(webPage.read()) 35 | 36 | webPage = PyIOLSucker().IOLUrlOpen(BASE_PATH + soup('frame')[0]['src']) 37 | soup = BeautifulSoup(webPage.read()) 38 | 39 | self.file = SILVESTRE_PATH + soup('a')[0]['href'] 40 | 41 | def __repr__(self): 42 | return self.name 43 | 44 | class IOLAbstractFolder(AbstractIOLPathNode): 45 | """ Subjet's directory """ 46 | def __init__(self, url, parent=None, name=None): 47 | self._children = [] 48 | AbstractIOLPathNode.__init__(self, url, parent, name) 49 | 50 | def buildNode(self): 51 | webPage = PyIOLSucker().IOLUrlOpen(self.url) 52 | soup = BeautifulSoup(webPage.read()) 53 | table = soup('tbody')[0] 54 | files = table('tr', 'hand') 55 | folders = filter ( lambda x: x.findAll('img', alt='Ir a carpeta') != [], table('tr')) 56 | 57 | for folder in folders: 58 | folder_name = ((folder('td',colspan=2)[0])('font')[0].string).strip() 59 | self._children.append(IOLFolder( BASE_PATH + folder('a')[0]['href'], self, name=folder_name)) 60 | 61 | for f in files: 62 | file_name = (f('td')[1])('font')[0].contents[0].string.strip() 63 | number = re.findall("[0-9]+", f['onclick'])[0] 64 | self._children.append(IOLFile( BASE_PATH + 'showfile.asp?fiid=' + str(number),\ 65 | self, name=file_name)) 66 | 67 | def __repr__(self): 68 | return self.name + self._children.__repr__() 69 | 70 | #Inherits from IOLAbstractFolder 71 | class IOLFolder(IOLAbstractFolder): 72 | pass 73 | 74 | #class IOLLazyFolder(IOLAbstractFolder): 75 | # _nodeBuilt = False 76 | # def __getReal(self): 77 | # self = IOLFolder(self.obj, self.url, self.parent) 78 | # TODO: Me tiene que pisar el padre!!! No tengo el puntero!! 79 | # self.parent.makeMeReal(self) 80 | # if not self._nodeBuilt: 81 | # self._children = property(None, None, None) 82 | # self._children = [] 83 | # self.__buildNode() 84 | # self._nodeBuilt = True 85 | # return self._children 86 | # def __init__(self, obj, url, parent=None): 87 | # self.url = url 88 | # self.parent = parent 89 | # self.obj = obj 90 | 91 | # _children = property(__getReal, None, None) 92 | 93 | 94 | #Albert: shouldn't be class PyIOLSucker(object): ? 95 | class PyIOLSucker: 96 | 97 | __instance = None 98 | 99 | class __impl: 100 | """Implementation of the singleton instance""" 101 | user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' 102 | headers = { 'User-Agent' : user_agent } 103 | 104 | def __init__(self): 105 | self.cookies = None 106 | 107 | def doLogin(self, dni, passwd): 108 | params = urllib.urlencode({'txtdni': dni, 109 | 'txtpwd': passwd, 110 | 'cmd': 'login'}) 111 | 112 | req = urllib2.Request(IOL_LOGIN_PATH, params, self.headers) 113 | webPage = urllib2.urlopen(req) 114 | self.cookies = cookielib.CookieJar() 115 | self.cookies.extract_cookies(webPage,req) 116 | 117 | def isLogged(self): 118 | if self.cookies is None: 119 | return False 120 | 121 | try: 122 | webPage = self.IOLUrlOpen( IOL_DESKTOP_PATH ) 123 | soup = BeautifulSoup(webPage.read()) 124 | except urllib2.HTTPError, e: 125 | return False 126 | 127 | if soup('title')[0] == 'The page cannot be displayed': 128 | return False 129 | return True 130 | 131 | def IOLUrlOpen(self, url): 132 | req = urllib2.Request(url, headers=self.headers) 133 | self.cookies.add_cookie_header(req) 134 | try: 135 | return urllib2.urlopen(req) 136 | except urllib2.URLError, e: 137 | raise urllib2.URLError 138 | 139 | def __init__(self): 140 | """ Create singleton instance """ 141 | if PyIOLSucker.__instance is None: 142 | PyIOLSucker.__instance = PyIOLSucker.__impl() 143 | 144 | self.__dict__['_PyIOLSucker__instance'] = PyIOLSucker.__instance 145 | 146 | def __getattr__(self, attr): 147 | """ Delegate access to implementation """ 148 | return getattr(self.__instance, attr) 149 | 150 | def __setattr__(self, attr, value): 151 | """ Delegate access to implementation """ 152 | return setattr(self.__instance, attr, value) 153 | 154 | class Subject(object): 155 | def __init__(self, url): 156 | #To get different "Material Didactico's path, we must 157 | #first connect to the main page of the subject and then 158 | #open the one and only "Material Didactico's path" 159 | self.webPage = PyIOLSucker().IOLUrlOpen(BASE_PATH + url) 160 | self.folder = IOLFolder(MATERIAL_DIDACTICO_PATH, name='root') 161 | 162 | class News(object): 163 | 164 | def __init__(self): 165 | #Gets HTML needed to parse news. 166 | self.webPage = PyIOLSucker().IOLUrlOpen(NEWS_PATH) 167 | self.html = self.webPage.read() 168 | self.soup = BeautifulSoup(self.html) 169 | 170 | #List of singleNews 171 | self.newsList = [] 172 | 173 | #Vars used to parse news 174 | bgcolor = None 175 | father = None 176 | section = None 177 | link = None 178 | delLink = None 179 | title = None 180 | clase = None 181 | 182 | #Every table @ HTML is parse depending on bgcolor attribute. 183 | self.tables = self.soup.findAll('tr') 184 | for table in self.tables: 185 | for at in table.attrs: 186 | if at[0] == 'bgcolor': 187 | bgcolor = at[1] 188 | 189 | if bgcolor == 'LIGHTSTEELBLUE': 190 | father = table.td.string.strip() 191 | elif bgcolor == 'SILVER': 192 | section = table.td.string.strip() 193 | elif bgcolor == 'WHITE' and table.a: 194 | link = table.attrs[2][1].split('\'')[1] 195 | delLink = table.a.attrs[0][1] 196 | title = table.td.contents[2].strip() 197 | self.newsList.append( singleNews(father, section, link, delLink, title) ) 198 | 199 | def deleteAll(self): 200 | if self.newsList: 201 | for news in self.newsList: 202 | PyIOLSucker().IOLUrlOpen(BASE_PATH + news.delLink) 203 | self.newsList.remove(news) 204 | else: 205 | print 'No news to remove' 206 | 207 | def printNews(self): 208 | if self.newsList: 209 | for news in self.newsList: 210 | print 'Father: %s\nSection: %s\nLink: %s\nDelLink: %s\nTitle: %s \n '\ 211 | % ( news.father, news.section, news.link, news.delLink, news.title) 212 | else: 213 | print 'No news to print' 214 | 215 | 216 | class singleNews(object): 217 | #singleNews variables. 218 | link = None 219 | delLink = None 220 | father = None 221 | section = None 222 | title = None 223 | 224 | def __init__(self, father, section, link, delLink, title ): 225 | self.father = father 226 | self.section = section 227 | self.link = link 228 | self.delLink = delLink 229 | self.title = title 230 | 231 | 232 | 233 | def getSubjects(): 234 | 235 | try: 236 | pkl_file = open('subjects_iol.pkl','rb') 237 | subjects = cPickle.load(pkl_file) 238 | #TODO: Update! 239 | return subjects 240 | except IOError, e: 241 | subjects = [] 242 | except EOFError, e: 243 | subjects = [] 244 | 245 | #Nav bar 246 | webPage = PyIOLSucker().IOLUrlOpen(IOL_NAVBAR_PATH) 247 | html = webPage.read() 248 | 249 | soup = BeautifulSoup(html) 250 | 251 | #Checking if I can fix bug. Old code below 252 | #materias = soup('td', colspan='2')[1:] 253 | #subject_links = map( lambda x: x('a')[0]['href'], materias) 254 | 255 | #new code. 256 | #Gets subjets I am doing. 257 | #from soup I don't get first one, because it should be the name of my career. 258 | materias = soup.findAll('td', colspan="2")[1:] 259 | subject_links = [materia('a')[0]['href'] for materia in materias if materia.a] 260 | 261 | if subject_links: 262 | for subject_link in subject_links: 263 | #Stupid bug: 264 | #@ utf8 links looks like: 265 | #mynav.asp?cmd=ChangeContext&nivel=4&snivel=22.09 266 | #where they should be 267 | #mynav.asp?cmd=ChangeContext&nivel=4&snivel=22.09 268 | #Fixed with a string.replace 269 | subject_link = string.replace(subject_link,"&","&") 270 | subjects.append(Subject(subject_link)) 271 | 272 | output = open('subjects_iol.pkl','wb') 273 | cPickle.dump(subjects, output) 274 | output.close() 275 | 276 | return subjects 277 | 278 | def acidRain(t): 279 | #TODO: REFACTOR 280 | if str(t.__class__) == '': 281 | return [t] 282 | else: 283 | files_ = [] 284 | for i in t._children: 285 | new_files = acidRain(i) 286 | files_ = files_ + new_files 287 | 288 | return files_ 289 | 290 | --------------------------------------------------------------------------------