├── main.py
├── rss.py
└── iolsucker.py


/main.py:
--------------------------------------------------------------------------------
 1 | from iolsucker import *
 2 | import getpass
 3 | 
 4 | def main():
 5 | 
 6 |     #TODO Verify if dni is correct.
 7 |     dni = raw_input('Enter your DNI: ')
 8 |     pwd = getpass.getpass("Enter password for DNI %s: " %dni)
 9 | 
10 |     sucker = PyIOLSucker()
11 |     if not sucker.isLogged():
12 |         sucker.doLogin(dni, pwd)
13 |     #news = News()
14 |     #news.printNews()
15 |     #news.deleteAll()
16 |     #news.printNews()
17 |     subs = getSubjects()
18 | 
19 |     for subject in subs:
20 |         files = files + acidRain(subject.folder)
21 | 
22 | 
# Run only when executed as a script, so that importing this module does not
# prompt for credentials or hit the network.
if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/rss.py:
--------------------------------------------------------------------------------
 1 | import PyRSS2Gen
 2 | from iolsucker import *
 3 | 
 4 | def getFeed(dni, passwd):
 5 | 	files = []
 6 | 
 7 | 	sucker = PyIOLSucker()
 8 | 	if not sucker.isLogged():
 9 | 		sucker.doLogin(dni,passwd)
10 | 	subs = getSubjects()
11 | 
12 | 	for subject in subs:
13 | 		files = files + acidRain(subject.folder)
14 | 
15 | 	items = []
16 | 
17 | 	for i in files:
18 | 		items.append(PyRSS2Gen.RSSItem(
19 | 			title = i.name,
20 | 			link = i.file,
21 | 			description = i.name,
22 | 			pubDate =  datetime.datetime.now()
23 | 			))
24 | 	
25 | 
26 | 	rss = PyRSS2Gen.RSS2(
27 | 		title = "ITBA feed",
28 | 	link = SILVESTRE_PATH,
29 | 		description = "ITBA Feed",
30 | 		lastBuildDate = datetime.datetime.now(),
31 | 		items = items)
32 | 
33 | 	rss.write_xml(open("itba.xml", "w"))
34 | 	
35 | 
if __name__ == '__main__':
	# getpass is imported neither by this module nor by iolsucker (whose
	# names arrive through the star import), so it must be imported here;
	# without it the password prompt raised NameError.
	import getpass

	#TODO Verify if dni is correct.
	dni = raw_input('Enter your DNI: ')
	pwd = getpass.getpass("Enter password for DNI %s: " % dni)
	getFeed(dni, pwd)


--------------------------------------------------------------------------------
/iolsucker.py:
--------------------------------------------------------------------------------
  1 | import urllib, urllib2, cookielib
  2 | import re
  3 | import string
  4 | import datetime
  5 | from BeautifulSoup import BeautifulSoup
  6 | import cPickle
  7 | 
  8 | SILVESTRE_PATH = 'http://silvestre.itba.edu.ar'
  9 | BASE_PATH = SILVESTRE_PATH + '/itbaV/'
 10 | IOL_LOGIN_PATH = BASE_PATH + 'mynav.asp'
 11 | IOL_NAVBAR_PATH = BASE_PATH + 'mynav.asp'
 12 | MATERIAL_DIDACTICO_PATH = BASE_PATH + 'newmaterialdid.asp'
 13 | IOL_DESKTOP_PATH = BASE_PATH + 'mydesktop.asp'
 14 | NEWS_PATH = BASE_PATH + 'novlistall.asp'
 15 | 
 16 | class AbstractIOLPathNode(object):
 17 |     """ Node belonging to a subjet's tree. """
 18 | 
 19 |     def __init__(self, url, parent=None, name=None):
 20 |         self.url = url
 21 |         self.parent = parent
 22 |         self.name = name
 23 |         self.buildNode()
 24 | 
 25 |     #Hook.
 26 |     def buildNode(self):
 27 |         pass
 28 | 
 29 | 
 30 | class IOLFile(AbstractIOLPathNode):
 31 |     """ Subjet's file """
 32 |     def buildNode(self):
 33 |         webPage = PyIOLSucker().IOLUrlOpen(self.url)
 34 |         soup = BeautifulSoup(webPage.read())
 35 | 
 36 |         webPage = PyIOLSucker().IOLUrlOpen(BASE_PATH + soup('frame')[0]['src'])
 37 |         soup = BeautifulSoup(webPage.read())
 38 | 
 39 |         self.file = SILVESTRE_PATH + soup('a')[0]['href']
 40 | 
 41 |     def __repr__(self):
 42 |         return  self.name
 43 | 
 44 | class IOLAbstractFolder(AbstractIOLPathNode):
 45 |     """ Subjet's directory """
 46 |     def __init__(self, url, parent=None, name=None):
 47 |         self._children = []
 48 |         AbstractIOLPathNode.__init__(self, url, parent, name)
 49 | 
 50 |     def buildNode(self):
 51 |         webPage = PyIOLSucker().IOLUrlOpen(self.url)
 52 |         soup = BeautifulSoup(webPage.read())
 53 |         table = soup('tbody')[0]
 54 |         files = table('tr', 'hand')
 55 |         folders = filter ( lambda x: x.findAll('img', alt='Ir a carpeta') != [], table('tr'))
 56 | 
 57 |         for folder in folders:
 58 |             folder_name = ((folder('td',colspan=2)[0])('font')[0].string).strip()
 59 |             self._children.append(IOLFolder( BASE_PATH + folder('a')[0]['href'], self, name=folder_name))
 60 | 
 61 |         for f in files:
 62 |             file_name = (f('td')[1])('font')[0].contents[0].string.strip()
 63 |             number = re.findall("[0-9]+", f['onclick'])[0]
 64 |             self._children.append(IOLFile( BASE_PATH + 'showfile.asp?fiid=' + str(number),\
 65 |                                  self, name=file_name))
 66 | 
 67 |     def __repr__(self):
 68 |         return  self.name + self._children.__repr__()
 69 | 
 70 | #Inherits from IOLAbstractFolder
 71 | class IOLFolder(IOLAbstractFolder):
 72 |      pass
 73 | 
 74 | #class IOLLazyFolder(IOLAbstractFolder):
 75 | #    _nodeBuilt = False
 76 | #    def __getReal(self):
 77 | #        self = IOLFolder(self.obj, self.url, self.parent)
#       TODO: The parent has to overwrite me!!! I don't have the pointer!!
 79 | #        self.parent.makeMeReal(self)
 80 | #        if not self._nodeBuilt:
 81 | #            self._children = property(None, None, None)
 82 | #            self._children = []
 83 | #            self.__buildNode()
 84 | #            self._nodeBuilt = True
 85 | #        return self._children 
 86 | #    def __init__(self, obj, url, parent=None):
 87 | #        self.url = url
 88 | #        self.parent = parent
 89 | #        self.obj = obj
 90 | 
 91 | #    _children = property(__getReal, None, None)
 92 | 
 93 | 
 94 | #Albert: shouldn't be class PyIOLSucker(object): ?
 95 | class PyIOLSucker:
 96 | 
 97 |     __instance = None
 98 | 
 99 |     class __impl:
100 |         """Implementation of the singleton instance"""
101 |         user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
102 |         headers = { 'User-Agent' : user_agent }
103 | 
104 |         def __init__(self):
105 |             self.cookies = None
106 | 
107 |         def doLogin(self, dni, passwd):
108 |             params = urllib.urlencode({'txtdni': dni,
109 |                                         'txtpwd': passwd,
110 |                                         'cmd': 'login'})
111 | 
112 |             req = urllib2.Request(IOL_LOGIN_PATH, params, self.headers)
113 |             webPage = urllib2.urlopen(req)
114 |             self.cookies = cookielib.CookieJar()
115 |             self.cookies.extract_cookies(webPage,req)
116 | 
117 |         def isLogged(self):
118 |             if self.cookies is None:
119 |                 return False
120 | 
121 |             try:
122 |                 webPage = self.IOLUrlOpen( IOL_DESKTOP_PATH )
123 |                 soup = BeautifulSoup(webPage.read())
124 |             except urllib2.HTTPError, e:
125 |                 return False
126 | 
127 |             if soup('title')[0] == 'The page cannot be displayed':
128 |                 return False
129 |             return True
130 | 
131 |         def IOLUrlOpen(self, url):
132 |             req = urllib2.Request(url, headers=self.headers)
133 |             self.cookies.add_cookie_header(req)
134 |             try:
135 |                 return urllib2.urlopen(req)
136 |             except urllib2.URLError, e:
137 |                 raise urllib2.URLError
138 | 
139 |     def __init__(self):
140 |         """ Create singleton instance """
141 |         if PyIOLSucker.__instance is None:
142 |             PyIOLSucker.__instance = PyIOLSucker.__impl()
143 | 
144 |         self.__dict__['_PyIOLSucker__instance'] = PyIOLSucker.__instance
145 | 
146 |     def __getattr__(self, attr):
147 |         """ Delegate access to implementation """
148 |         return getattr(self.__instance, attr)
149 | 
150 |     def __setattr__(self, attr, value):
151 |         """ Delegate access to implementation """
152 |         return setattr(self.__instance, attr, value)
153 | 
class Subject(object):
    """A subject and the root folder of its material tree.

    Attributes:
        webPage -- response returned when the subject's main page was
                   opened; None on instances restored from a pickle.
        folder  -- IOLFolder built from MATERIAL_DIDACTICO_PATH.
    """

    def __init__(self, url):
        #To get different "Material Didactico's path, we must
        #first connect to the main page of the subject and then
        #open the one and only "Material Didactico's path"
        self.webPage = PyIOLSucker().IOLUrlOpen(BASE_PATH + url)
        self.folder = IOLFolder(MATERIAL_DIDACTICO_PATH, name='root')

    def __getstate__(self):
        """Return the state to pickle: everything except the live response.

        getSubjects() pickles Subject objects, and the open HTTP response
        kept in webPage cannot be serialised, so it is left out.
        """
        state = self.__dict__.copy()
        state.pop('webPage', None)
        return state

    def __setstate__(self, state):
        """Restore a pickled subject; it has no open response."""
        self.__dict__.update(state)
        self.webPage = None
161 | 
class News(object):
    """News entries scraped from NEWS_PATH.

    Attributes set by __init__: webPage, html and soup (the response, its
    body and the parsed document), tables (every <tr> of the page) and
    newsList (one singleNews per entry found).
    """

    def __init__(self):
        """Fetch the news page and fill self.newsList."""
        #Gets HTML needed to parse news.
        self.webPage = PyIOLSucker().IOLUrlOpen(NEWS_PATH)
        self.html = self.webPage.read()
        self.soup = BeautifulSoup(self.html)

        #List of singleNews
        self.newsList = []

        #Vars used to parse news. They carry over from row to row: a header
        #row sets father/section for the entries that follow it.
        # NOTE(review): bgcolor is not reset per row either, so a row without
        # a bgcolor attribute is handled with the previous row's colour -
        # confirm this is intended.
        bgcolor = None
        father = None
        section = None

        #Every row of the HTML is parsed depending on its bgcolor attribute.
        self.tables = self.soup.findAll('tr')
        for table in self.tables:
            for at in table.attrs:
                if at[0] == 'bgcolor':
                    bgcolor = at[1]

            if bgcolor == 'LIGHTSTEELBLUE':
                father = table.td.string.strip()
            elif bgcolor == 'SILVER':
                section = table.td.string.strip()
            elif bgcolor == 'WHITE' and table.a:
                # The link is the first single-quoted chunk of the row's
                # third attribute value.
                link = table.attrs[2][1].split('\'')[1]
                delLink = table.a.attrs[0][1]
                title = table.td.contents[2].strip()
                self.newsList.append( singleNews(father, section, link, delLink, title) )

    def deleteAll(self):
        """Request the delete link of every entry and empty self.newsList.

        Prints 'No news to remove' when the list is already empty.
        """
        if not self.newsList:
            print('No news to remove')
            return

        # Walk a snapshot: removing from the list being iterated made the
        # loop skip every other entry.
        for news in list(self.newsList):
            PyIOLSucker().IOLUrlOpen(BASE_PATH + news.delLink)
            self.newsList.remove(news)

    def printNews(self):
        """Print every entry, or 'No news to print' when there is none."""
        if not self.newsList:
            print('No news to print')
            return

        for news in self.newsList:
            print('Father: %s\nSection: %s\nLink: %s\nDelLink: %s\nTitle: %s \n '
                  % ( news.father, news.section, news.link, news.delLink, news.title))
214 | 
215 | 
class singleNews(object):
    """Plain record for one news entry."""

    # Class-level defaults; __init__ shadows each of them per instance.
    father = section = None
    link = delLink = None
    title = None

    def __init__(self, father, section, link, delLink, title ):
        """Store the five fields exactly as given."""
        self.father, self.section = father, section
        self.link, self.delLink = link, delLink
        self.title = title
230 | 
231 | 
232 | 
def getSubjects():
    """Return the list of Subject objects, using a pickle cache when possible.

    When 'subjects_iol.pkl' can be opened and unpickled, its content is
    returned without contacting the site. Otherwise the navigation bar is
    scraped, one Subject is built per link found, the list is written to
    'subjects_iol.pkl' and then returned.
    """
    # NOTE(review): unpickling runs whatever the file describes, so this is
    # only safe while subjects_iol.pkl is a trusted, locally written file.
    try:
        pkl_file = open('subjects_iol.pkl','rb')
        try:
            #TODO: Update!
            return cPickle.load(pkl_file)
        finally:
            # The cache handle used to be left open.
            pkl_file.close()
    except (IOError, EOFError):
        # No usable cache (IOError from opening/reading it, EOFError from
        # cPickle.load): rebuild the list from the site.
        subjects = []

    #Nav bar
    webPage = PyIOLSucker().IOLUrlOpen(IOL_NAVBAR_PATH)
    soup = BeautifulSoup(webPage.read())

    #Gets subjets I am doing.
    #from soup I don't get first one, because it should be the name of my career.
    materias = soup.findAll('td', colspan="2")[1:]
    subject_links = [materia('a')[0]['href'] for materia in materias if materia.a]

    for subject_link in subject_links:
        #Stupid bug:
        #@ utf8 links looks like:
        #mynav.asp?cmd=ChangeContext&amp;nivel=4&amp;snivel=22.09
        #where they should be
        #mynav.asp?cmd=ChangeContext&nivel=4&snivel=22.09
        #Fixed with a string replace
        subjects.append(Subject(subject_link.replace("&amp;", "&")))

    # Close the cache file even when pickling fails.
    output = open('subjects_iol.pkl','wb')
    try:
        cPickle.dump(subjects, output)
    finally:
        output.close()

    return subjects
277 | 
def acidRain(t):
    """Flatten the tree rooted at t into the list of its file (leaf) nodes.

    t -- a node of a subject's tree. A node with a _children list is a
         folder and is expanded recursively, in order; any other node is a
         file and is returned as a one-element list.

    The leaf test used to compare str(t.__class__) with
    "<class 'iol.IOLFile'>". The class lives in the iolsucker module, so the
    comparison never matched and file nodes failed on the missing _children
    attribute. Checking for _children does not depend on the module name.
    """
    children = getattr(t, '_children', None)
    if children is None:
        return [t]

    files_ = []
    for child in children:
        files_.extend(acidRain(child))
    return files_
289 | 
290 | 


--------------------------------------------------------------------------------