Last update:
59 |
62 | ">
45 | " title="
52 | ">
57 |
74 |
77 | ">
83 | Powered by Planet!
84 | Last updated:
25 | Last update:
59 |
62 | ">
45 | " title="
52 | ">
57 |
26 | Last update:
60 | ">
46 | " title="
53 | ">
58 |
37 | Last update:
71 |
74 | ">
57 | " title="
64 | ">
69 |
, tag='pre', attrs=[('class', 'screen')]
79 | if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
80 | uattrs = []
81 | # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
82 | for key, value in attrs:
83 | if type(value) != type(u''):
84 | value = unicode(value, self.encoding)
85 | uattrs.append((unicode(key, self.encoding), value))
86 | strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding)
87 | if tag in self.elements_no_end_tag:
88 | self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
89 | else:
90 | self.pieces.append('<%(tag)s%(strattrs)s>' % locals())
91 |
def unknown_endtag(self, tag):
    """Append the reconstructed end tag for *tag* to ``self.pieces``.

    Nothing is appended for tags listed in ``self.elements_no_end_tag``.
    """
    # called for each end tag, e.g. for </pre>, tag will be 'pre'
    # Reconstruct the original end tag.  The literal must start with '</',
    # otherwise the rebuilt markup is not a closing tag.
    if tag not in self.elements_no_end_tag:
        self.pieces.append("</%(tag)s>" % locals())
97 |
def handle_charref(self, ref):
    """Append the reconstructed character reference for *ref* to ``self.pieces``."""
    # called for each character reference, e.g. for '&#160;', ref will be '160'
    # Reconstruct the original character reference, including the '&#'
    # introducer that the parser strips before calling us.
    self.pieces.append('&#%(ref)s;' % locals())
102 |
def handle_entityref(self, ref):
    """Append the reconstructed entity reference for *ref* to ``self.pieces``."""
    # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
    entity = '&%s;' % ref
    self.pieces.append(entity)
107 |
def handle_data(self, text):
    """Store a block of plain text verbatim in ``self.pieces``.

    Called for each run of text outside any tag that contains no character
    or entity references.  When the module-level ``_debug`` flag is set, the
    text is also written to stderr.
    """
    if _debug:
        sys.stderr.write('_BaseHTMLProcessor, handle_text, text=%s\n' % text)
    self.pieces.append(text)
114 |
def handle_comment(self, text):
    """Append the reconstructed HTML comment for *text* to ``self.pieces``."""
    # called for each HTML comment, e.g. <!-- insert Javascript code here -->
    # Reconstruct the original comment.  An empty format string here would
    # silently replace every comment with '' instead of rebuilding it.
    self.pieces.append('<!--%(text)s-->' % locals())
119 |
def handle_pi(self, text):
    """Append the reconstructed processing instruction to ``self.pieces``."""
    # called for each processing instruction, e.g. <?instruction>
    # Reconstruct original processing instruction, restoring the leading
    # '<?' that the parser removed.
    self.pieces.append('<?%(text)s>' % locals())
124 |
def handle_decl(self, text):
    """Append the reconstructed declaration (e.g. DOCTYPE) to ``self.pieces``."""
    # called for the DOCTYPE, if present, e.g.
    # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
    #     "http://www.w3.org/TR/html4/strict.dtd">
    # Reconstruct original DOCTYPE.  An empty format string here would drop
    # the declaration entirely.
    self.pieces.append('<!%(text)s>' % locals())
131 |
# Matches a declaration name (a letter followed by letters, digits or any of
# "-_.:") together with any whitespace that trails it.
_new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match

def _scan_name(self, i, declstartpos):
    """Scan a declaration name in ``self.rawdata`` starting at offset *i*.

    Returns ``(name, end)`` where *name* is the lower-cased name and *end*
    is the offset just past the name and its trailing whitespace.  Returns
    ``(None, -1)`` when *i* is at the end of the buffer, when the match runs
    to the end of the buffer, or when no name is found at *i*; in the last
    case the whole of ``rawdata`` is first passed to ``handle_data``.

    *declstartpos* is accepted but not used (the ``updatepos`` call that
    would have consumed it is disabled).
    """
    buf = self.rawdata
    size = len(buf)
    if i == size:
        return None, -1

    found = self._new_declname_match(buf, i)
    if not found:
        self.handle_data(buf)
        # self.updatepos(declstartpos, i)
        return None, -1

    matched = found.group()
    if i + len(matched) == size:
        return None, -1  # end of buffer
    return matched.strip().lower(), found.end()
149 |
def output(self):
    '''Return processed HTML as a single string'''
    # Every collected piece is passed through str() before joining.
    return ''.join(map(str, self.pieces))
153 |
class _HTMLSanitizer(_BaseHTMLProcessor):
    """HTML processor that keeps only whitelisted elements and attributes.

    Elements in ``ignorable_elements`` are dropped together with everything
    nested inside them.  Other elements not in ``acceptable_elements`` are
    dropped but their text content is kept.  Open acceptable elements are
    tracked on ``tag_stack`` so that unclosed tags can be closed.
    """

    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
      'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col',
      'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset',
      'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
      'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup',
      'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike',
      'strong', 'sub', 'sup', 'table', 'textarea', 'tbody', 'td', 'tfoot', 'th',
      'thead', 'tr', 'tt', 'u', 'ul', 'var']

    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
      'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing',
      'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols',
      'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled',
      'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace',
      'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
      'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
      'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type',
      'usemap', 'valign', 'value', 'vspace', 'width']

    # Elements whose entire content is discarded, not just the tags.
    ignorable_elements = ['script', 'applet', 'style']

    def reset(self):
        """Reset the parser and clear the open-tag stack and ignore depth."""
        _BaseHTMLProcessor.reset(self)
        self.tag_stack = []
        self.ignore_level = 0

    def feed(self, data):
        """Parse *data*, then emit end tags for any elements left open."""
        _BaseHTMLProcessor.feed(self, data)
        while self.tag_stack:
            _BaseHTMLProcessor.unknown_endtag(self, self.tag_stack.pop())

    def unknown_starttag(self, tag, attrs):
        """Emit *tag* with its whitelisted attributes if it is acceptable."""
        if tag in self.ignorable_elements:
            self.ignore_level += 1
            return

        if self.ignore_level:
            return

        if tag in self.acceptable_elements:
            # normalize_attrs is not defined in this class; presumably it is
            # provided by the base class -- confirm.
            attrs = self.normalize_attrs(attrs)
            attrs = [(key, value) for key, value in attrs if key in self.acceptable_attributes]
            if tag not in self.elements_no_end_tag:
                self.tag_stack.append(tag)
            _BaseHTMLProcessor.unknown_starttag(self, tag, attrs)

    def unknown_endtag(self, tag):
        """Emit the end tag for *tag*, closing any elements opened after it."""
        if tag in self.ignorable_elements:
            # Guard against a stray end tag with no matching start tag: a
            # negative ignore_level is truthy and would suppress all of the
            # remaining document.
            if self.ignore_level > 0:
                self.ignore_level -= 1
            return

        if self.ignore_level:
            return

        if tag in self.acceptable_elements and tag not in self.elements_no_end_tag:
            match = False
            # Pop (and close) open elements until the matching one is found.
            while self.tag_stack:
                top = self.tag_stack.pop()
                if top == tag:
                    match = True
                    break
                _BaseHTMLProcessor.unknown_endtag(self, top)

            if match:
                _BaseHTMLProcessor.unknown_endtag(self, tag)

    def handle_pi(self, text):
        """Discard processing instructions."""
        pass

    def handle_decl(self, text):
        """Discard declarations such as DOCTYPE."""
        pass

    def handle_data(self, text):
        """Store *text* with every '<' removed, unless inside an ignored element."""
        if not self.ignore_level:
            text = text.replace('<', '')
            _BaseHTMLProcessor.handle_data(self, text)
232 |
233 | def HTML(htmlSource, encoding='utf8'):
234 | p = _HTMLSanitizer(encoding)
235 | p.feed(htmlSource)
236 | data = p.output()
237 | if TIDY_MARKUP:
238 | # loop through list of preferred Tidy interfaces looking for one that's installed,
239 | # then set up a common _tidy function to wrap the interface-specific API.
240 | _tidy = None
241 | for tidy_interface in PREFERRED_TIDY_INTERFACES:
242 | try:
243 | if tidy_interface == "uTidy":
244 | from tidy import parseString as _utidy
245 | def _tidy(data, **kwargs):
246 | return str(_utidy(data, **kwargs))
247 | break
248 | elif tidy_interface == "mxTidy":
249 | from mx.Tidy import Tidy as _mxtidy
250 | def _tidy(data, **kwargs):
251 | nerrors, nwarnings, data, errordata = _mxtidy.tidy(data, **kwargs)
252 | return data
253 | break
254 | except:
255 | pass
256 | if _tidy:
257 | utf8 = type(data) == type(u'')
258 | if utf8:
259 | data = data.encode('utf-8')
260 | data = _tidy(data, output_xhtml=1, numeric_entities=1, wrap=0, char_encoding="utf8")
261 | if utf8:
262 | data = unicode(data, 'utf-8')
263 | if data.count(''):
266 | data = data.split('>', 1)[1]
267 | if data.count('= 1.5.2, except that source line
22 | information is not available unless 'inspect' is.
23 |
24 | Copyright (C) 2001-2002 Vinay Sajip. All Rights Reserved.
25 |
26 | To use, simply 'import logging' and log away!
27 | """
28 |
29 | import sys, logging, socket, types, os, string, cPickle, struct, time
30 |
31 | from SocketServer import ThreadingTCPServer, StreamRequestHandler
32 |
33 | #
34 | # Port-number constants for network logging (TCP, UDP, HTTP, SOAP) and syslog over UDP.
35 | #
36 |
37 | DEFAULT_TCP_LOGGING_PORT = 9020
38 | DEFAULT_UDP_LOGGING_PORT = 9021
39 | DEFAULT_HTTP_LOGGING_PORT = 9022
40 | DEFAULT_SOAP_LOGGING_PORT = 9023
41 | SYSLOG_UDP_PORT = 514
42 |
43 |
class RotatingFileHandler(logging.FileHandler):
    """File handler that rolls the log file over once it nears a size limit."""

    def __init__(self, filename, mode="a", maxBytes=0, backupCount=0):
        """
        Open *filename* and use it as the logging stream.

        With the default maxBytes of zero the file grows without bound and
        rollover never happens.  With a positive maxBytes, rollover takes
        place whenever writing the next record would bring the current file
        to maxBytes or beyond.

        When backupCount is >= 1, old logs are kept under the base file name
        with ".1", ".2", ... appended.  For a base name of "app.log" and a
        backupCount of 5 the files are "app.log", "app.log.1", ... up to
        "app.log.5".  Records are always written to "app.log"; at rollover it
        is closed and renamed to "app.log.1", after existing backups have
        each been renamed to the next higher number.
        """
        logging.FileHandler.__init__(self, filename, mode)
        self.maxBytes = maxBytes
        self.backupCount = backupCount
        if maxBytes > 0:
            # When rollover is enabled the recorded mode is always append.
            self.mode = "a"

    def doRollover(self):
        """
        Perform the rollover described in __init__().
        """
        self.stream.close()
        base = self.baseFilename
        if self.backupCount > 0:
            # Shift existing backups up by one, highest number first, so
            # nothing is overwritten before it has been moved.
            for index in range(self.backupCount - 1, 0, -1):
                older = "%s.%d" % (base, index)
                if not os.path.exists(older):
                    continue
                newer = "%s.%d" % (base, index + 1)
                if os.path.exists(newer):
                    os.remove(newer)
                os.rename(older, newer)
            first_backup = base + ".1"
            if os.path.exists(first_backup):
                os.remove(first_backup)
            os.rename(base, first_backup)
        # Start a fresh, empty log file under the base name.
        self.stream = open(base, "w")

    def emit(self, record):
        """
        Write *record* to the file, rolling over first when required.
        """
        if self.maxBytes > 0:
            line = "%s\n" % self.format(record)
            # Seek to the end explicitly: due to a non-posix-compliant
            # Windows feature, tell() may otherwise not report the size.
            self.stream.seek(0, 2)
            projected = self.stream.tell() + len(line)
            if projected >= self.maxBytes:
                self.doRollover()
        logging.FileHandler.emit(self, record)
107 |
108 |
class SocketHandler(logging.Handler):
    """
    A handler class which writes logging records, in pickle format, to
    a streaming socket. The socket is kept open across logging calls.
    If an error occurs while logging, the socket is discarded and a new
    connection is attempted on the next call.
    The pickle which is sent is that of the LogRecord's attribute dictionary
    (__dict__), so that the receiver does not need to have the logging module
    installed in order to process the logging event.

    To unpickle the record at the receiving end into a LogRecord, use the
    makeLogRecord function.
    """

    def __init__(self, host, port):
        """
        Initializes the handler with a specific host address and port.

        The attribute 'closeOnError' is initialised to 0, which means errors
        are reported through logging.Handler.handleError.  Set it to a true
        value to have socket errors handled silently instead.  In both cases
        a failed socket is closed and reopened on the next logging call.
        """
        logging.Handler.__init__(self)
        self.host = host
        self.port = port
        self.sock = None
        self.closeOnError = 0

    def makeSocket(self):
        """
        A factory method which allows subclasses to define the precise
        type of socket they want.

        Returns a connected TCP socket.  If the connection attempt fails the
        socket is closed before the exception propagates, so it is not leaked.
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((self.host, self.port))
        except:
            s.close()
            raise
        return s

    def send(self, s):
        """
        Send a pickled string to the socket.

        This function allows for partial sends which can happen when the
        network is busy.
        """
        if hasattr(self.sock, "sendall"):
            self.sock.sendall(s)
        else:
            sentsofar = 0
            left = len(s)
            while left > 0:
                sent = self.sock.send(s[sentsofar:])
                sentsofar = sentsofar + sent
                left = left - sent

    def makePickle(self, record):
        """
        Pickles the record in binary format with a length prefix, and
        returns it ready for transmission across the socket.

        The prefix is the pickle length packed as a 4-byte big-endian
        unsigned integer (struct format ">L").
        """
        s = cPickle.dumps(record.__dict__, 1)
        slen = struct.pack(">L", len(s))
        return slen + s

    def handleError(self, record):
        """
        Handle an error during logging.

        An error has occurred during logging. Most likely cause -
        connection lost. Close the socket so that we can retry on the
        next event.

        The socket, if any, is always discarded.  The error is swallowed
        silently only when closeOnError is true and there was a socket to
        close; otherwise it is passed to logging.Handler.handleError.
        """
        had_socket = self.sock
        if had_socket:
            self.sock.close()
            self.sock = None        #try to reconnect next time
        if not (self.closeOnError and had_socket):
            logging.Handler.handleError(self, record)

    def emit(self, record):
        """
        Emit a record.

        Pickles the record and writes it to the socket in binary format,
        creating the socket first if there is none.  Any error is routed to
        handleError(), which discards the socket so that the next call
        re-establishes it.  KeyboardInterrupt and SystemExit are re-raised.
        """
        try:
            s = self.makePickle(record)
            if not self.sock:
                self.sock = self.makeSocket()
            self.send(s)
        except (KeyboardInterrupt, SystemExit):
            # never swallow interpreter-exit requests
            raise
        except:
            self.handleError(record)

    def close(self):
        """
        Closes the socket.
        """
        if self.sock:
            self.sock.close()
            self.sock = None
211 |
class DatagramHandler(SocketHandler):
    """
    A SocketHandler variant that delivers each pickled logging record over
    a datagram (UDP) socket instead of a stream socket.  As with
    SocketHandler, what is pickled is the LogRecord's attribute dictionary
    (__dict__), so the receiver does not need the logging module to process
    the event.

    To unpickle the record at the receiving end into a LogRecord, use the
    makeLogRecord function.
    """

    def __init__(self, host, port):
        """
        Set up the handler for the given host address and port.
        """
        SocketHandler.__init__(self, host, port)
        self.closeOnError = 0

    def makeSocket(self):
        """
        Override the SocketHandler factory method to produce a UDP socket
        (SOCK_DGRAM).  The socket is returned unconnected.
        """
        return socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

    def send(self, s):
        """
        Send a pickled string to the configured host and port.

        Unlike the stream version, no allowance is made for partial sends
        when the network is busy - UDP does not guarantee delivery and can
        deliver packets out of sequence.
        """
        destination = (self.host, self.port)
        self.sock.sendto(s, destination)
247 |
248 | class SysLogHandler(logging.Handler):
249 | """
250 | A handler class which sends formatted logging records to a syslog
251 | server. Based on Sam Rushing's syslog module:
252 | http://www.nightmare.com/squirl/python-ext/misc/syslog.py
253 | Contributed by Nicolas Untz (after which minor refactoring changes
254 | have been made).
255 | """
256 |
257 | # from