) then the value will be NSNull
51 | * If the attributes have not yet been parsed, this will parse them first.
52 | */
53 | @property (nonatomic, readonly) NSDictionary* attributes;
54 |
55 |
56 | /**
57 | * The character data inside the element. This text is stripped of tags, whitespace, etc
58 | * by stripTags. To see the actual source within the element, use contentsSource
59 | */
60 | @property (nonatomic, retain) NSString* contentsText;
61 |
62 |
63 | /**
64 | * A case-normalized version of the tagName when appropriate. Used in situations
65 | * where the tag name might need to serve as a key into a dictionary
66 | */
67 | @property (nonatomic, retain) NSString* key;
68 |
69 | /**
70 | * One or more chunks were encountered within this element
71 | * Used for more efficient return of contentsText
72 | */
73 | @property BOOL containsMarkup;
74 |
75 |
76 | /**
77 | * The length of the text from the end of the start tag to the start of the end tag
78 | */
79 | @property int contentsLength;
80 |
81 |
82 | /**
83 | * The next Element encountered in the document
84 | */
85 | @property (nonatomic, retain) Element* nextElement;
86 |
87 |
88 | /**
89 | * The next sibling Element (i.e. the Element at the same depth with the same parent)
90 | */
91 | @property (nonatomic, retain) Element* nextSybling;
92 |
93 |
94 | /**
95 | * The parent Element to this Element
96 | */
97 | @property (nonatomic, assign) Element* parent;
98 |
99 |
100 | /**
101 | * Available for developer's use to hang an object onto this Element
102 | */
103 | @property (nonatomic, retain) NSObject* domainObject;
104 |
105 |
106 | /**
107 | * Parses the supplied source and returns an Element tree with a Document element serving as the root
108 | * of all top level elements. As HTML, Elements shall be considered case insensitive and tag
109 | * specific heuristics will be used to close tags intelligently. See ElementParser for details.
110 | */
111 | +(DocumentRoot*)parseHTML:(NSString*)source;
112 |
113 |
114 | /**
115 | * Parses the supplied source and returns an Element tree with a Document element serving as the root
116 | * of all top level elements. As XML, Elements shall be considered case sensitive. See ElementParser for details.
117 | */
118 | +(DocumentRoot*)parseXML:(NSString*)source;
119 |
120 | /**
121 | * Initializer used by ElementParser. See TagChunk for other initializers
122 | */
123 | -(id)initWithTag:(TagChunk*)tag caseSensative:(BOOL)aCaseSensative;
124 |
125 |
126 | /**
127 | * Returns true if the element contains the specified attribute.
128 | * If the attributes have not yet been parsed, this will parse them first.
129 | */
130 | -(BOOL)hasAttribute:(NSString*)attr;
131 |
132 |
133 | /**
134 | * Returns the value of a particular attribute (or nil if it doesn't exist)
135 | * Note: ElementParser does not support default attributes
136 | * If the attributes have not yet been parsed, this will parse them first.
137 | */
138 | -(NSString*)attribute:(NSString*)attr;
139 |
140 |
141 | /**
142 | * Convenience method to compare an element's tag name.
143 | * Comparison will be case sensitive for XML elements and case insensitive for HTML elements.
144 | */
145 | -(BOOL)isEqualToString:(NSString*)string;
146 |
147 | /**
148 | * Convenience methods for getting NSObjects from elements and their children
149 | */
150 | - (NSString*)contentsTextOfChildElement:(NSString*)selector;
151 |
152 | - (NSNumber*)contentsNumber;
153 |
154 | - (NSNumber*)contentsNumberOfChildElement:(NSString*)selector;
155 |
156 |
157 | /**
158 | * An array of child Elements in document order
159 | */
160 | -(NSArray*)childElements;
161 |
162 | /**
163 | * An array containing the receiver followed by its subsequent siblings, in document order
164 | */
165 | -(NSArray*)syblingElements;
166 |
167 |
168 | /**
169 | * The first child Element for this element (or nil if none).
170 | */
171 | -(Element*)firstChild;
172 |
173 |
174 | /**
175 | * A dictionary containing the tagnames of children as keys
176 | * and the contentsText of the children as values.
177 | * If duplicate children tag names are encountered, only the last will
178 | * appear in the dictionary.
179 | */
180 | -(NSDictionary*)contentsOfChildren;
181 |
182 | /**
183 | * Returns true if the supplied Element is a parent of receiver or one of its parents
184 | */
185 | -(BOOL)hasAncestor:(Element*)ancestor;
186 |
187 |
188 | /**
189 | * Returns the nextElement but only if it has the scope Element as an ancestor
190 | */
191 | -(Element*)nextElementWithinScope:(Element*)scope;
192 |
193 |
194 | /**
195 | * Returns true if the class attribute contains the class name (perhaps as one of multiple classes).
196 | */
197 | -(BOOL)hasClassName:(NSString*)aClassName;
198 |
199 |
200 | /**
201 | * Returns true if the receiver can be a child of aParent. Used by ElementParser to prevent inappropriate
202 | * nesting in HTML (e.g. a p element directly inside another p element)
203 | */
204 | -(BOOL)acceptsParent:(Element*)aParent;
205 |
206 | /**
207 | * Debugging method
208 | */
209 | -(NSString*)dumpTree;
210 |
211 |
212 | /**
213 | * The source between the end of the open tag and the beginning of the close tag
214 | */
215 | -(NSString*)contentsSource;
216 |
217 |
218 | /**
219 | * Convenience method for using a selector to find elements within the receiver that match.
220 | * See CSSSelector for details.
221 | */
222 | -(Element*)elementWithCSSSelector:(CSSSelector*)selector;
223 |
224 | /**
225 | * Convenience method for using a selector to find elements within the receiver that match
226 | * See CSSSelector for details.
227 | */
228 | -(Element*)selectElement:(NSString*)cssSelectorString;
229 |
230 |
231 | /**
232 | * Convenience method for using a selector to find elements within the receiver that match
233 | * See CSSSelector for details.
234 | */
235 | -(NSArray*)elementsWithCSSSelector:(CSSSelector*)selector;
236 |
237 | /**
238 | * Convenience method for using a selector to find elements within the receiver that match
239 | * See CSSSelector for details.
240 | */
241 | -(NSArray*)selectElements:(NSString*)cssSelectorString;
242 |
243 | @end
244 |
--------------------------------------------------------------------------------
/Classes/Element.m:
--------------------------------------------------------------------------------
1 | //
2 | // Element.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/18/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nomial fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see .
23 | //
24 |
25 | #import "Element.h"
26 | #import "NSString_HTML.h"
27 | #import "CSSSelectorMatcher.h"
28 | #import "ElementParser.h"
29 |
30 | @interface Element ()
31 | -(void)setAttributes:(NSDictionary*)dict;
32 | @end
33 |
34 | @implementation Element
35 |
36 | @synthesize nextElement, nextSybling, parent, contentsLength, contentsText, key, containsMarkup, domainObject;
37 |
38 | // Convenience: parses source as HTML with a temporary ElementParser.
39 | // The returned root is retained/autoreleased so it outlives the parser that built it.
40 | +(DocumentRoot*)parseHTML:(NSString*)source{
41 | ElementParser* htmlParser = [[ElementParser alloc] init];
42 | DocumentRoot* document = [[[htmlParser parseHTML: source] retain] autorelease];
43 | [htmlParser release];
44 | return document;
45 | }
46 | // Convenience: parses source as XML with a temporary ElementParser.
47 | // The returned root is retained/autoreleased so it outlives the parser that built it.
48 | +(DocumentRoot*)parseXML:(NSString*)source{
49 | ElementParser* xmlParser = [[ElementParser alloc] init];
50 | DocumentRoot* document = [[[xmlParser parseXML: source] retain] autorelease];
51 | [xmlParser release];
52 | return document;
53 | }
54 | // Convenience initializer: uses the whole of string, i.e. the range {0, length}.
55 | -(id)initWithString:(NSString*)string{
56 | return [self initWithString: string range: NSMakeRange(0, string.length)];
57 | }
58 | // Builds an Element from a parsed tag chunk (same source, range and tag name) and records its case sensitivity.
59 | -(id)initWithTag:(TagChunk*)tag caseSensative:(BOOL)aCaseSensative{
60 | self = [self initWithString: [tag source] range: [tag range] tagName: [tag tagName]];
61 | [self setCaseSensative: aCaseSensative];
62 | return self;
63 | }
64 | // Releases every object this Element retains. parent is an assign property and is deliberately not released.
65 | -(void)dealloc{
66 | [attributes release];
67 | [contentsText release];
68 | [nextElement release];
69 | [nextSybling release];
70 | [key release];
71 | [domainObject release]; // declared (nonatomic, retain) but was never released, so it leaked
72 | [super dealloc];
73 | }
74 | // Changing the range invalidates previously parsed attributes; -attributes re-parses them on the next access.
75 | -(void)setRange: (NSRange)aRange{
76 | attributesParsed = NO;
77 | [attributes removeAllObjects]; // NOTE(review): needs a mutable dictionary although the property is typed NSDictionary - confirm
78 | [super setRange: aRange];
79 | }
80 | // cleans up nested p tags:
81 | // a p element refuses another p element as its parent; every other pairing is accepted.
82 | -(BOOL)acceptsParent:(Element*)aParent{
83 | BOOL nestedParagraph = [self tagNameEquals: @"p"] && [aParent tagNameEquals: @"p"];
84 | // the parser closes the rejected parent before opening the receiver
85 | return !nestedParagraph;
86 | }
87 |
88 | // When aTag is the receiver itself, or the receiver is an empty tag, the answer is simply "is aTag the receiver"; otherwise super decides.
89 | -(BOOL)closesTag:(TagChunk*)aTag{
90 | BOOL closingSelf = (self == aTag); // true when the parser passes a should-be-empty element as its own close tag
91 | if (closingSelf || [self isEmptyTag])
92 | return closingSelf;
93 | return [super closesTag: aTag];
94 | }
95 | // YES when the (lazily parsed) attributes contain attr. Uses a dictionary lookup instead of building and scanning allKeys.
96 | -(BOOL)hasAttribute:(NSString*)attr{
97 | return [[self attributes] objectForKey: attr] != nil;
98 | }
99 | // Value stored for attr, or nil when absent. NOTE(review): valueless attributes appear to be stored as NSNull - confirm.
100 | -(NSString*)attribute:(NSString*)attr{
101 | return [self.attributes objectForKey: attr];
102 | }
103 | // The whitespace-separated entries of the class attribute; an empty array when there is no usable class attribute.
104 | // warning, may contain empty classnames
105 | -(NSArray*)classNames{
106 | NSString* classNames = [self attribute: @"class"];
107 | if (![classNames isKindOfClass: [NSString class]]) return [NSArray array]; // nil, or a non-string placeholder (NSNull) for a valueless attribute
108 | return [classNames componentsSeparatedByCharactersInSet: [NSCharacterSet whitespaceCharacterSet]];
109 | }
110 | // Tests whether aClassName is one of the entries of the class attribute
111 | // (the attribute may list several class names).
112 | -(BOOL)hasClassName:(NSString*)aClassName{
113 | NSArray* declared = [self classNames]; // empty when the element has no class attribute
114 | // containsObject: compares with isEqual:, which for strings agrees with isEqualToString:
115 | BOOL found = [declared containsObject: aClassName];
116 | return found;
117 | }
118 | // Parses the attributes out of the tag source on first access and caches them until setRange: invalidates the cache.
119 | -(NSDictionary*)attributes{
120 | if (attributesParsed)
121 | return attributes;
122 | [self setAttributes: [source parseElementAttributesWithRange: range caseSensative: [self caseSensative]]];
123 | attributesParsed = YES;
124 | return attributes;
125 | }
126 | // Private setter taking ownership of dict. The old value is autoreleased rather than released so that passing the stored dictionary back in cannot deallocate it before the retain.
127 | -(void)setAttributes:(NSDictionary*)dict{
128 | [attributes autorelease];
129 | attributes = [dict retain];
130 | }
131 | // The first child is the next element in document order, but only if that element's parent is the receiver;
132 | // otherwise the receiver has no children and nil is returned.
133 | -(Element*)firstChild{
134 | Element* candidate = nextElement;
135 | BOOL isChild = ([candidate parent] == self);
136 | return isChild ? candidate : nil;
137 | }
138 | // Walks up the parent chain looking for ancestor. The walk starts at the receiver's parent,
139 | // so an element is never reported as its own ancestor.
140 | -(BOOL)hasAncestor:(Element*)ancestor{
141 | Element* current = parent;
142 | while (current && current != ancestor)
143 | current = [current parent];
144 | return current != nil;
145 | }
146 | // Returns nextElement when it lies inside scope, else nil. A child of the receiver is always in scope. A following sibling is only taken on trust when the receiver is not the scope itself: the scope's own sibling lies outside it and used to be returned, letting searches run past the scope.
147 | -(Element*)nextElementWithinScope:(Element*)scope{
148 | if ((nextElement.parent == self) || (nextSybling && self != scope))
149 | return nextElement;
150 | else
151 | return ([nextElement hasAncestor: scope]) ? nextElement : nil;
152 | }
153 | // Derives the text on first access and caches it: the contents source, run through stripTags only when markup was seen inside the element.
154 | -(NSString*)contentsText{
155 | if (contentsText)
156 | return contentsText;
157 | NSString* raw = [self contentsSource];
158 | self.contentsText = containsMarkup ? [raw stripTags] : raw;
159 | return contentsText;
160 | }
161 | - (NSString*)contentsTextOfChildElement:(NSString*)selector { // contentsText of the element selected by selector (nil when none is selected)
162 | Element* selected = [self selectElement: selector];
163 | return [selected contentsText];
164 | }
165 | - (NSNumber*)contentsNumber { // the contents text converted with intValue and boxed as an NSNumber
166 | NSString* text = [self contentsText];
167 | return [NSNumber numberWithInt: [text intValue]];
168 | }
169 | - (NSNumber*)contentsNumberOfChildElement:(NSString*)selector { // contentsNumber of the element selected by selector (nil when none is selected)
170 | Element* selected = [self selectElement: selector];
171 | return [selected contentsNumber];
172 | }
173 | // The raw source that follows the open tag: contentsLength characters starting at the end of the tag's range.
174 | // contentsLength is assigned by ElementParser when the element is closed.
175 | -(NSString*)contentsSource{
176 | NSUInteger start = NSMaxRange(range);
177 | return [source substringWithRange: NSMakeRange(start, contentsLength)];
178 | }
179 | // Compiles cssSelectorString into a CSSSelector and returns all matching elements; a nil string yields an empty array.
180 | -(NSArray*)selectElements:(NSString*)cssSelectorString{
181 | if (cssSelectorString == nil) return [NSArray array];
182 | CSSSelector* compiled = [[CSSSelector alloc] initWithString: cssSelectorString];
183 | NSArray* matches = [self elementsWithCSSSelector: compiled];
184 | [compiled release];
185 | return matches;
186 | }
187 | // Compiles cssSelectorString into a CSSSelector and returns the result of elementWithCSSSelector:; a nil string yields nil.
188 | -(Element*)selectElement:(NSString*)cssSelectorString{
189 | if (cssSelectorString == nil) return nil;
190 | CSSSelector* compiled = [[CSSSelector alloc] initWithString: cssSelectorString];
191 | Element* match = [self elementWithCSSSelector: compiled];
192 | [compiled release];
193 | return match;
194 | }
195 | // Feeds the receiver, and then each element that nextElementWithinScope: yields for the receiver's scope,
196 | // to a CSSSelectorMatcher in document order,
197 | // and returns everything the matcher collected.
198 | -(NSArray*)elementsWithCSSSelector:(CSSSelector*)selector{
199 | CSSSelectorMatcher* collector = [[CSSSelectorMatcher alloc] initWithSelector: selector];
200 | for (Element* cursor = self; cursor; cursor = [cursor nextElementWithinScope: self]){
201 | [collector matchElement: cursor];
202 | }
203 | // retain/autorelease so the matches outlive the matcher released below
204 | NSArray* found = [[[collector matches] retain] autorelease];
205 | [collector release];
206 | return found;
207 | }
208 | // Like elementsWithCSSSelector: but stops feeding elements as soon as the matcher reports a match,
209 | // then returns the matcher's firstMatch.
210 | -(Element*)elementWithCSSSelector:(CSSSelector*)selector{
211 | CSSSelectorMatcher* finder = [[CSSSelectorMatcher alloc] initWithSelector: selector];
212 | for (Element* cursor = self; cursor; cursor = [cursor nextElementWithinScope: self])
213 | if ([finder matchElement: cursor])
214 | break;
215 | // NOTE(review): firstMatch is not retained before the matcher is released;
216 | // presumably the element stays alive because the tree owns it - confirm.
217 | Element* hit = [finder firstMatch];
218 | [finder release];
219 | return hit;
220 | }
221 | // Collects the receiver's direct children: starts at firstChild and follows the nextSybling links.
222 | // Returns an empty array when there are no children.
223 | -(NSArray*)childElements{
224 | NSMutableArray* children = [NSMutableArray array];
225 | for (Element* child = [self firstChild]; child; child = [child nextSybling]){
226 | [children addObject: child];
227 | }
228 | return children;
229 | }
230 |
231 | // Collects the receiver itself followed by every element reachable through the nextSybling links.
232 | // The receiver is always the first entry.
233 | -(NSArray*)syblingElements{
234 | NSMutableArray* chain = [NSMutableArray array];
235 | for (Element* member = self; member; member = [member nextSybling]){
236 | [chain addObject: member];
237 | }
238 | return chain;
239 | }
240 |
241 | // Maps each direct child's key (case-normalized tag name) to its contentsText.
242 | // Children are visited in document order, so a later child with the same key overwrites an earlier one.
243 | -(NSDictionary*)contentsOfChildren{
244 | NSMutableDictionary* textByKey = [NSMutableDictionary dictionary];
245 | for (Element* child = [self firstChild]; child; child = [child nextSybling]){
246 | [textByKey setObject: [child contentsText] forKey: [child key]];
247 | }
248 | return textByKey;
249 | }
250 |
251 | // Compares the tag name with string as documented in Element.h: exact for case-sensitive (XML) elements, case-insensitive otherwise. It previously compared -description (e.g. "<p>"), which cannot equal a tag name.
252 | -(BOOL)isEqualToString:(NSString*)string{
253 | return [[self key] isEqualToString: ([self caseSensative] ? string : [string lowercaseString])];
254 | }
255 | // Computed on first access and cached: the tag name itself for case-sensitive elements, otherwise its lowercase form.
256 | -(NSString*)key{
257 | if (key)
258 | return key;
259 | NSString* name = [self tagName];
260 | self.key = [self caseSensative] ? name : [name lowercaseString];
261 | return key;
262 | }
263 | // Renders the open tag as <name attr='value' ...>, ending in " />" for empty tags and ">" otherwise.
264 | // An element without source yields an empty string.
265 | -(NSString*)description{
266 | NSMutableString* markup = [NSMutableString string];
267 | if (source == nil) //root element has no source
268 | return markup;
269 | [markup appendString: @"<"];
270 | [markup appendString: [self tagName]];
271 | NSDictionary* parsed = [self attributes];
272 | for (NSString* name in [parsed allKeys])
273 | [markup appendFormat: @" %@='%@'", name, [parsed objectForKey: name]];
274 | // empty tags are rendered self-closed
275 | [markup appendString: ([self isEmptyTag] ? @" />" : @">")];
276 | return markup;
277 | }
278 | // Debugging aid: one line per element, from the receiver onward along the nextElement chain.
279 | // Each line shows the indent, the open tag, then the contents text ("..." when the contents hold markup).
280 | -(NSString*)dumpTree{
281 | NSMutableString* dump = [NSMutableString string];
282 | for (Element* current = self; current; current = [current nextElement]){
283 | // one indent unit for the element itself plus one per ancestor
284 | for (Element* level = current; level; level = level.parent)
285 | [dump appendString: @" "];
286 | [dump appendString: [current description]];
287 | NSString* shown = [current containsMarkup] ? @"..." : [current contentsText];
288 | [dump appendFormat: @"%@\n", shown];
289 | }
290 | return dump;
291 | }
292 |
293 | @end
294 |
--------------------------------------------------------------------------------
/Classes/ElementParser.h:
--------------------------------------------------------------------------------
1 | //
2 | // ElementParser.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/20/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nomial fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see .
23 | //
24 |
25 | #import
26 | #import "Element.h"
27 | #import "DocumentRoot.h"
28 |
29 | typedef enum{ // parsing dialect selected by parseHTML: / parseXML:
30 | ElementParserModeHTML, // the default set by init; known-empty tags such as img or br are closed automatically
31 | ElementParserModeXML // elements are created case sensitive; no tag is treated as implicitly empty
32 | } ElementParserMode;
33 |
34 | #define ElementParserErrorDomain 1022 // NOTE(review): NSError domains are strings; this is only referenced from commented-out code - confirm intent
35 | typedef enum{ // codes passed to warning:description:chunk:
36 | ElementParserTagNotClosedError = -1, // reported for each element still open when the document ends
37 | ElementParserGeneralError = -2 // reported when an element closed without a close tag ends up with zero-length contents
38 | }ElementParserErrors;
39 |
40 | @interface ElementParser : NSObject {
41 | NSMutableArray* tagStack; // elements currently open; index 0 is the document root
42 | DocumentRoot* root;
43 | Element* lastOpened; //assigned
44 | Element* lastClosedBeforeOpen; // assigned; cleared whenever an element is opened
45 | Chunk* lastChunk; // most recent chunk handed to the parser; parsing resumes after it
46 |
47 | CFMutableArrayRef callbackMethods; // selectors, index-aligned with callbackMatchers
48 | NSMutableArray* callbackMatchers;
49 | id delegate;
50 | ElementParserMode mode;
51 | }
52 |
53 | /**
54 | * The delegate that is called when selectors match
55 | */
56 | @property (nonatomic, assign) id delegate;
57 |
58 | /**
59 | * HTML or XML
60 | */
61 | @property ElementParserMode mode;
62 |
63 |
64 | /**
65 | * The source being parsed.
66 | */
67 | @property (readonly) NSString* source;
68 |
69 |
70 | /**
71 | * Parse an HTML document and return a tree of Elements corresponding to the document.
72 | * The DocumentRoot is a special Element that contains all the top-level Elements in the
73 | * source. Returns nil when source is nil.
74 | */
75 | -(DocumentRoot*)parseHTML:(NSString*)source;
76 |
77 |
78 | /**
79 | * Parse an XML document and return a tree of Elements corresponding to the document.
80 | * The DocumentRoot is a special Element that contains all the top-level Elements in the
81 | * source. Returns nil when source is nil.
82 | */
83 | -(DocumentRoot*)parseXML:(NSString*)source;
84 |
85 | /**
86 | * When parsing a document incrementally, begin with a single call to beginParsing,
87 | * followed by multiple calls to continueParsingString: as text arrives and finally a single
88 | * call to finishParsing
89 | */
90 | -(DocumentRoot*)beginParsing;
91 | -(void)continueParsingString:(NSString*)string;
92 | -(void)finishParsing;
93 |
94 | /**
95 | * Registers a callback to be performed on the delegate whenever the supplied selector matches
96 | */
97 | -(void)performSelector:(SEL)method forElementsMatching:(NSString*)cssSelector;
98 |
99 | /**
100 | * returns true for HTML elements that are always empty, like img or br (always false in XML mode)
101 | */
102 | -(BOOL)shouldBeEmptyElement:(Element*)element;
103 |
104 | /**
105 | * internal callback when a warning condition occurs. May be overridden to surface an
106 | * NSError
107 | */
108 | -(void)warning:(int)code description:(NSString*)description chunk: (Chunk*)chunk;
109 |
110 | /**
111 | * internal callback when an info condition occurs. May be overridden for debugging purposes
112 | */
113 | -(void)info:(NSString*)info atIndex:(int)sourceIndex;
114 |
115 | @end
116 |
--------------------------------------------------------------------------------
/Classes/ElementParser.m:
--------------------------------------------------------------------------------
1 | //
2 | // ElementParser.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/20/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nomial fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see .
23 | //
24 |
25 | #import "ElementParser.h"
26 | #import "NSString_HTML.h"
27 | #import "Chunk.h"
28 | #import "TagChunk.h"
29 | #import "CSSSelectorMatcher.h"
30 |
31 | static NSSet* HTML_TAGS_THAT_SHOULD_BE_EMPTY;
32 |
33 |
34 | @interface ElementParser()
35 |
36 | @property (nonatomic, assign) Element* lastOpened;
37 | @property (nonatomic, assign) Element* lastClosedBeforeOpen;
38 | @property (nonatomic, retain) DocumentRoot* root;
39 | @property (nonatomic, retain) Chunk* lastChunk;
40 |
41 | -(void)closeAllTags;
42 | -(void)prepareParseWithString:(NSString*)string;
43 | -(void)parseMoreWithPartial:(BOOL)partial;
44 |
45 | @end
46 |
47 |
48 | @implementation ElementParser
49 |
50 | @synthesize root, lastOpened, lastClosedBeforeOpen, lastChunk, delegate, mode;
51 | // +initialize also runs for every subclass that does not override it, so the shared set is built only once instead of being re-allocated (and the old one leaked).
52 | +(void)initialize{
53 | if (!HTML_TAGS_THAT_SHOULD_BE_EMPTY) HTML_TAGS_THAT_SHOULD_BE_EMPTY = [[NSSet alloc] initWithObjects: @"img", @"meta", @"br", @"hr", @"area", @"base", @"basefont", @"col", @"frame", @"input", @"isindex", @"link", @"param", nil];
54 | }
55 | // Creates a parser with an empty tag stack, defaulting to HTML mode. Bails out before touching ivars if super's init fails.
56 | -(id)init{
57 | if (!(self = [super init])) return nil;
58 | tagStack = [[NSMutableArray alloc] initWithCapacity: 24];
59 | mode = ElementParserModeHTML;
60 | return self;
61 | }
62 | // Releases the owned state. lastOpened, lastClosedBeforeOpen and delegate are assign
63 | // properties and are therefore not released here.
64 | -(void)dealloc{
65 | [lastChunk release];
66 | [root release];
67 | [tagStack release];
68 | [callbackMatchers release]; // created together with callbackMethods; nil (a no-op) when no callback was registered
69 | if (callbackMethods)
70 | CFRelease(callbackMethods); // CFRelease must not be passed NULL, hence the guard
71 | [super dealloc];
72 | }
73 | // Parses a complete HTML document in one pass and returns the parser-owned root of the tree (nil for a nil source).
74 | -(DocumentRoot*)parseHTML:(NSString*)source{
75 | if (source == nil) return nil;
76 | [self setMode: ElementParserModeHTML];
77 | [self prepareParseWithString: source];
78 | [self parseMoreWithPartial: NO]; // NO: the whole document is available
79 | [self closeAllTags];
80 | return root;
81 | }
82 | // Parses a complete XML document in one pass and returns the parser-owned root of the tree (nil for a nil source).
83 | -(DocumentRoot*)parseXML:(NSString*)source{
84 | if (source == nil) return nil;
85 | [self setMode: ElementParserModeXML];
86 | [self prepareParseWithString: source];
87 | [self parseMoreWithPartial: NO]; // NO: the whole document is available
88 | [self closeAllTags];
89 | return root;
90 | }
91 |
92 | // Starts an incremental parse on an empty mutable buffer;
93 | // continueParsingString: appends to that buffer as text arrives.
94 | -(DocumentRoot*)beginParsing{
95 | [self prepareParseWithString: [NSMutableString string]];
96 | return root;
97 | }
98 | // Appends moreString to the source buffer and parses what is now available. The cast relies on the source having been created mutable by beginParsing.
99 | -(void)continueParsingString:(NSString*)moreString{
100 | [(NSMutableString*)[self source] appendString: moreString];
101 | [self parseMoreWithPartial: YES]; // YES: more text may still arrive
102 | }
103 | // Ends an incremental parse: parses whatever remains as final input, then closes every element still open.
104 | -(void)finishParsing{
105 | [self parseMoreWithPartial: NO]; // NO: no further text will arrive
106 | [self closeAllTags];
107 | }
108 | // The text being parsed; it is held by the document root (nil before any parse has been prepared).
109 | -(NSString*)source{
110 | return [root source];
111 | }
112 | // Resets all per-document state so a reused parser starts clean. The previous tree is released through the retain setter (it used to leak), and the last chunk / last closed element of an earlier document are forgotten so parsing restarts at index 0 and no stale element gets linked as a sibling. Callers keeping an earlier root must retain it.
113 | -(void)prepareParseWithString:(NSString*)string{
114 | self.root = [[[DocumentRoot alloc] initWithString: string range: NSMakeRange(0,0)] autorelease];
115 | self.lastChunk = nil; self.lastClosedBeforeOpen = nil;
116 | lastOpened = root;
117 | [tagStack removeAllObjects];
118 | [tagStack addObject: root];
119 | }
120 | // Resumes chunk parsing just past the last chunk handled (or at 0) and has every chunk delivered to buildElementTreeWithChunk:context:.
121 | -(void)parseMoreWithPartial:(BOOL)partial{
122 | NSString* text = root.source;
123 | [root setContentsLength: [text length]]; // the root's contents span everything received so far
124 | int resumeAt = lastChunk ? NSMaxRange(lastChunk.range) : 0;
125 | [NSString parseHTML: text delegate: self selector: @selector(buildElementTreeWithChunk:context:) context: self index: &resumeAt partial: partial];
126 | }
127 | // The innermost element still open, i.e. the top of the tag stack. lastObject returns nil on an empty stack where objectAtIndex: count - 1 would raise.
128 | -(Element*)parentElement{
129 | return [tagStack lastObject];
130 | }
131 | // Runs element past every registered matcher. When a matcher reports a completed match, the callback stored
132 | // at the same index is sent to the delegate; a non-nil result becomes the element's domainObject.
133 | -(void)matchElement:(Element*)element{
134 | for (NSUInteger slot = 0; slot < [callbackMatchers count]; slot++){
135 | CSSSelectorMatcher* candidate = [callbackMatchers objectAtIndex: slot];
136 | if (![candidate matchElement: element])
137 | continue;
138 | SEL callback = (SEL)CFArrayGetValueAtIndex(callbackMethods, slot);
139 | NSObject* produced = [delegate performSelector: callback withObject: element];
140 | if (produced)
141 | [element setDomainObject: produced];
142 | }
143 | }
144 | // Closes the open element that tag closes, plus every element still open above it on the stack, recording each one's contentsLength. An unmatched close tag is ignored.
145 | // nil is a valid value... closed first open tag (the stack search stops at the top entry)
146 | -(void)closeElementWithTag:(TagChunk*) tag{
147 | int depthIndex;
148 | for (depthIndex = [tagStack count] - 1; depthIndex > 0; depthIndex--){ // index 0 (the root) is never a candidate
149 | // crawl up stack to find matching element
150 | Element* stackElement = [tagStack objectAtIndex: depthIndex];
151 | if (!tag || [tag closesTag: stackElement])
152 | break;
153 | }
154 | if (depthIndex > 0){
155 | Element* closedElement;
156 | // close everything up to found element
157 | while ([tagStack count] > depthIndex){//int ii=[tagStack count] - 1; ii >= depth; ii--
158 | closedElement = [tagStack lastObject];
159 | closedElement.contentsLength =
160 | (tag == nil) ? lastChunk.range.location - NSMaxRange(closedElement.range) : // no close tag: contents run up to the start of the last chunk seen
161 | (tag == closedElement) ? 0 : // element passed as its own close tag. NOTE(review): also hit when a p is implicitly closed via acceptsParent:, leaving that p with zero-length contents - confirm intended
162 | tag.range.location - NSMaxRange(closedElement.range); // contents end where the close tag starts
163 | if(!tag && closedElement.contentsLength == 0)
164 | [self warning: ElementParserGeneralError description:@"Contents may not be right" chunk: closedElement];
165 | // NSLog(@"Close %@", [closedElement description]);
166 | self.lastClosedBeforeOpen = closedElement; // openElement: links the next opened element as this one's nextSybling
167 | [tagStack removeObjectsInRange: NSMakeRange([tagStack count] - 1, 1)];
168 | if (delegate && callbackMatchers)
169 | [self matchElement: closedElement]; // selector callbacks fire as each element closes
170 | }
171 | // self.lastClosedBeforeOpen = closedElement;
172 | // [tagStack removeObjectsInRange: NSMakeRange(i, [tagStack count] - i)];
173 | }
174 | else{
175 | // orphan close tag - ignore
176 | }
177 | }
178 | // Links a newly opened element into the tree and pushes it on the tag stack.
179 | // The statements below are order dependent: the parent must be read before the element itself is pushed.
180 | -(void)openElement:(Element*) element{
181 | [element setParent: [self parentElement]];
182 | [lastOpened setNextElement: element]; // extends the document-order chain
183 | [lastClosedBeforeOpen setNextSybling: element]; // a no-op when nothing was closed since the last open
184 | [tagStack addObject: element];
185 | lastOpened = element;
186 | lastClosedBeforeOpen = nil;
187 | }
188 | // Closes whatever is still open once the source is exhausted, warning about each element the document left unclosed.
189 | -(void)closeAllTags{
190 | // index 0 holds the document root, which is never closed; every pass closes exactly the innermost open element
191 | for (NSUInteger remaining = [tagStack count]; remaining > 1; remaining--){
192 | Element* unclosed = [tagStack lastObject];
193 | [self warning: ElementParserTagNotClosedError description:@"document left tag open" chunk: unclosed];
194 | [self closeElementWithTag: nil];
195 | }
196 | }
197 | // Default info handler: logs the message together with the source index it refers to.
198 | -(void)info:(NSString*)info atIndex:(int)sourceIndex{
199 | NSLog(@"INFO [index: %i]: %@", sourceIndex, info);
200 | }
201 | // Default warning handler: logs the chunk's location, the description and the chunk itself. code is unused here. The location is an NSUInteger, so it is cast and printed with %lu rather than %i.
202 | -(void)warning:(int)code description:(NSString*)description chunk: (Chunk*)chunk{
203 | NSLog(@"WARN [index: %lu]: %@\n%@", (unsigned long)chunk.range.location, description, [chunk description]);
204 | /* subclasses should do this work if they want to do something with the warnings
205 | NSMutableDictionary* info = [NSMutableDictionary dictionaryWithCapacity: 2];
206 | if (description)
207 | [info addObject: description forKey: NSLocalizedDescriptionKey];
208 | if (chunk)
209 | [info addObject: chunk forKey: ElementParserErrorChunk];
210 | NSError* error = [NSError errorWithDomain: ElementParserErrorDomain code: code userInfo: info];
211 | */
212 | }
213 | // YES for elements whose key is in the set of HTML tags that never have contents.
214 | // In XML mode the set is not consulted and the answer is always NO.
215 | -(BOOL)shouldBeEmptyElement:(Element*)element{
216 | BOOL htmlRulesApply = (mode != ElementParserModeXML);
217 | return htmlRulesApply && [HTML_TAGS_THAT_SHOULD_BE_EMPTY containsObject: element.key];
218 | }
219 | // Chunk callback passed to +[NSString parseHTML:...] by parseMoreWithPartial:. Close tags close elements and other tags open new ones. Always returns self (see the final return).
220 | -(id)buildElementTreeWithChunk:(Chunk*)chunk context:(void*)builder{
221 | /*
222 | used to hunt down problem strings in example documents
223 | BOOL breakpoint = [[chunk description] rangeOfString: @""].location != NSNotFound;
224 | if (breakpoint)
225 | NSLog(@"found breakpoint");
226 | */
227 | self.lastChunk = chunk; // parseMoreWithPartial: resumes after this chunk
228 | TagChunk* tag = [chunk isKind: ChunkKindTag] ? (TagChunk*) chunk : nil; // nil for every non-tag chunk
229 |
230 | if (![chunk isKind: ChunkKindText] && ![tag isCloseTag]) // anything except plain text or a close tag counts as markup inside the current parent
231 | [self parentElement].containsMarkup = YES;
232 |
233 | if (!tag)
234 | return self;
235 | else if ([tag isCloseTag])
236 | [self closeElementWithTag: tag];
237 | else {
238 | Element* element = [[Element alloc] initWithTag: tag caseSensative: mode == ElementParserModeXML]; // only XML elements are case sensitive
239 | if ([element isEmptyTag] || [self shouldBeEmptyElement: element]){ // empty element: open it and close it at once, passing the element as its own close tag
240 | [self openElement: element];
241 | [self closeElementWithTag: element];
242 | }
243 | else {
244 | if (![element acceptsParent: [self parentElement]]) // e.g. a p directly inside a p
245 | [self closeElementWithTag: [self parentElement]]; // implicitly close the refused parent first
246 | [self openElement: element];
247 | }
248 | [element release]; // openElement: pushed it on tagStack, which retains it
249 | }
250 | return self;//to continue parsing
251 | }
252 | // Registers method to be sent to the delegate for elements matching cssSelector. The selector and its matcher are stored at the same index of two parallel arrays, created on first use.
253 | -(void)performSelector:(SEL)method forElementsMatching:(NSString*)cssSelector{
254 | if (callbackMethods == NULL){
255 | callbackMethods = CFArrayCreateMutable(NULL, 0, NULL); // NULL callbacks: values are stored as plain pointers
256 | callbackMatchers = [[NSMutableArray alloc] initWithCapacity: 10];
257 | }
258 | CFArrayAppendValue(callbackMethods, method);
259 | CSSSelector* parsedSelector = [[CSSSelector alloc] initWithString: cssSelector];
260 | CSSSelectorMatcher* selectorMatcher = [[CSSSelectorMatcher alloc] initWithSelector: parsedSelector];
261 | [parsedSelector release];
262 | [callbackMatchers addObject: selectorMatcher];
263 | [selectorMatcher release];
264 | }
265 | // Concatenates the descriptions (open tags) of every element in the parsed tree, in document order.
266 | // The walk starts at the root's nextElement, so the root itself is not included.
267 | -(NSString*)description{
268 | NSMutableString* summary = [NSMutableString string];
269 | for (Element* cursor = [root nextElement]; cursor; cursor = [cursor nextElement]){
270 | [summary appendString: [cursor description]];
271 | }
272 | // an empty string when nothing has been parsed
273 | return summary;
274 | }
275 |
276 | @end
277 |
--------------------------------------------------------------------------------
/Classes/EntityChunk.h:
--------------------------------------------------------------------------------
1 | //
2 | // EntityChunk.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nomial fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see .
23 | //
24 |
25 | #import
26 | #import "Chunk.h"
27 |
28 |
29 | /**
30 | EntityChunk corresponds to an entity section (e.g. &amp;)
31 | */
32 |
33 | @interface EntityChunk : Chunk {
34 |
35 | }
36 |
37 | @end
38 |
--------------------------------------------------------------------------------
/Classes/EntityChunk.m:
--------------------------------------------------------------------------------
1 | //
2 | // EntityChunk.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nomial fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see .
23 | //
24 |
25 | #import "EntityChunk.h"
26 |
27 |
28 | @implementation EntityChunk
29 |
30 | // The chunk's range with one character trimmed from each end (presumably the leading & and trailing ; - confirm).
31 | -(NSRange)interiorRange{
32 | NSRange inner = range;
33 | inner.location += 1;
34 | inner.length -= 2;
35 | return inner;
36 | }
37 | // Kind identifier for entity chunks.
38 | -(NSString*)kind{ return ChunkKindEntity; }
39 | // Human readable name for this kind of chunk.
40 | +(NSString*)humanName{ return @"entity"; }
41 |
42 | @end
43 |
--------------------------------------------------------------------------------
/Classes/NSString_HTML.h:
--------------------------------------------------------------------------------
1 | //
2 | // NSString_HTML.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 3/27/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 | #import "CSSSelector.h"
27 | @class Element;
28 |
29 | /**
30 | * spins through string buffer until character a or b encountered (or end of buffer)
31 | */
32 | CFIndex lenThruOr(CFStringInlineBuffer* buffer, CFIndex index, const char a, const char b);
33 |
34 | /**
35 | * spins though an attribute/value pair inside an element
36 | */
37 |
38 | CFIndex lenAttributeAndValue(CFStringInlineBuffer* buffer, CFIndex index, NSString** attrName, NSString**attrValue);
39 |
40 | /**
41 | * spins through string buffer until a whitespace character is encountered.
42 | * Treats any character <= 32 as whitespace. Returns 0 if end of buffer encountered.
43 | */
44 | unichar skipNonWhitespace(CFStringInlineBuffer* buffer, CFIndex* index);
45 |
46 |
47 | /**
48 | * spins through string buffer until a non-whitespace character is encountered.
49 | * Treats any character <= 32 as whitespace. Returns 0 if end of buffer encountered.
50 | */
51 | unichar skipWhitespace(CFStringInlineBuffer* buffer, CFIndex* index);
52 |
53 |
54 | /**
55 | * spins through string buffer until a non token character is encountered.
56 | * Returns length of the token. Used for attributes, class names, identifiers and tag names.
57 | * Does not accommodate non latin characters.
58 | * Accepts '-', '_', ':' even when in first character position
59 | * Also permits '/' to begin the token (simplifies parsing close tags).
60 | */
61 | CFIndex lenToken(CFStringInlineBuffer* buffer, CFIndex index);
62 |
63 |
64 | /**
65 | * Returns non-zero if the characters in the buffer at index begin with the supplied string
66 | */
67 | CFIndex startsWithStr(CFStringInlineBuffer* buffer, CFIndex index, const char* prefix);
68 |
69 | /**
70 | * Parses an entity name and returns its length. Returns 0 if end of buffer
71 | * is encountered or NSNotFound if an invalid entity character is encountered.
72 | */
73 | CFIndex lenEntityName(CFStringInlineBuffer* buffer, CFIndex index);
74 |
75 |
76 | /**
77 | * Spins through buffer until the supplied suffix is encountered. Returns
78 | * 0 if end of buffer is encountered before the suffix.
79 | */
80 | CFIndex lenThru(CFStringInlineBuffer* buffer, CFIndex index, const char* suffix);
81 |
82 | /**
83 | * Spins through buffer until the supplied suffix is encountered. Does not
84 | * match on characters within single or double quotes.
85 | * Returns 0 if end of buffer is encountered before the suffix.
86 | */
87 | CFIndex lenThruRespectingQuotes(CFStringInlineBuffer* buffer, CFIndex index, const char* suffix);
88 |
89 | /**
90 | * Returns the character corresponding to the entity at the supplied index in the buffer
91 | */
92 | unichar parseEntity(CFStringInlineBuffer* buffer, CFIndex index, CFIndex* len);
93 |
94 |
95 | /**
96 | * Parses the doctype at the supplied index in the buffer and returns its length.
97 | * Return 0 if end of buffer encountered first
98 | */
99 | CFIndex lenDoctype(CFStringInlineBuffer* buffer, CFIndex index);
100 |
101 |
102 | /**
103 | * Convenience function that creates a string from a range in the buffer
104 | */
105 | NSString* createStringFromBuffer(CFStringInlineBuffer* buffer, CFIndex index, CFIndex length);
106 |
107 | @interface NSString (HTML)
108 |
109 | /**
110 | * Converts the leading hexadecimal digits of the string (either case) to an int; stops at the first non-hex character
111 | */
112 | -(int)hexValue;
113 |
114 |
115 | /**
116 | * Returns a string in which
117 | * a) all the tags have been removed (the contents of script elements are dropped too)
118 | * b) entities are resolved
119 | * c) cdata sections are processed
120 | * d) whitespace is compressed
121 | * e) tags that emit a new line (see TagChunk emitsNewLineInContents) become a blank line to provide minimal formatting
122 | */
123 | -(NSString*)stripTags;
124 |
125 |
126 | /**
127 | * Convenience method to url encode a string
128 | */
129 | -(NSString*)stringByAddingPercentEscaping;
130 |
131 |
132 | /**
133 | * Convenience method to url decode a string
134 | */
135 | -(NSString*)stringByRemovingPercentEscaping;
136 |
137 |
138 | /**
139 | * Resolves entities in string
140 | */
141 | -(NSString*)stringByReplacingEntities;
142 |
143 |
144 | /**
145 | * Convenience method that replaces entities for a range
146 | */
147 | -(NSString*)stringByReplacingEntitiesInRange:(NSRange)range;
148 |
149 |
150 | /**
151 | * Convenience method to create an element from a string holding a single tag
152 | */
153 | -(Element*)element;
154 |
155 |
156 | /**
157 | * Parses an element returning its attributes. Attributes without a value map to NSNull.
158 | */
159 | -(NSDictionary*)parseElementAttributesWithRange:(NSRange) range caseSensative:(BOOL)caseSensative;
160 |
161 |
162 | /**
163 | * Very simpleminded parsing out of character encoding based on an http header contentType
164 | */
165 | + (NSStringEncoding) encodingForContentType:(NSString *)contentType;
166 |
167 |
168 | /**
169 | * The base parser that spins through a string and calls a delegate for each chunk encountered.
170 | * Chunks include: tags, entities, comments, cdata, characters and others.
171 | * ElementParser uses this low level parser to build an Element tree.
172 | */
173 | +(void)parseHTML:(NSString*) source delegate:(id)delegate selector:(SEL)selector context: (void*) context;
174 |
175 | /**
176 | * The base parser that spins through a string and calls a delegate for each chunk encountered.
177 | * This version of the method permits partial parsing... ie the parser will stop if
178 | * it encounters a chunk that extends beyond the end of the string. It can be called
179 | * repeatedly as more text arrives and is appended to the string.
180 | */
181 | +(void)parseHTML:(NSString*)source delegate:(id)delegate selector:(SEL)selector context: (void*) context index:(int*)sourceIndex partial:(BOOL)partial;
182 |
183 | @end
184 |
--------------------------------------------------------------------------------
/Classes/NSString_HTML.m:
--------------------------------------------------------------------------------
1 | //
2 | // NSString_HTML.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 3/27/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "NSString_HTML.h"
26 | #import "Element.h"
27 | #import "CSSSelectorMatcher.h"
28 | #import "ElementParser.h"
29 | #import "TagChunk.h"
30 | #import "CommentChunk.h"
31 | #import "EntityChunk.h"
32 | #import "ProcessingInstructionChunk.h"
33 | #import "CDataChunk.h"
34 | #import "DoctypeChunk.h"
35 | #import "TxtChunk.h"
36 |
37 | #define OUT_BUFFER_LENGTH 20000
38 | #define MAX_READ_BUFFER_LENGTH 60000
39 | static const NSDictionary* ENTITIES_MAP;
40 |
41 |
42 |
43 | CFIndex lenThruOr(CFStringInlineBuffer* buffer, CFIndex index, const char a, const char b){
44 |     // Counts the characters from index up to (not including) the first a, b or end of buffer.
45 |     CFIndex cursor = index;
46 |     for (unichar ch; (ch = CFStringGetCharacterFromInlineBuffer(buffer, cursor)) != 0; cursor++)
47 |         if ((ch == a) || (ch == b)) break;
48 |     return cursor - index;
49 | }
50 |
51 |
52 | unichar skipNonWhitespace(CFStringInlineBuffer* buffer, CFIndex* index){
53 |     // Advances *index to the first character <= 32 and returns it; returns 0 at end of buffer.
54 |     unichar ch = CFStringGetCharacterFromInlineBuffer(buffer, *index);
55 |     while (ch > 32){
56 |         ch = CFStringGetCharacterFromInlineBuffer(buffer, ++(*index));
57 |     }
58 |     return ch;
59 | }
60 |
61 |
62 | unichar skipWhitespace(CFStringInlineBuffer* buffer, CFIndex* index){
63 |     // Advances *index to the first character > 32 and returns it; returns 0 at end of buffer.
64 |     unichar ch = CFStringGetCharacterFromInlineBuffer(buffer, *index);
65 |     while ((ch != 0) && (ch <= 32)){
66 |         ch = CFStringGetCharacterFromInlineBuffer(buffer, ++(*index));
67 |     }
68 |     return ch;
69 | }
70 |
71 |
72 | // Returns the length of the token starting at index (0 if none). A '/' is accepted in first position only, so close tags parse as one token.
73 | CFIndex lenToken(CFStringInlineBuffer* buffer, CFIndex index){
74 | CFIndex maxIndex = buffer->rangeToBuffer.location + buffer->rangeToBuffer.length; // NOTE(review): callers pass buffer-relative indexes, so adding location looks like an overshoot of the bound — confirm
75 | CFIndex i;
76 | for (i = index; i < maxIndex; i++){
77 | unichar c = CFStringGetCharacterFromInlineBuffer(buffer, i);
78 | BOOL valid = ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9')) || (c=='-') || (c=='_') || (c == ':');
79 | if ((valid == NO) && (i == index) && ((c == '/'))) // leading '/' of a close tag
80 | valid = YES;
81 | if (valid == NO)
82 | break; // first character that cannot be part of a token
83 | }
84 | return i - index;
85 | }
86 |
87 |
88 | CFIndex startsWithStr(CFStringInlineBuffer* buffer, CFIndex index, const char* prefix){
89 |     // Returns the number of characters matched (the prefix length) when the buffer content at
90 |     // index begins with prefix, or 0 when it does not. Callers use the result as a boolean.
91 |     CFIndex startIndex = index;
92 |     for (; *prefix; prefix++, index++){
93 |         unichar c = CFStringGetCharacterFromInlineBuffer(buffer, index);
94 |         if (c != *prefix)
95 |             return 0; // mismatch, or end of buffer (which reads as 0)
96 |     }
97 |     // previously startIndex - index, which produced a negative length
98 |     return index - startIndex;
99 | }
100 |
101 |
102 | CFIndex lenEntityName(CFStringInlineBuffer* buffer, CFIndex index){ // length of the entity at index including '&' and ';'; 0 at end of buffer; NSNotFound on an invalid character
103 | CFIndex len = 1; // counts the leading '&'
104 | index++; // first char is assumed to be a '&'
105 | unichar c;
106 | while (c = CFStringGetCharacterFromInlineBuffer(buffer, index++)){
107 | if (c==';')
108 | return len + 1; // include the terminating ';'
109 | if (((c < 'a') || (c > 'z')) && ((c < 'A') || (c > 'Z')) && ((c < '0') || (c > '9')) && (c != '#')) // only letters, digits and '#' may appear in the name
110 | return NSNotFound;
111 | len++;
112 | }
113 | return 0; // buffer ended before a ';' was seen
114 | }
115 |
116 | CFIndex lenThruRespectingQuotes(CFStringInlineBuffer* buffer, CFIndex index, const char* suffix){ // length from index through the first occurrence of suffix outside quotes; 0 if the buffer ends first
117 | CFIndex startIndex = index;
118 | int numCharsMatched = 0; // how many suffix characters the current candidate match has consumed
119 | const char* suffixStart = suffix;
120 | char openQuote = 0; // the quote character currently open, or 0 when outside quotes
121 | while (*suffix){
122 | unichar c = CFStringGetCharacterFromInlineBuffer(buffer, index);
123 | if (c==0)
124 | return 0; // end of buffer before the suffix was completed
125 | else if (c == openQuote)
126 | openQuote = 0; // matching close quote (c is never 0 here, so this cannot fire outside quotes)
127 | else if ((c == *suffix) && (openQuote == 0)){
128 | suffix++;
129 | numCharsMatched++;
130 | }
131 | else {
132 | // a partial match failed: rewind to its first character and start the suffix over
133 | if (numCharsMatched){
134 | index -= numCharsMatched;
135 | suffix = suffixStart;
136 | numCharsMatched = 0;
137 | }
138 | if ((openQuote == 0) && ((c == '"') || (c == '\'')))
139 | openQuote = c; // entering a quoted run; suffix characters inside it are ignored
140 | }
141 | index++;
142 | }
143 | return index - startIndex;
144 | }
145 |
146 | CFIndex lenThru(CFStringInlineBuffer* buffer, CFIndex index, const char* suffix){ // length from index through the first occurrence of suffix; 0 if the buffer ends first
147 | CFIndex startIndex = index;
148 | int numCharsMatched = 0; // how many suffix characters the current candidate match has consumed
149 | const char* suffixStart = suffix;
150 | while (*suffix){
151 | unichar c = CFStringGetCharacterFromInlineBuffer(buffer, index);
152 | if (c==0)
153 | return 0; // end of buffer before the suffix was completed
154 | else if (c == *suffix){
155 | suffix++;
156 | numCharsMatched++;
157 | }
158 | else if (suffix != suffixStart){
159 | // a partial match failed: rewind so scanning resumes one past where that match began
160 | index -= numCharsMatched;
161 | suffix = suffixStart;
162 | numCharsMatched = 0;
163 | }
164 | index++;
165 | }
166 | return index - startIndex;
167 | }
168 |
169 | unichar parseEntity(CFStringInlineBuffer* buffer, CFIndex index, CFIndex* len){
170 |     // Resolves the entity whose '&' is at buffer-relative index; stores its source length in *len. Returns 0 when it cannot be resolved.
171 |     if (startsWithStr(buffer, index+1, "gt;")){
172 |         (*len) = 4;
173 |         return '>';
174 |     }
175 |     else if (startsWithStr(buffer, index+1, "lt;")){
176 |         (*len) = 4;
177 |         return '<';
178 |     }
179 |     else if (startsWithStr(buffer, index+1, "amp;")){
180 |         (*len) = 5;
181 |         return '&';
182 |     }
183 |     else{
184 |         (*len) = lenThru(buffer, index + 1, ";") + 1;
185 |         if (((*len) < 2) || ((*len) > 12)) return 0; // no ';' found, or implausibly long
186 |         CFIndex base = buffer->rangeToBuffer.location + index; // position of '&' in theString; substringWithRange: needs string-absolute positions, index is buffer-relative
187 |         if (CFStringGetCharacterFromInlineBuffer(buffer, index + 1) == '#'){
188 |             unichar radixMark = CFStringGetCharacterFromInlineBuffer(buffer, index + 2);
189 |             if ((radixMark == 'x') || (radixMark == 'X')){
190 |                 // hex entity: the digits sit between "&#x" and ';'
191 |                 NSString* hexString = [(NSString*)buffer->theString substringWithRange: NSMakeRange(base + 3, (*len) - 4)];
192 |                 return [hexString hexValue];
193 |             }
194 |             else{
195 |                 // decimal entity: the digits sit between "&#" and ';'
196 |                 NSString* decString = [(NSString*)buffer->theString substringWithRange: NSMakeRange(base + 2, (*len) - 3)];
197 |                 return CFStringGetIntValue((CFStringRef)decString);
198 |             }
199 |         }
200 |         else{
201 |             // named entity: looked up in the bundled "HTML Entities" plist, loaded on first use
202 |             if (ENTITIES_MAP == nil)
203 |                 ENTITIES_MAP = [[NSDictionary alloc] initWithContentsOfFile: [[NSBundle mainBundle] pathForResource: @"HTML Entities" ofType: @"plist"]];
204 |             if (!ENTITIES_MAP) return 0;
205 |             NSString* key = [(NSString*)buffer->theString substringWithRange: NSMakeRange(base + 1, (*len) - 2)];
206 |             NSString* result = [ENTITIES_MAP objectForKey: key];
207 |             return (result) ? [result characterAtIndex: 0] : 0;
208 |         }
209 |     }
210 |     return 0;
211 | }
212 |
213 |
214 | /*
215 | Returns the length of the doctype declaration, counted from index through its closing '>', or 0 if the buffer ends first. Skips 9 characters (the length of '<!DOCTYPE') before scanning, i.e. assumes index is at the '<' of '<!DOCTYPE'.
218 | Limitation: will get confused if public identifier or system id have a '>' or a '[' in them
219 | */
220 | CFIndex lenDoctype(CFStringInlineBuffer* buffer, CFIndex index){
221 |     CFIndex startIndex = index;
222 |     index += 9;
223 |     CFIndex len = lenThruOr(buffer, index, '>', '[');
224 |     unichar c = CFStringGetCharacterFromInlineBuffer(buffer, index + len);
225 |     if (c == '>') //no internal decls
226 |         return len + 10;
227 |     if (c == 0) return 0; // ran out of buffer before finding '>' or '['
228 |     index += len + 1; // step past the '[' so the scan starts inside the internal subset
229 |     while ((c = skipWhitespace(buffer, &index)) != ']'){
230 |         if (c == 0) return 0; // ran out of buffer (this case previously never left the loop)
231 |         if (c == '<') len = lenThru(buffer, index, ">"); //elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
232 |         else if (c == '%') len = lenThru(buffer, index, ";"); //PEReference
233 |         else len = 1; // unexpected character: step over it instead of reusing a stale len
234 |         if (len <= 0) return 0; //end of decl not found (lenThru reports that as 0), fail
235 |         index += len;
236 |     }
237 |
238 |     // every end-of-buffer exit is taken inside the loop, so c is ']' here
239 |
240 |     // found end of internal subset, just need the closing '>'
241 |
242 |     index++;
243 |
244 |     c = skipWhitespace(buffer, &index);
245 |     if (c != '>') return 0; // ran out of buffer, or something other than '>' follows the ']'
246 |
247 |     return index - startIndex + 1;
248 | }
249 |
250 | NSString* createStringFromBuffer(CFStringInlineBuffer* buffer, CFIndex index, CFIndex length){
251 |     CFRange absolute = CFRangeMake(buffer->rangeToBuffer.location + index, length); return (NSString*) CFStringCreateWithSubstring(NULL, buffer->theString, absolute); // index is buffer-relative; the result is a Create-rule (+1) reference
252 | }
253 |
254 | @implementation NSString (HTML)
255 |
256 | -(int)hexValue{
257 |     // Accumulates the leading hex digits (either case); stops at the first other character.
258 |     const int radix = 16;
259 |     int total = 0;
260 |     NSUInteger count = [self length];
261 |     for (NSUInteger pos = 0; pos < count; pos++){
262 |         unichar ch = [self characterAtIndex: pos];
263 |         int digit;
264 |         if ((ch >= '0') && (ch <= '9')) digit = ch - '0';
265 |         else if ((ch >= 'A') && (ch <= 'F')) digit = ch - 'A' + 10;
266 |         else if ((ch >= 'a') && (ch <= 'f')) digit = ch - 'a' + 10;
267 |         else break;
268 |         total = (total * radix) + digit;
269 |     }
270 |     return total;
271 | }
272 |
273 | -(NSString*)stringByReplacingEntitiesInRange:(NSRange)range{
274 |     // Each step consumes at least one source character and writes exactly one, so range.length characters of scratch space suffice.
275 |     int capacity = range.length;
276 |     unichar* scratch = malloc(sizeof(unichar) * capacity);
277 |     int written = 0;
278 |     CFStringInlineBuffer reader;
279 |     CFStringInitInlineBuffer((CFStringRef)self, &reader, CFRangeMake(range.location, range.length));
280 |     for (CFIndex readIndex = 0; readIndex < range.length; ){
281 |         unichar ch = CFStringGetCharacterFromInlineBuffer(&reader, readIndex);
282 |         CFIndex consumed = 1;
283 |         unichar resolved = 0;
284 |         if (ch == '&')
285 |             resolved = parseEntity(&reader, readIndex, &consumed);
286 |         // anything that is not a resolvable entity is copied through one character at a time
287 |         if (resolved == 0){
288 |             resolved = ch;
289 |             consumed = 1;
290 |         }
291 |         scratch[written++] = resolved;
292 |         readIndex += consumed;
293 |     }
294 |     NSString* result = [NSString stringWithCharacters: scratch length: written];
295 |     free(scratch);
296 |     return result;
297 | }
298 |
299 | -(NSString*)stringByReplacingEntities{
300 |     NSRange everything = NSMakeRange(0, [self length]); return [self stringByReplacingEntitiesInRange: everything]; // whole-string convenience
301 | }
302 |
303 |
304 | -(NSDictionary*)parseElementAttributesWithRange:(NSRange) range caseSensative:(BOOL)caseSensative{
305 |     NSMutableDictionary* attributes = [[[NSMutableDictionary alloc] initWithCapacity: 8] autorelease];
306 |     // Maps attribute name -> value (NSNull when there is no value) for the tag occupying range; names are lowercased unless caseSensative.
307 |     CFStringInlineBuffer localBuffer;
308 |     CFStringInitInlineBuffer((CFStringRef)self, &localBuffer, CFRangeMake(range.location, range.length));
309 |
310 |     CFIndex index = 1; // skip the leading '<'; index is relative to range.location
311 |
312 |     unichar c = skipNonWhitespace(&localBuffer, &index); // skip the tag name
313 |
314 |     while (c){
315 |         NSString* attrName;
316 |         NSString* attrValue;
317 |
318 |         c = skipWhitespace(&localBuffer, &index);
319 |         if (c == '/'){
320 |             //the empty tag char at the end
321 |             index++;
322 |             break;
323 |         }
324 |         CFIndex tokenLen = lenToken(&localBuffer, index);
325 |         if (tokenLen == 0)
326 |             break;
327 |         attrName = [self substringWithRange: NSMakeRange(index + localBuffer.rangeToBuffer.location, tokenLen)];
328 |         index += [attrName length];
329 |         c = skipWhitespace(&localBuffer, &index);
330 |         if (c == '='){
331 |             index++;//skip the =
332 |             c = skipWhitespace(&localBuffer, &index);
333 |             NSRange valueRange;
334 |             if (c=='"'){
335 |                 CFIndex valueLen = lenThru(&localBuffer, index + 1, "\""); if (valueLen == 0) valueLen = range.length - index; // no closing quote: the value runs to the end of range (valueLen - 1 would otherwise be -1)
336 |                 valueRange = NSMakeRange(index + localBuffer.rangeToBuffer.location + 1, valueLen - 1);
337 |                 index += 2; // the two quote characters
338 |             }
339 |             else if (c=='\''){
340 |                 CFIndex valueLen = lenThru(&localBuffer, index + 1, "'"); if (valueLen == 0) valueLen = range.length - index; // no closing quote: the value runs to the end of range
341 |                 valueRange = NSMakeRange(index + localBuffer.rangeToBuffer.location + 1, valueLen - 1);
342 |                 index += 2; // the two quote characters
343 |             }
344 |             else{
345 |                 CFIndex tokenLen = lenToken(&localBuffer, index); // unquoted value: a single token
346 |                 valueRange = NSMakeRange(index + localBuffer.rangeToBuffer.location, tokenLen);
347 |             }
348 |             attrValue = [self stringByReplacingEntitiesInRange: valueRange];
349 |             [attributes setObject: attrValue forKey: caseSensative ? attrName : [attrName lowercaseString]];
350 |             index += valueRange.length;
351 |         }
352 |         else{
353 |             [attributes setObject: [NSNull null] forKey: caseSensative ? attrName : [attrName lowercaseString]];
354 |         }
355 |     }
356 |     return attributes;
357 | }
358 |
359 |
360 | static inline int moveBufferToIndex(CFStringInlineBuffer *buffer, CFIndex index){ // re-initialises buffer to start at string position index; returns false when there is nothing new to buffer
361 | CFIndex lengthLeftInString = CFStringGetLength(buffer->theString) - index;
362 | if (!lengthLeftInString) {
363 | // NSLog(@"done with string");
364 | return false;
365 | }
366 | int bufferLength = MIN(lengthLeftInString, MAX_READ_BUFFER_LENGTH); // never buffer more than MAX_READ_BUFFER_LENGTH characters at once
367 | CFRange range = CFRangeMake(index, bufferLength);
368 | if (range.location + range.length == buffer->rangeToBuffer.location + buffer->rangeToBuffer.length){ // the new window would end where the current one does
369 | // NSLog(@"end of string already buffered");
370 | return false;
371 | }
372 | CFStringInitInlineBuffer(buffer->theString, buffer, range);
373 | // if(range.location)
374 | // NSLog(@"moved buffer beyond 0");
375 | return true;
376 | }
377 |
378 | +(void)parseHTML:(NSString*)source delegate:(id)delegate selector:(SEL)selector context: (void*) context index:(int*)sourceIndex partial:(BOOL)partial{
379 |     NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
380 |     ElementParser* parser = ([delegate respondsToSelector:@selector(isKindOfClass:)] && [delegate isKindOfClass: [ElementParser class]]) ? delegate : nil;
381 |     CFIndex index = *sourceIndex;
382 |     CFIndex maxSourceIndex = [source length];
383 |     CFStringInlineBuffer buffer;
384 |     buffer.theString = (CFStringRef)source;
385 |     buffer.rangeToBuffer.location = buffer.rangeToBuffer.length = 0;
386 |     // one reusable chunk object per kind; its range is updated before each delegate call
387 |     TagChunk* tag = [[TagChunk alloc] initWithString: source range: NSMakeRange(0,0) tagName: nil];
388 |     CommentChunk* comment = [[CommentChunk alloc] initWithString: source range: NSMakeRange(0,0)];
389 |     EntityChunk* entity = [[EntityChunk alloc] initWithString: source range: NSMakeRange(0,0)];
390 |     DoctypeChunk* doctype = [[DoctypeChunk alloc] initWithString: source range: NSMakeRange(0,0)];
391 |     ProcessingInstructionChunk* pi = [[ProcessingInstructionChunk alloc] initWithString: source range: NSMakeRange(0,0)];
392 |     CDataChunk* cdata = [[CDataChunk alloc] initWithString: source range: NSMakeRange(0,0)];
393 |     TxtChunk* text = [[TxtChunk alloc] initWithString: source range: NSMakeRange(0,0)];
394 |
395 |     BOOL delegateWantsToContinue = YES;
396 |     unichar c;
397 |     // outer loop: (re)position the buffer window at the next unparsed character
398 |     while (delegateWantsToContinue && moveBufferToIndex(&buffer, buffer.rangeToBuffer.location + index)){
399 |         index = 0; // from here on index is relative to the start of the buffer
400 |
401 |         while (delegateWantsToContinue && (c = CFStringGetCharacterFromInlineBuffer(&buffer, index))){
402 |
403 |             int tagLen;
404 |             int len = 0;
405 |             int interior;
406 |             Chunk* chunk = nil;
407 |             Chunk* partialChunk = nil; // set when a chunk starts here but its end is not in the buffer
408 |
409 |             if (c == '<'){
410 |                 if ((tagLen = lenToken(&buffer, index + 1))){
411 |                     interior = lenThruRespectingQuotes(&buffer, index + tagLen + 1, ">"); // 0 when no closing '>' is buffered; previously masked by adding tagLen - 1 before the test
412 |                     if (interior > 0){
413 |                         tag.tagName = createStringFromBuffer(&buffer, index + 1, tagLen);
414 |                         [tag.tagName release]; // balance the Create; this relies on the property keeping its own reference
415 |                         chunk = tag;
416 |                         len = interior + tagLen + 1; // '<' + name + everything through '>'
417 |                     }
418 |                     else
419 |                         partialChunk = tag;
420 |                 }
421 |                 else if (startsWithStr(&buffer, index + 1, "!--")){
422 |                     interior = lenThru(&buffer, index + 4, "-->")-3;
423 |                     if (interior >= 0){ // 0 is a valid empty comment; a missing "-->" yields -3
424 |                         chunk = comment;
425 |                         len = interior + 7;
426 |                     }
427 |                     else
428 |                         partialChunk = comment;
429 |                 }
430 |                 else if (startsWithStr(&buffer, index + 1, "![CDATA[")){
431 |                     interior = lenThru(&buffer, index + 9, "]]>")-3;
432 |                     if (interior >= 0){ // 0 is a valid empty CDATA section; a missing "]]>" yields -3
433 |                         chunk = cdata;
434 |                         len = interior + 12;
435 |                     }
436 |                     else
437 |                         partialChunk = cdata;
438 |                 }
439 |                 else if (startsWithStr(&buffer, index + 1, "?")){
440 |                     interior = lenThru(&buffer, index + 2, ">")-1;
441 |                     if (interior >= 0){ // a missing '>' yields -1
442 |                         chunk = pi;
443 |                         len = interior + 3;
444 |                     }
445 |                     else
446 |                         partialChunk = pi;
447 |                 }
448 |                 else if (startsWithStr(&buffer, index + 1, "!DOCTYPE")){
449 |                     interior = lenDoctype(&buffer, index) - 1; // lenDoctype skips the 9 characters of '<!DOCTYPE' itself; passing index + 9 skipped them twice and overran short doctypes
450 |                     if (interior > 0){
451 |                         chunk = doctype;
452 |                         len = interior + 1; // lenDoctype already counts from the '<' through the '>'
453 |                     }
454 |                     else
455 |                         partialChunk = doctype;
456 |                 }
457 |                 else
458 |                     partialChunk = tag;
459 |             }
460 |             else if (c == '&'){
461 |                 // complicated by the fact that what appears to be an entity may in fact just be text
462 |                 CFIndex entityLen = lenEntityName(&buffer, index);
463 |                 if (entityLen == NSNotFound){
464 |                     len = lenThruOr(&buffer, index + 1, '<', '&') + 1;
465 |                     chunk = text;
466 |                 }
467 |                 else if (entityLen > 0){
468 |                     chunk = entity;
469 |                     len = entityLen;
470 |                 }
471 |                 else
472 |                     partialChunk = entity;
473 |             }
474 |             else{
475 |                 len = lenThruOr(&buffer, index + 1, '<', '&') + 1;
476 |                 chunk = text;
477 |             }
478 |
479 |             if (partialChunk){ // recover from a partial chunk
480 |                 BOOL bytesLeftBeyondBuffer = maxSourceIndex > (buffer.rangeToBuffer.location + buffer.rangeToBuffer.length);
481 |                 if (partial || (bytesLeftBeyondBuffer && (index > 0))) // re-buffering can only reveal more when the chunk is not already at the buffer start
482 |                     break; // go get more bytes in the buffer / or exit
483 |
484 |                 // recover by emitting as text
485 |                 len = lenThruOr(&buffer, index + 1, '<', '&') + 1;
486 |                 chunk = text;
487 |                 NSUInteger fragmentStart = buffer.rangeToBuffer.location + index; // absolute position of the unparseable chunk
488 |                 NSString* fragment = [source substringWithRange: NSMakeRange(fragmentStart, MIN((NSUInteger)8, [source length] - fragmentStart))]; // was length - location + index, which could reach past the end of source
489 |                 [parser info: [NSString stringWithFormat: @"Unable to parse '%@' as %@", fragment, [[partialChunk class] humanName]] atIndex: buffer.rangeToBuffer.location + index];
490 |             }
491 |
492 |             // hand the chunk to the delegate
493 |             chunk.range = NSMakeRange(index + buffer.rangeToBuffer.location, len);
494 |             // NSLog(@"%@: %@", [[chunk class] humanName], [source substringWithRange: chunk.range]);
495 |             chunk.buffer = &buffer;
496 |             delegateWantsToContinue = [delegate performSelector: selector withObject: chunk withObject: context] != nil; // a nil return stops the parse
497 |             index += len;
498 |
499 |             assert(index > 0);
500 |         }
501 |     }
502 |
503 |     if (!delegateWantsToContinue)
504 |         [parser info: @"delegate stopped the parsing" atIndex: buffer.rangeToBuffer.location + index];
505 |
506 |     [tag release];
507 |     [comment release];
508 |     [entity release];
509 |     [pi release];
510 |     [cdata release];
511 |     [doctype release];
512 |     [text release];
513 |
514 |     *sourceIndex = index + buffer.rangeToBuffer.location; // absolute position at which parsing stopped
515 |     [pool release];
516 | }
517 |
518 |
519 | +(void)parseHTML:(NSString*) source delegate:(id)delegate selector:(SEL)selector context: (void*) context{
520 |     int index = 0; // receives the position at which the parser stopped
521 |     [self parseHTML: source delegate: delegate selector: selector context: context index: &index partial: NO];
522 |     NSAssert2(index == [source length], @"%i != %lu", index, (unsigned long)[source length]); // a complete parse must consume every character; %lu matches the NSUInteger length
523 | }
524 |
525 | typedef struct{ // state threaded through chunk:context: while stripTags runs
526 | NSMutableString* result; // receives the flushed output
527 | unichar* outBuffer; // malloc'd staging area for characters not yet appended to result
528 | int outBufferLength; // capacity of outBuffer, in characters
529 | int writeIndex; // number of characters currently staged in outBuffer
530 | BOOL inScriptElement; // YES between a script tag and its /script tag
531 | BOOL inWhite; // YES when the last thing emitted counts as whitespace, so further whitespace is dropped
532 | BOOL inPara; // YES right after a paragraph break, so a second break is not emitted
533 | } StripTagsContext;
534 |
535 |
536 | -(NSString*)stripTags{
537 |     // Text is staged in a unichar buffer by chunk:context: and flushed into 'stripped' as the buffer fills.
538 |     NSMutableString* stripped = [NSMutableString stringWithCapacity: [self length]];
539 |     StripTagsContext state;
540 |     state.result = stripped;
541 |     state.writeIndex = 0;
542 |     state.inScriptElement = NO;
543 |     state.inWhite = YES; // start as if whitespace was just written, so leading whitespace is dropped
544 |     state.inPara = YES; // start as if a paragraph break was just written
545 |     state.outBufferLength = MIN([self length], OUT_BUFFER_LENGTH);
546 |     state.outBuffer = malloc(sizeof(unichar) * state.outBufferLength);
547 |     [NSString parseHTML: self delegate: self selector:@selector(chunk:context:) context: &state];
548 |
549 |     if (state.writeIndex > 0) // flush whatever is still staged
550 |         CFStringAppendCharacters((CFMutableStringRef)stripped, state.outBuffer, state.writeIndex);
551 |
552 |     free(state.outBuffer);
553 |
554 |     return stripped;
555 | }
556 |
557 | -(id)chunk:(Chunk*)chunk context:(StripTagsContext*)context{ // stripTags' parser callback: appends the text contribution of one chunk to the staged output
558 | //write the outBuffer if there isn't enough room for the whole chunk
559 | if (context->writeIndex + chunk.range.length > context->outBufferLength){
560 | CFStringAppendCharacters((CFMutableStringRef)context->result, context->outBuffer, context->writeIndex);
561 | context->writeIndex = 0;
562 | if (chunk.range.length > context->outBufferLength){
563 | // need to grow buffer (it was just flushed, so nothing staged is lost)
564 | free(context->outBuffer);
565 | context->outBufferLength = chunk.range.length;
566 | context->outBuffer = malloc(sizeof(unichar) * context->outBufferLength);
567 | }
568 | }
569 | assert(context->writeIndex + chunk.range.length <= context->outBufferLength);
570 |
571 | CFRange bufferRangeToAppend = CFRangeMake(0, 0); // buffer-relative range to copy with whitespace compression; empty means nothing to copy
572 | CFStringInlineBuffer* buffer = chunk.buffer;
573 |
574 | if ([chunk isKind: ChunkKindTag]){
575 | TagChunk* tag = (TagChunk*)chunk;
576 | if (context->inScriptElement == YES){
577 | if ([tag tagNameEquals: @"/script"])
578 | context->inScriptElement = NO;
579 | }
580 | else if ([tag tagNameEquals: @"script"])
581 | context->inScriptElement = YES;
582 | else if ([tag emitsNewLineInContents]){
583 | if (!context->inPara){//dont do double paras
584 | context->outBuffer[context->writeIndex++] = '\n';
585 | context->outBuffer[context->writeIndex++] = '\n';
586 | context->inWhite = YES;
587 | context->inPara = YES;
588 | }
589 | }
590 | }
591 | else if (context->inScriptElement == YES)
592 | ; // do nothing: everything inside a script element is discarded
593 | else if ([chunk isKind: ChunkKindText]){
594 | bufferRangeToAppend = chunk.rangeInBuffer;
595 | }
596 | else if ([chunk isKind: ChunkKindCData]){
597 | bufferRangeToAppend = [chunk interiorRangeInBuffer];
598 | }
599 | else if ([chunk isKind: ChunkKindEntity]){
600 | CFRange rangeInBuffer = [chunk rangeInBuffer];
601 | unichar entity = parseEntity(chunk.buffer, rangeInBuffer.location, &rangeInBuffer.length); // note: parseEntity overwrites rangeInBuffer.length
602 | if (entity){
603 | context->outBuffer[context->writeIndex++] = entity;
604 | context->inWhite = NO;
605 | }
606 | else{
607 | //we regurgitate unrecognized entities
608 | bufferRangeToAppend = rangeInBuffer;
609 | }
610 | }
611 |
612 | int maxBufferIndex = bufferRangeToAppend.location + bufferRangeToAppend.length;
613 | for (int bufferIndex = bufferRangeToAppend.location; bufferIndex < maxBufferIndex; bufferIndex ++){
614 | unichar c = CFStringGetCharacterFromInlineBuffer(buffer, bufferIndex);
615 | if (c <= 32){ // a run of whitespace collapses to a single space
616 | if (!context->inWhite)
617 | context->outBuffer[context->writeIndex++] = 32;
618 | context->inWhite = YES;
619 | }
620 | else{
621 | context->outBuffer[context->writeIndex++] = c;
622 | context->inWhite = NO;
623 | context->inPara = NO;
624 | }
625 | }
626 | return self; // non-nil tells the parser to keep going
627 | }
628 |
629 | -(Element*)element{
630 |     // Treats self as the source of a single tag; the tag name is the token right after the first character.
631 |     NSUInteger sourceLength = [self length];
632 |     CFStringInlineBuffer scan;
633 |     CFStringInitInlineBuffer((CFStringRef)self, &scan, CFRangeMake(0, sourceLength));
634 |     int nameLength = lenToken(&scan, 1); NSString* name = createStringFromBuffer(&scan, 1, nameLength); // name is a +1 reference
635 |     Element* built = [[Element alloc] initWithString: self range: NSMakeRange(0, sourceLength) tagName: name];
636 |     [built autorelease]; [name release]; return built;
637 | }
638 |
639 |
640 | -(NSString*)stringByAddingPercentEscaping{
641 |     CFStringRef escaped = CFURLCreateStringByAddingPercentEscapes(NULL, (CFStringRef)self, NULL, NULL, kCFStringEncodingUTF8); return [(NSString*)escaped autorelease]; // autorelease balances the Create
642 | }
643 |
644 | -(NSString*)stringByRemovingPercentEscaping{
645 |     CFStringRef unescaped = CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)self, NULL); return [(NSString*)unescaped autorelease]; // autorelease balances the Create
646 | }
647 |
648 | // TODO Handle different encodings
649 | + (NSStringEncoding) encodingForContentType:(NSString *)contentType{
650 |     // The first recognised charset name wins; anything else falls back to ISO Latin 1.
651 |     const NSStringCompareOptions anyCase = NSCaseInsensitiveSearch;
652 |     if ([contentType rangeOfString: @"utf-8" options: anyCase].location != NSNotFound) return NSUTF8StringEncoding;
653 |     if ([contentType rangeOfString: @"iso-8859-1" options: anyCase].location != NSNotFound) return NSISOLatin1StringEncoding;
654 |     if ([contentType rangeOfString: @"windows-1252" options: anyCase].location != NSNotFound) return NSWindowsCP1252StringEncoding;
655 |     // an explicit but unrecognised encoding is logged before falling back
656 |     if ([contentType rangeOfString: @"encoding=" options: anyCase].location != NSNotFound)
657 |         NSLog(@"unknown encoding: %@", contentType);
658 |     return NSISOLatin1StringEncoding;
659 | }
660 |
661 |
662 | @end
663 |
--------------------------------------------------------------------------------
/Classes/ProcessingInstructionChunk.h:
--------------------------------------------------------------------------------
1 | //
2 | // ProcessingInstructionChunk.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 | #import "Chunk.h"
27 |
/**
 * ProcessingInstructionChunk corresponds to a processing instruction
 * (e.g. <?xml version="1.0"?>).
 *
 * It declares no instance variables or methods of its own beyond what Chunk provides.
 */
@interface ProcessingInstructionChunk : Chunk
@end
37 |
--------------------------------------------------------------------------------
/Classes/ProcessingInstructionChunk.m:
--------------------------------------------------------------------------------
1 | //
2 | // ProcessingInstructionChunk.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "ProcessingInstructionChunk.h"
26 |
27 |
@implementation ProcessingInstructionChunk

/** Name used when describing this kind of chunk to a person. */
+ (NSString *)humanName {
    return @"processing instruction";
}

/** Identifies this chunk as a processing instruction. */
- (NSString *)kind {
    return ChunkKindPI;
}

/**
 * The chunk's range with its first two characters and its last character dropped.
 * NOTE(review): presumably this skips a leading "<?" and a trailing ">" — confirm
 * against the chunk parser.
 */
- (NSRange)interiorRange {
    NSUInteger interiorStart = range.location + 2;
    NSUInteger interiorLength = range.length - 3;
    return NSMakeRange(interiorStart, interiorLength);
}

@end
43 |
--------------------------------------------------------------------------------
/Classes/TagChunk.h:
--------------------------------------------------------------------------------
1 | //
2 | // TagChunk.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 | #import "Chunk.h"
27 |
28 |
/**
 * TagChunk corresponds to a tag (e.g. <body>). It may be an open, close or empty tag,
 * and its range includes the text of the attributes.
 */
@interface TagChunk : Chunk {
    NSString *tagName;
    NSStringCompareOptions compareOptions;
}

/** The name of the tag. Includes the leading '/' for close tags. */
@property (nonatomic, retain) NSString *tagName;

/** Whether tag name comparisons are case sensitive (XML) or not (HTML). */
@property BOOL caseSensative;

/**
 * Use this initializer when the tag name has already been created as a string,
 * to reduce object allocations.
 */
- (id)initWithString:(NSString *)aSource range:(NSRange)aRange tagName:(NSString *)aTagName;

/** YES for a tag that ends with '/>'. */
- (BOOL)isEmptyTag;

/** YES for a tag that starts with '</'. */
- (BOOL)isCloseTag;

/** YES when this chunk is the close-tag version of aTag. */
- (BOOL)closesTag:(TagChunk *)aTag;

/** The name of the tag, e.g. 'body'. */
- (NSString *)tagName;

/** Compares tag names using the proper rule (case sensitive or not). */
- (BOOL)tagNameEquals:(NSString *)anotherTagName;

/**
 * When reducing to plain text, is this one of the tags that should emit a new line?
 * True for <p> and <br> tags.
 */
- (BOOL)emitsNewLineInContents;

@end
92 |
--------------------------------------------------------------------------------
/Classes/TagChunk.m:
--------------------------------------------------------------------------------
1 | //
2 | // TagChunk.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "TagChunk.h"
26 | #import "Element.h"
27 | #import "NSString_HTML.h"
28 |
29 |
@interface TagChunk ()
@property NSStringCompareOptions compareOptions;
@end

@implementation TagChunk

@synthesize tagName, compareOptions;

/**
 * Not supported for tags: a TagChunk must be created with
 * -initWithString:range:tagName:. Asserts in debug builds and returns nil otherwise.
 */
- (id)initWithString:(NSString *)aSource range:(NSRange)aRange {
    assert(NO);
    // With assertions compiled out, control previously fell off the end of this
    // non-void method. Discard the half-built object and report failure instead.
    [self release];
    return nil;
}

/**
 * Creates a tag chunk over aRange of aSource, using an already-built tag name.
 * Comparisons start out case insensitive (HTML behaviour).
 */
- (id)initWithString:(NSString *)aSource range:(NSRange)aRange tagName:(NSString *)aTagName {
    self = [super init];
    if (self) {
        source = [aSource retain];
        range = aRange;
        tagName = [aTagName retain];
        compareOptions = NSCaseInsensitiveSearch;
    }
    return self;
}

- (void)dealloc {
    [tagName release];
    [super dealloc];
}

/** The chunk's range without its first and last character. */
- (NSRange)interiorRange {
    return NSMakeRange(range.location + 1, range.length - 2);
}

- (NSString *)kind {
    return ChunkKindTag;
}

/** YES when the character before the last one in the chunk is '/'. */
- (BOOL)isEmptyTag {
    return [source characterAtIndex:range.location + range.length - 2] == '/';
}

/** YES when the second character of the chunk is '/'. */
- (BOOL)isCloseTag {
    return [source characterAtIndex:range.location + 1] == '/';
}

/**
 * YES when this tag's name, minus its first character (the '/' of a close tag),
 * equals aTag's name under the current comparison options.
 */
- (BOOL)closesTag:(TagChunk *)aTag {
    NSString *myName = [self tagName];
    NSUInteger nameLength = [myName length];
    // An empty name would make (length - 1) wrap around and raise a range exception.
    if (nameLength == 0)
        return NO;

    NSComparisonResult result = [myName compare:[aTag tagName]
                                        options:compareOptions
                                          range:NSMakeRange(1, nameLength - 1)];
    return result == NSOrderedSame;
}

/** Compares the tag name with anotherTagName using the current comparison options. */
- (BOOL)tagNameEquals:(NSString *)anotherTagName {
    NSComparisonResult result = [[self tagName] compare:anotherTagName options:compareOptions];
    return result == NSOrderedSame;
}

/** YES for "p" and "br" tags. */
- (BOOL)emitsNewLineInContents {
    return [self tagNameEquals:@"p"] || [self tagNameEquals:@"br"];
}

- (void)setRange:(NSRange)aRange {
    range = aRange;
}

- (BOOL)caseSensative {
    return compareOptions == NSLiteralSearch;
}

- (void)setCaseSensative:(BOOL)flag {
    compareOptions = (flag) ? NSLiteralSearch : NSCaseInsensitiveSearch;
}

/** The source text covered by this chunk. */
- (NSString *)description {
    return [source substringWithRange:range];
}

- (NSString *)tagName {
    assert(tagName);
    return tagName;
}

+ (NSString *)humanName {
    return @"tag";
}

@end
114 |
--------------------------------------------------------------------------------
/Classes/TxtChunk.h:
--------------------------------------------------------------------------------
1 | //
2 | // TxtChunk.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 | #import "Chunk.h"
27 |
28 |
/**
 * Represents a chunk of text. Ignorable whitespace is not distinguished from
 * other text.
 */
@interface TxtChunk : Chunk
@end
37 |
--------------------------------------------------------------------------------
/Classes/TxtChunk.m:
--------------------------------------------------------------------------------
1 | //
2 | // TxtChunk.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "TxtChunk.h"
26 |
27 |
@implementation TxtChunk

/** Name used when describing this kind of chunk to a person. */
+ (NSString *)humanName {
    return @"text";
}

/** Identifies this chunk as plain text. */
- (NSString *)kind {
    return ChunkKindText;
}

@end
39 |
--------------------------------------------------------------------------------
/Classes/URLParser.h:
--------------------------------------------------------------------------------
1 | //
2 | // URLParser.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/25/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 | #import "ElementParser.h"
27 |
28 |
/**
 * URLParser feeds the data arriving on an NSURLConnection into an ElementParser
 * as it is received.
 */
@interface URLParser : NSObject {
    NSError *lastError;
    NSURLConnection *connection;
    ElementParser *parser;
    NSString *contentType;
    NSStringEncoding encoding;
    NSObject *connectionDelegate;
    NSMutableData *partialStringData;
}

/** Optional object that is forwarded the connection callbacks it responds to. */
@property (retain, nonatomic) NSObject *connectionDelegate;

/** The connection started by -parseURL:. */
@property (retain, readonly) NSURLConnection *connection;

/** The parser that receives the downloaded text. */
@property (retain, readonly) ElementParser *parser;

/** The error reported by the connection, if it failed. */
@property (retain, nonatomic) NSError *lastError;

/** The Content-Type header value of the response. */
@property (retain, nonatomic) NSString *contentType;

/** The encoding used to turn received bytes into text. */
@property NSStringEncoding encoding;

/** Trailing bytes of received data that could not yet be decoded into text. */
@property (retain, nonatomic) NSMutableData *partialStringData;

/** Creates a URLParser whose ElementParser reports to delegate. */
- (id)initWithCallbackDelegate:(id)delegate;

/** Registers method with the parser for elements matching cssSelector. */
- (void)performSelector:(SEL)method forElementsMatching:(NSString *)cssSelector;

/** Starts loading url and begins parsing. */
- (void)parseURL:(NSURL *)url;

/** Cancels the connection. */
- (void)cancelLoading;

@end
54 |
--------------------------------------------------------------------------------
/Classes/URLParser.m:
--------------------------------------------------------------------------------
1 | //
2 | // URLParser.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/25/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "URLParser.h"
26 | #import "NSString_HTML.h"
27 |
@implementation URLParser

@synthesize parser, connection, lastError, contentType, encoding, connectionDelegate, partialStringData;

/**
 * Creates a URLParser whose ElementParser reports to delegate.
 * Received bytes are decoded as ISO Latin 1 until a response says otherwise.
 */
- (id)initWithCallbackDelegate:(id)delegate {
    self = [super init];
    if (self) {
        parser = [[ElementParser alloc] init];
        parser.delegate = delegate;
        encoding = NSISOLatin1StringEncoding;
    }
    return self;
}

- (void)dealloc {
    [connection cancel];
    [connection release];
    [parser release];
    [lastError release];
    // contentType and connectionDelegate are retain properties and were never released.
    [contentType release];
    [connectionDelegate release];
    [partialStringData release];
    [super dealloc];
}

/**
 * Starts loading url and begins parsing. Any load started by an earlier call is
 * cancelled and its leftover undecoded bytes are discarded.
 */
- (void)parseURL:(NSURL *)url {
    // Without this, calling parseURL: a second time leaked the previous connection.
    [connection cancel];
    [connection release];
    connection = nil;
    self.partialStringData = nil;

    NSURLRequest *request = [[NSURLRequest alloc] initWithURL:url];
    connection = [[NSURLConnection alloc] initWithRequest:request delegate:self];
    [request release];
    [parser beginParsing];
}

/** Registers method with the parser for elements matching cssSelector. */
- (void)performSelector:(SEL)method forElementsMatching:(NSString *)cssSelector {
    [parser performSelector:method forElementsMatching:cssSelector];
}

- (void)cancelLoading {
    [connection cancel];
}

#pragma mark NSURLConnection Delegate methods

/**
 * Records the response's Content-Type, derives the text encoding and parser mode
 * from it, then forwards the callback to connectionDelegate if it implements it.
 */
- (void)connection:(NSURLConnection *)aConnection didReceiveResponse:(NSURLResponse *)response {
    assert(aConnection == connection); // was '=', which assigned instead of comparing
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
    if ([response respondsToSelector:@selector(allHeaderFields)]) {
        self.contentType = [[(NSHTTPURLResponse *)response allHeaderFields] valueForKey:@"Content-Type"];
        encoding = [NSString encodingForContentType:contentType];
        if ([contentType rangeOfString:@"html" options:NSCaseInsensitiveSearch].location != NSNotFound)
            parser.mode = ElementParserModeHTML;
        else
            parser.mode = ElementParserModeXML;
    }
    if ([connectionDelegate respondsToSelector:@selector(connection:didReceiveResponse:)])
        [connectionDelegate connection:connection didReceiveResponse:response];
    [pool release];
}

/** Stores the error, cancels the connection and forwards the callback. */
- (void)connection:(NSURLConnection *)aConnection didFailWithError:(NSError *)error {
    assert(aConnection == connection); // was '=', which assigned instead of comparing
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
    self.lastError = error;
    [connection cancel];
    if ([connectionDelegate respondsToSelector:@selector(connection:didFailWithError:)])
        [connectionDelegate connection:connection didFailWithError:error];
    [pool release];
}

/**
 * Decodes the newly received bytes (preceded by any bytes held back from the previous
 * packet) and hands the text to the parser. If the bytes do not decode, up to three
 * trailing bytes are held back for the next packet, on the assumption that a
 * multi-byte character was split across packets.
 */
- (void)connection:(NSURLConnection *)aConnection didReceiveData:(NSData *)data {
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    NSData *pending = data;
    if (partialStringData) {
        [partialStringData appendData:data];
        pending = partialStringData;
    }
    // Replacing partialStringData below releases the old buffer; hold it until we are
    // done reading its bytes.
    [pending retain];

    const char *bytes = (const char *)[pending bytes];
    NSUInteger length = [pending length];
    // Never trim more bytes than we have; (length - trimmed) must not wrap around.
    NSUInteger maxTrim = MIN((NSUInteger)3, length);
    NSUInteger trimmed = 0;
    NSString *moreSource = nil;
    while (YES) {
        moreSource = [[NSString alloc] initWithBytes:bytes length:length - trimmed encoding:encoding];
        if (moreSource || trimmed == maxTrim)
            break;
        trimmed++;
    }
    NSAssert(moreSource, @"unable to make string from data");

    if (moreSource && trimmed > 0) {
        NSLog(@"Partial string received storing %i bytes, first char=%i",
              (int)trimmed, (int)(unsigned char)bytes[length - trimmed]);
        NSMutableData *leftover = [[NSMutableData alloc] initWithBytes:bytes + (length - trimmed)
                                                                length:trimmed];
        self.partialStringData = leftover;
        [leftover release]; // setter has retained it
    }
    else {
        // Everything was consumed (or nothing could be decoded). Clearing the buffer
        // keeps already-parsed bytes from being decoded and parsed again next time.
        self.partialStringData = nil;
    }

    if (moreSource) {
        [parser continueParsingString:moreSource];
        [moreSource release];
    }
    [pending release];

    // Forward the packet exactly as it was received.
    if ([connectionDelegate respondsToSelector:@selector(connection:didReceiveData:)])
        [connectionDelegate connection:connection didReceiveData:data];
    [pool release];
}

/** Tells the parser the document is complete and forwards the callback. */
- (void)connectionDidFinishLoading:(NSURLConnection *)aConnection {
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
    [parser finishParsing];
    if ([connectionDelegate respondsToSelector:@selector(connectionDidFinishLoading:)])
        [connectionDelegate connectionDidFinishLoading:connection];
    [pool release];
}

@end
128 |
--------------------------------------------------------------------------------
/Code Overview.txt:
--------------------------------------------------------------------------------
1 | Code Overview
2 |
3 | This overview will introduce you to the major classes of ElementParser and give you enough information to find your way around the code. It complements the read me document which has some simple usage examples to get you started quickly.
4 |
5 | ElementParser is architected in layers. The lowest layer is a simple-minded parser that spins through an NSString and spits out (ie hands to the delegate) "chunks" corresponding to things like tags, comments, entities, etc. It is defined in the HTML category of NSString and looks like this:
6 |
7 | +(void)parseHTML:(NSString*) source
8 | delegate:(id)delegate
9 | selector:(SEL)selector
10 | context: (void*) context;
11 |
12 | You'll notice that many of the classes are just different flavors of Chunks to be served up to the delegate of the parseHTML method. These include: Chunk, CDataChunk, DoctypeChunk, CommentChunk, EntityChunk, ProcessingInstructionChunk, TagChunk and TxtChunk.
13 |
14 | The ElementParser class itself is the namesake of the library. It uses the low-level chunk parser to build a tree of Element objects.
15 |
16 | Element is the basic building block for the document tree. Each Element corresponds to an element (i.e. a tag) in the document. Unlike other DOM builders, only Elements are modeled in the tree... everything else (even attributes) is parsed out only when needed.
17 |
18 | ElementParser supports one-shot parsing like:
19 |
20 | -(DocumentRoot*)parseHTML:(NSString*)source;
21 |
22 | and
23 |
24 | -(DocumentRoot*)parseXML:(NSString*)source;
25 |
26 | and it supports incremental parsing with:
27 |
28 | -(DocumentRoot*)beginParsing;
29 | -(void)continueParsingString:(NSString*)string;
30 | -(void)finishParsing;
31 |
32 |
33 | Note: DocumentRoot is a subclass of Element (see below) to hold all the top-level Elements in the document.
34 |
35 | One of the most powerful parts of ElementParser is its ability to fire high-level callbacks into your code when css-style selectors are matched. You register these with the ElementParser before parsing and then are handed matching elements as they come across.
36 |
37 | URLParser ties an NSURLConnection and an ElementParser together to provide a very convenient way to parse documents out in the world leveraging the incremental features of the parser.
38 |
39 | CSSSelector and CSSSelectorPart are used to model a CSS selector. These provide a lightweight way to access elements within an XML or HTML document.
40 |
41 | CSSSelectorMatcher and CSSPartMatcher are used during the evaluation of a CSS selector against an Element tree.
42 |
43 | That's it. Comments welcome at feedback@touchtankapps.com.
--------------------------------------------------------------------------------
/Demo/Classes/DemoAppDelegate.h:
--------------------------------------------------------------------------------
1 | //
2 | // DemoAppDelegate.h
3 | // Demo
4 | //
5 | // Created by Lee Buck on 8/23/09.
6 | // Copyright Blue Bright Ventures 2009. All rights reserved.
7 | //
8 |
9 | #import
10 |
11 | @class DemoViewController;
12 |
/**
 * Application delegate for the demo: owns the main window and the root view controller.
 * NOTE(review): a protocol list after NSObject may have been lost from this listing
 * (presumably UIApplicationDelegate) — confirm against the project.
 */
@interface DemoAppDelegate : NSObject {
    UIWindow *window;
    DemoViewController *viewController;
}

/** The application's main window, connected from the nib. */
@property (nonatomic, retain) IBOutlet UIWindow *window;

/** The controller whose view is installed in the window at launch. */
@property (nonatomic, retain) IBOutlet DemoViewController *viewController;

@end
22 |
23 |
--------------------------------------------------------------------------------
/Demo/Classes/DemoAppDelegate.m:
--------------------------------------------------------------------------------
1 | //
2 | // DemoAppDelegate.m
3 | // Demo
4 | //
5 | // Created by Lee Buck on 8/23/09.
6 | // Copyright Blue Bright Ventures 2009. All rights reserved.
7 | //
8 |
9 | #import "DemoAppDelegate.h"
10 | #import "DemoViewController.h"
11 |
@implementation DemoAppDelegate

@synthesize window, viewController;

/** Installs the view controller's view in the window and shows the window. */
- (void)applicationDidFinishLaunching:(UIApplication *)application {
    // Override point for customization after app launch
    UIView *rootView = viewController.view;
    [window addSubview:rootView];
    [window makeKeyAndVisible];
}

- (void)dealloc {
    [viewController release];
    [window release];
    [super dealloc];
}

@end
34 |
--------------------------------------------------------------------------------
/Demo/Classes/DemoViewController.h:
--------------------------------------------------------------------------------
1 | //
2 | // DemoViewController.h
3 | // Demo
4 | //
5 | // Created by Lee Buck on 8/23/09.
6 | // Copyright Blue Bright Ventures 2009. All rights reserved.
7 | //
8 |
9 | #import
10 |
/**
 * Demo screen with one text view and a three-segment control that switches the
 * text view between the HTML source, the selector pattern and the match results.
 */
@interface DemoViewController : UIViewController {
    IBOutlet UITextView *textView;
    IBOutlet UISegmentedControl *segmentControl;
    NSString *source;
    NSString *pattern;
    NSString *result;
    int selectedIndex; // segment that was showing before the latest change
}

/** The document text to parse. */
@property (nonatomic, retain) NSString *source;

/** The selector pattern to match against the parsed document. */
@property (nonatomic, retain) NSString *pattern;

@property (nonatomic, retain) NSString *result;

/** Saves the text of the segment being left and shows the newly selected segment. */
- (IBAction)updateView:(id)sender;

@end
28 |
29 |
--------------------------------------------------------------------------------
/Demo/Classes/DemoViewController.m:
--------------------------------------------------------------------------------
1 | //
2 | // DemoViewController.m
3 | // Demo
4 | //
5 | // Created by Lee Buck on 8/23/09.
6 | // Copyright Blue Bright Ventures 2009. All rights reserved.
7 | //
8 |
9 | #import "DemoViewController.h"
10 | #import "Element.h"
11 | #import "DocumentRoot.h"
12 |
@implementation DemoViewController

@synthesize source, pattern, result;

/*
// The designated initializer. Override to perform setup that is required before the view is loaded.
- (id)initWithNibName:(NSString *)nibNameOrNil bundle:(NSBundle *)nibBundleOrNil {
    if (self = [super initWithNibName:nibNameOrNil bundle:nibBundleOrNil]) {
        // Custom initialization
    }
    return self;
}
*/

/*
// Implement loadView to create a view hierarchy programmatically, without using a nib.
- (void)loadView {
}
*/

/**
 * Loads the bundled source2.html into the text view and starts with the
 * match-everything pattern "*".
 */
- (void)viewDidLoad {
    [super viewDidLoad];
    // Pre-populate source with the bundled source2.html file.
    NSString *path = [[NSBundle mainBundle] pathForResource:@"source2" ofType:@"html"];
    NSStringEncoding encoding;
    self.source = [NSString stringWithContentsOfFile:path usedEncoding:&encoding error:NULL];
    textView.text = self.source;
    self.pattern = @"*";
}

/*
// Override to allow orientations other than the default portrait orientation.
- (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation {
    // Return YES for supported orientations
    return (interfaceOrientation == UIInterfaceOrientationPortrait);
}
*/

- (void)didReceiveMemoryWarning {
    // Releases the view if it doesn't have a superview.
    [super didReceiveMemoryWarning];

    // Release any cached data, images, etc that aren't in use.
}

- (void)viewDidUnload {
    // Release any retained subviews of the main view.
    // e.g. self.myOutlet = nil;
}

- (void)dealloc {
    // These previously sent -dealloc directly, which destroys the strings even when
    // other owners still hold them. Give up our own references instead.
    [source release];
    [pattern release];
    [result release];
    [super dealloc];
}

/**
 * Parses the source as HTML, selects the elements matching the pattern and returns
 * one entry per element: its description followed by up to five characters of its
 * contents source and "...". Entries are separated by a ruled line.
 */
- (NSString *)matchResult {
    DocumentRoot *document = [Element parseHTML:source];
    NSArray *elements = [document selectElements:pattern];
    NSMutableArray *results = [NSMutableArray array];
    for (Element *element in elements) {
        NSString *snipet = [element contentsSource];
        snipet = ([snipet length] > 5) ? [snipet substringToIndex:5] : snipet;
        snipet = [[element description] stringByAppendingFormat:@"%@...", snipet];
        [results addObject:snipet];
    }
    return [results componentsJoinedByString:@"\n—————————————————\n"];
}

/**
 * Called when the segmented control changes. Stores the text view's contents into
 * the property for the segment being left (source or pattern), then shows the
 * content for the newly selected segment. The results segment is read-only.
 */
- (IBAction)updateView:(id)sender {
    NSInteger newIndex = [segmentControl selectedSegmentIndex];
    if (selectedIndex == newIndex) return;

    // Save edits made while the previous segment was showing. Nothing is saved when
    // leaving the results segment (2).
    if (selectedIndex == 0) {
        self.source = [textView text];
    }
    else if (selectedIndex == 1) {
        self.pattern = [textView text];
    }

    if (newIndex == 0) {
        textView.text = self.source;
        textView.editable = YES;
    }
    else if (newIndex == 1) {
        textView.text = self.pattern;
        textView.editable = YES;
    }
    else if (newIndex == 2) {
        textView.editable = NO;
        textView.text = [self matchResult];
    }
    selectedIndex = [segmentControl selectedSegmentIndex];
}

@end
118 |
--------------------------------------------------------------------------------
/Demo/Demo-Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CFBundleDevelopmentRegion
6 | English
7 | CFBundleDisplayName
8 | ${PRODUCT_NAME}
9 | CFBundleExecutable
10 | ${EXECUTABLE_NAME}
11 | CFBundleIconFile
12 |
13 | CFBundleIdentifier
14 | com.yourcompany.${PRODUCT_NAME:rfc1034identifier}
15 | CFBundleInfoDictionaryVersion
16 | 6.0
17 | CFBundleName
18 | ${PRODUCT_NAME}
19 | CFBundlePackageType
20 | APPL
21 | CFBundleSignature
22 | ????
23 | CFBundleVersion
24 | 1.0
25 | LSRequiresIPhoneOS
26 |
27 | NSMainNibFile
28 | MainWindow
29 |
30 |
31 |
--------------------------------------------------------------------------------
/Demo/Demo.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
1 | // !$*UTF8*$!
2 | {
3 | archiveVersion = 1;
4 | classes = {
5 | };
6 | objectVersion = 45;
7 | objects = {
8 |
9 | /* Begin PBXBuildFile section */
10 | 1D3623260D0F684500981E51 /* DemoAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 1D3623250D0F684500981E51 /* DemoAppDelegate.m */; };
11 | 1D60589B0D05DD56006BFB54 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 29B97316FDCFA39411CA2CEA /* main.m */; };
12 | 1D60589F0D05DD5A006BFB54 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1D30AB110D05D00D00671497 /* Foundation.framework */; };
13 | 1DF5F4E00D08C38300B7A737 /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1DF5F4DF0D08C38300B7A737 /* UIKit.framework */; };
14 | 288765A50DF7441C002DB57D /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 288765A40DF7441C002DB57D /* CoreGraphics.framework */; };
15 | 2899E5220DE3E06400AC0155 /* DemoViewController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 2899E5210DE3E06400AC0155 /* DemoViewController.xib */; };
16 | 28AD733F0D9D9553002E5188 /* MainWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 28AD733E0D9D9553002E5188 /* MainWindow.xib */; };
17 | 28D7ACF80DDB3853001CB0EB /* DemoViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 28D7ACF70DDB3853001CB0EB /* DemoViewController.m */; };
18 | DC65742F1042E7C500BE3D62 /* CDataChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65740E1042E7C500BE3D62 /* CDataChunk.m */; };
19 | DC6574301042E7C500BE3D62 /* Chunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574101042E7C500BE3D62 /* Chunk.m */; };
20 | DC6574311042E7C500BE3D62 /* CommentChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574121042E7C500BE3D62 /* CommentChunk.m */; };
21 | DC6574321042E7C500BE3D62 /* CSSPartMatcher.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574141042E7C500BE3D62 /* CSSPartMatcher.m */; };
22 | DC6574331042E7C500BE3D62 /* CSSSelector.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574161042E7C500BE3D62 /* CSSSelector.m */; };
23 | DC6574341042E7C500BE3D62 /* CSSSelectorMatcher.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574181042E7C500BE3D62 /* CSSSelectorMatcher.m */; };
24 | DC6574351042E7C500BE3D62 /* CSSSelectorPart.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65741A1042E7C500BE3D62 /* CSSSelectorPart.m */; };
25 | DC6574361042E7C500BE3D62 /* DoctypeChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65741C1042E7C500BE3D62 /* DoctypeChunk.m */; };
26 | DC6574371042E7C500BE3D62 /* DocumentRoot.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65741E1042E7C500BE3D62 /* DocumentRoot.m */; };
27 | DC6574381042E7C500BE3D62 /* Element.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574201042E7C500BE3D62 /* Element.m */; };
28 | DC6574391042E7C500BE3D62 /* ElementParser.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574221042E7C500BE3D62 /* ElementParser.m */; };
29 | DC65743A1042E7C500BE3D62 /* EntityChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574241042E7C500BE3D62 /* EntityChunk.m */; };
30 | DC65743B1042E7C500BE3D62 /* NSString_HTML.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574261042E7C500BE3D62 /* NSString_HTML.m */; };
31 | DC65743C1042E7C500BE3D62 /* ProcessingInstructionChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574281042E7C500BE3D62 /* ProcessingInstructionChunk.m */; };
32 | DC65743D1042E7C500BE3D62 /* TagChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65742A1042E7C500BE3D62 /* TagChunk.m */; };
33 | DC65743E1042E7C500BE3D62 /* TxtChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65742C1042E7C500BE3D62 /* TxtChunk.m */; };
34 | DC65743F1042E7C500BE3D62 /* URLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65742E1042E7C500BE3D62 /* URLParser.m */; };
35 | DC903C0610777B8000F65535 /* source.html in Resources */ = {isa = PBXBuildFile; fileRef = DC903C0510777B8000F65535 /* source.html */; };
36 | DCBCF2791078D72F00B671ED /* source2.html in Resources */ = {isa = PBXBuildFile; fileRef = DCBCF2781078D72F00B671ED /* source2.html */; };
37 | /* End PBXBuildFile section */
38 |
39 | /* Begin PBXFileReference section */
40 | 1D30AB110D05D00D00671497 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
41 | 1D3623240D0F684500981E51 /* DemoAppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DemoAppDelegate.h; sourceTree = ""; };
42 | 1D3623250D0F684500981E51 /* DemoAppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = DemoAppDelegate.m; sourceTree = ""; };
43 | 1D6058910D05DD3D006BFB54 /* Demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Demo.app; sourceTree = BUILT_PRODUCTS_DIR; };
44 | 1DF5F4DF0D08C38300B7A737 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; };
45 | 288765A40DF7441C002DB57D /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; };
46 | 2899E5210DE3E06400AC0155 /* DemoViewController.xib */ = {isa = PBXFileReference; lastKnownFileType = file.xib; path = DemoViewController.xib; sourceTree = ""; };
47 | 28AD733E0D9D9553002E5188 /* MainWindow.xib */ = {isa = PBXFileReference; lastKnownFileType = file.xib; path = MainWindow.xib; sourceTree = ""; };
48 | 28D7ACF60DDB3853001CB0EB /* DemoViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DemoViewController.h; sourceTree = ""; };
49 | 28D7ACF70DDB3853001CB0EB /* DemoViewController.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = DemoViewController.m; sourceTree = ""; };
50 | 29B97316FDCFA39411CA2CEA /* main.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; };
51 | 32CA4F630368D1EE00C91783 /* Demo_Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Demo_Prefix.pch; sourceTree = ""; };
52 | 8D1107310486CEB800E47090 /* Demo-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "Demo-Info.plist"; plistStructureDefinitionIdentifier = "com.apple.xcode.plist.structure-definition.iphone.info-plist"; sourceTree = ""; };
53 | DC65740D1042E7C500BE3D62 /* CDataChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CDataChunk.h; path = ../Classes/CDataChunk.h; sourceTree = SOURCE_ROOT; };
54 | DC65740E1042E7C500BE3D62 /* CDataChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CDataChunk.m; path = ../Classes/CDataChunk.m; sourceTree = SOURCE_ROOT; };
55 | DC65740F1042E7C500BE3D62 /* Chunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Chunk.h; path = ../Classes/Chunk.h; sourceTree = SOURCE_ROOT; };
56 | DC6574101042E7C500BE3D62 /* Chunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Chunk.m; path = ../Classes/Chunk.m; sourceTree = SOURCE_ROOT; };
57 | DC6574111042E7C500BE3D62 /* CommentChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CommentChunk.h; path = ../Classes/CommentChunk.h; sourceTree = SOURCE_ROOT; };
58 | DC6574121042E7C500BE3D62 /* CommentChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CommentChunk.m; path = ../Classes/CommentChunk.m; sourceTree = SOURCE_ROOT; };
59 | DC6574131042E7C500BE3D62 /* CSSPartMatcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSPartMatcher.h; path = ../Classes/CSSPartMatcher.h; sourceTree = SOURCE_ROOT; };
60 | DC6574141042E7C500BE3D62 /* CSSPartMatcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSPartMatcher.m; path = ../Classes/CSSPartMatcher.m; sourceTree = SOURCE_ROOT; };
61 | DC6574151042E7C500BE3D62 /* CSSSelector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelector.h; path = ../Classes/CSSSelector.h; sourceTree = SOURCE_ROOT; };
62 | DC6574161042E7C500BE3D62 /* CSSSelector.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelector.m; path = ../Classes/CSSSelector.m; sourceTree = SOURCE_ROOT; };
63 | DC6574171042E7C500BE3D62 /* CSSSelectorMatcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelectorMatcher.h; path = ../Classes/CSSSelectorMatcher.h; sourceTree = SOURCE_ROOT; };
64 | DC6574181042E7C500BE3D62 /* CSSSelectorMatcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelectorMatcher.m; path = ../Classes/CSSSelectorMatcher.m; sourceTree = SOURCE_ROOT; };
65 | DC6574191042E7C500BE3D62 /* CSSSelectorPart.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelectorPart.h; path = ../Classes/CSSSelectorPart.h; sourceTree = SOURCE_ROOT; };
66 | DC65741A1042E7C500BE3D62 /* CSSSelectorPart.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelectorPart.m; path = ../Classes/CSSSelectorPart.m; sourceTree = SOURCE_ROOT; };
67 | DC65741B1042E7C500BE3D62 /* DoctypeChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DoctypeChunk.h; path = ../Classes/DoctypeChunk.h; sourceTree = SOURCE_ROOT; };
68 | DC65741C1042E7C500BE3D62 /* DoctypeChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = DoctypeChunk.m; path = ../Classes/DoctypeChunk.m; sourceTree = SOURCE_ROOT; };
69 | DC65741D1042E7C500BE3D62 /* DocumentRoot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DocumentRoot.h; path = ../Classes/DocumentRoot.h; sourceTree = SOURCE_ROOT; };
70 | DC65741E1042E7C500BE3D62 /* DocumentRoot.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = DocumentRoot.m; path = ../Classes/DocumentRoot.m; sourceTree = SOURCE_ROOT; };
71 | DC65741F1042E7C500BE3D62 /* Element.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Element.h; path = ../Classes/Element.h; sourceTree = SOURCE_ROOT; };
72 | DC6574201042E7C500BE3D62 /* Element.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Element.m; path = ../Classes/Element.m; sourceTree = SOURCE_ROOT; };
73 | DC6574211042E7C500BE3D62 /* ElementParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ElementParser.h; path = ../Classes/ElementParser.h; sourceTree = SOURCE_ROOT; };
74 | DC6574221042E7C500BE3D62 /* ElementParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = ElementParser.m; path = ../Classes/ElementParser.m; sourceTree = SOURCE_ROOT; };
75 | DC6574231042E7C500BE3D62 /* EntityChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = EntityChunk.h; path = ../Classes/EntityChunk.h; sourceTree = SOURCE_ROOT; };
76 | DC6574241042E7C500BE3D62 /* EntityChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = EntityChunk.m; path = ../Classes/EntityChunk.m; sourceTree = SOURCE_ROOT; };
77 | DC6574251042E7C500BE3D62 /* NSString_HTML.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = NSString_HTML.h; path = ../Classes/NSString_HTML.h; sourceTree = SOURCE_ROOT; };
78 | DC6574261042E7C500BE3D62 /* NSString_HTML.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = NSString_HTML.m; path = ../Classes/NSString_HTML.m; sourceTree = SOURCE_ROOT; };
79 | DC6574271042E7C500BE3D62 /* ProcessingInstructionChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ProcessingInstructionChunk.h; path = ../Classes/ProcessingInstructionChunk.h; sourceTree = SOURCE_ROOT; };
80 | DC6574281042E7C500BE3D62 /* ProcessingInstructionChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = ProcessingInstructionChunk.m; path = ../Classes/ProcessingInstructionChunk.m; sourceTree = SOURCE_ROOT; };
81 | DC6574291042E7C500BE3D62 /* TagChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TagChunk.h; path = ../Classes/TagChunk.h; sourceTree = SOURCE_ROOT; };
82 | DC65742A1042E7C500BE3D62 /* TagChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TagChunk.m; path = ../Classes/TagChunk.m; sourceTree = SOURCE_ROOT; };
83 | DC65742B1042E7C500BE3D62 /* TxtChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TxtChunk.h; path = ../Classes/TxtChunk.h; sourceTree = SOURCE_ROOT; };
84 | DC65742C1042E7C500BE3D62 /* TxtChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TxtChunk.m; path = ../Classes/TxtChunk.m; sourceTree = SOURCE_ROOT; };
85 | DC65742D1042E7C500BE3D62 /* URLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = URLParser.h; path = ../Classes/URLParser.h; sourceTree = SOURCE_ROOT; };
86 | DC65742E1042E7C500BE3D62 /* URLParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = URLParser.m; path = ../Classes/URLParser.m; sourceTree = SOURCE_ROOT; };
87 | DC903C0510777B8000F65535 /* source.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = source.html; sourceTree = ""; };
88 | DCBCF2781078D72F00B671ED /* source2.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = source2.html; sourceTree = ""; };
89 | /* End PBXFileReference section */
90 |
91 | /* Begin PBXFrameworksBuildPhase section */
92 | 1D60588F0D05DD3D006BFB54 /* Frameworks */ = {
93 | isa = PBXFrameworksBuildPhase;
94 | buildActionMask = 2147483647;
95 | files = (
96 | 1D60589F0D05DD5A006BFB54 /* Foundation.framework in Frameworks */,
97 | 1DF5F4E00D08C38300B7A737 /* UIKit.framework in Frameworks */,
98 | 288765A50DF7441C002DB57D /* CoreGraphics.framework in Frameworks */,
99 | );
100 | runOnlyForDeploymentPostprocessing = 0;
101 | };
102 | /* End PBXFrameworksBuildPhase section */
103 |
104 | /* Begin PBXGroup section */
105 | 080E96DDFE201D6D7F000001 /* Classes */ = {
106 | isa = PBXGroup;
107 | children = (
108 | 1D3623240D0F684500981E51 /* DemoAppDelegate.h */,
109 | 1D3623250D0F684500981E51 /* DemoAppDelegate.m */,
110 | 28D7ACF60DDB3853001CB0EB /* DemoViewController.h */,
111 | 28D7ACF70DDB3853001CB0EB /* DemoViewController.m */,
112 | );
113 | path = Classes;
114 | sourceTree = "";
115 | };
116 | 19C28FACFE9D520D11CA2CBB /* Products */ = {
117 | isa = PBXGroup;
118 | children = (
119 | 1D6058910D05DD3D006BFB54 /* Demo.app */,
120 | );
121 | name = Products;
122 | sourceTree = "";
123 | };
124 | 29B97314FDCFA39411CA2CEA /* CustomTemplate */ = {
125 | isa = PBXGroup;
126 | children = (
127 | DC65740C1042E7A500BE3D62 /* ElementParser */,
128 | 080E96DDFE201D6D7F000001 /* Classes */,
129 | 29B97315FDCFA39411CA2CEA /* Other Sources */,
130 | 29B97317FDCFA39411CA2CEA /* Resources */,
131 | 29B97323FDCFA39411CA2CEA /* Frameworks */,
132 | 19C28FACFE9D520D11CA2CBB /* Products */,
133 | );
134 | name = CustomTemplate;
135 | sourceTree = "";
136 | };
137 | 29B97315FDCFA39411CA2CEA /* Other Sources */ = {
138 | isa = PBXGroup;
139 | children = (
140 | 32CA4F630368D1EE00C91783 /* Demo_Prefix.pch */,
141 | 29B97316FDCFA39411CA2CEA /* main.m */,
142 | );
143 | name = "Other Sources";
144 | sourceTree = "";
145 | };
146 | 29B97317FDCFA39411CA2CEA /* Resources */ = {
147 | isa = PBXGroup;
148 | children = (
149 | 2899E5210DE3E06400AC0155 /* DemoViewController.xib */,
150 | DC903C0510777B8000F65535 /* source.html */,
151 | DCBCF2781078D72F00B671ED /* source2.html */,
152 | 28AD733E0D9D9553002E5188 /* MainWindow.xib */,
153 | 8D1107310486CEB800E47090 /* Demo-Info.plist */,
154 | );
155 | name = Resources;
156 | sourceTree = "";
157 | };
158 | 29B97323FDCFA39411CA2CEA /* Frameworks */ = {
159 | isa = PBXGroup;
160 | children = (
161 | 1DF5F4DF0D08C38300B7A737 /* UIKit.framework */,
162 | 1D30AB110D05D00D00671497 /* Foundation.framework */,
163 | 288765A40DF7441C002DB57D /* CoreGraphics.framework */,
164 | );
165 | name = Frameworks;
166 | sourceTree = "";
167 | };
168 | DC65740C1042E7A500BE3D62 /* ElementParser */ = {
169 | isa = PBXGroup;
170 | children = (
171 | DC65740D1042E7C500BE3D62 /* CDataChunk.h */,
172 | DC65740E1042E7C500BE3D62 /* CDataChunk.m */,
173 | DC65740F1042E7C500BE3D62 /* Chunk.h */,
174 | DC6574101042E7C500BE3D62 /* Chunk.m */,
175 | DC6574111042E7C500BE3D62 /* CommentChunk.h */,
176 | DC6574121042E7C500BE3D62 /* CommentChunk.m */,
177 | DC6574131042E7C500BE3D62 /* CSSPartMatcher.h */,
178 | DC6574141042E7C500BE3D62 /* CSSPartMatcher.m */,
179 | DC6574151042E7C500BE3D62 /* CSSSelector.h */,
180 | DC6574161042E7C500BE3D62 /* CSSSelector.m */,
181 | DC6574171042E7C500BE3D62 /* CSSSelectorMatcher.h */,
182 | DC6574181042E7C500BE3D62 /* CSSSelectorMatcher.m */,
183 | DC6574191042E7C500BE3D62 /* CSSSelectorPart.h */,
184 | DC65741A1042E7C500BE3D62 /* CSSSelectorPart.m */,
185 | DC65741B1042E7C500BE3D62 /* DoctypeChunk.h */,
186 | DC65741C1042E7C500BE3D62 /* DoctypeChunk.m */,
187 | DC65741D1042E7C500BE3D62 /* DocumentRoot.h */,
188 | DC65741E1042E7C500BE3D62 /* DocumentRoot.m */,
189 | DC65741F1042E7C500BE3D62 /* Element.h */,
190 | DC6574201042E7C500BE3D62 /* Element.m */,
191 | DC6574211042E7C500BE3D62 /* ElementParser.h */,
192 | DC6574221042E7C500BE3D62 /* ElementParser.m */,
193 | DC6574231042E7C500BE3D62 /* EntityChunk.h */,
194 | DC6574241042E7C500BE3D62 /* EntityChunk.m */,
195 | DC6574251042E7C500BE3D62 /* NSString_HTML.h */,
196 | DC6574261042E7C500BE3D62 /* NSString_HTML.m */,
197 | DC6574271042E7C500BE3D62 /* ProcessingInstructionChunk.h */,
198 | DC6574281042E7C500BE3D62 /* ProcessingInstructionChunk.m */,
199 | DC6574291042E7C500BE3D62 /* TagChunk.h */,
200 | DC65742A1042E7C500BE3D62 /* TagChunk.m */,
201 | DC65742B1042E7C500BE3D62 /* TxtChunk.h */,
202 | DC65742C1042E7C500BE3D62 /* TxtChunk.m */,
203 | DC65742D1042E7C500BE3D62 /* URLParser.h */,
204 | DC65742E1042E7C500BE3D62 /* URLParser.m */,
205 | );
206 | name = ElementParser;
207 | path = Classes;
208 | sourceTree = "";
209 | };
210 | /* End PBXGroup section */
211 |
212 | /* Begin PBXNativeTarget section */
213 | 1D6058900D05DD3D006BFB54 /* Demo */ = {
214 | isa = PBXNativeTarget;
215 | buildConfigurationList = 1D6058960D05DD3E006BFB54 /* Build configuration list for PBXNativeTarget "Demo" */;
216 | buildPhases = (
217 | 1D60588D0D05DD3D006BFB54 /* Resources */,
218 | 1D60588E0D05DD3D006BFB54 /* Sources */,
219 | 1D60588F0D05DD3D006BFB54 /* Frameworks */,
220 | );
221 | buildRules = (
222 | );
223 | dependencies = (
224 | );
225 | name = Demo;
226 | productName = Demo;
227 | productReference = 1D6058910D05DD3D006BFB54 /* Demo.app */;
228 | productType = "com.apple.product-type.application";
229 | };
230 | /* End PBXNativeTarget section */
231 |
232 | /* Begin PBXProject section */
233 | 29B97313FDCFA39411CA2CEA /* Project object */ = {
234 | isa = PBXProject;
235 | buildConfigurationList = C01FCF4E08A954540054247B /* Build configuration list for PBXProject "Demo" */;
236 | compatibilityVersion = "Xcode 3.1";
237 | hasScannedForEncodings = 1;
238 | mainGroup = 29B97314FDCFA39411CA2CEA /* CustomTemplate */;
239 | projectDirPath = "";
240 | projectRoot = "";
241 | targets = (
242 | 1D6058900D05DD3D006BFB54 /* Demo */,
243 | );
244 | };
245 | /* End PBXProject section */
246 |
247 | /* Begin PBXResourcesBuildPhase section */
248 | 1D60588D0D05DD3D006BFB54 /* Resources */ = {
249 | isa = PBXResourcesBuildPhase;
250 | buildActionMask = 2147483647;
251 | files = (
252 | 28AD733F0D9D9553002E5188 /* MainWindow.xib in Resources */,
253 | 2899E5220DE3E06400AC0155 /* DemoViewController.xib in Resources */,
254 | DC903C0610777B8000F65535 /* source.html in Resources */,
255 | DCBCF2791078D72F00B671ED /* source2.html in Resources */,
256 | );
257 | runOnlyForDeploymentPostprocessing = 0;
258 | };
259 | /* End PBXResourcesBuildPhase section */
260 |
261 | /* Begin PBXSourcesBuildPhase section */
262 | 1D60588E0D05DD3D006BFB54 /* Sources */ = {
263 | isa = PBXSourcesBuildPhase;
264 | buildActionMask = 2147483647;
265 | files = (
266 | 1D60589B0D05DD56006BFB54 /* main.m in Sources */,
267 | 1D3623260D0F684500981E51 /* DemoAppDelegate.m in Sources */,
268 | 28D7ACF80DDB3853001CB0EB /* DemoViewController.m in Sources */,
269 | DC65742F1042E7C500BE3D62 /* CDataChunk.m in Sources */,
270 | DC6574301042E7C500BE3D62 /* Chunk.m in Sources */,
271 | DC6574311042E7C500BE3D62 /* CommentChunk.m in Sources */,
272 | DC6574321042E7C500BE3D62 /* CSSPartMatcher.m in Sources */,
273 | DC6574331042E7C500BE3D62 /* CSSSelector.m in Sources */,
274 | DC6574341042E7C500BE3D62 /* CSSSelectorMatcher.m in Sources */,
275 | DC6574351042E7C500BE3D62 /* CSSSelectorPart.m in Sources */,
276 | DC6574361042E7C500BE3D62 /* DoctypeChunk.m in Sources */,
277 | DC6574371042E7C500BE3D62 /* DocumentRoot.m in Sources */,
278 | DC6574381042E7C500BE3D62 /* Element.m in Sources */,
279 | DC6574391042E7C500BE3D62 /* ElementParser.m in Sources */,
280 | DC65743A1042E7C500BE3D62 /* EntityChunk.m in Sources */,
281 | DC65743B1042E7C500BE3D62 /* NSString_HTML.m in Sources */,
282 | DC65743C1042E7C500BE3D62 /* ProcessingInstructionChunk.m in Sources */,
283 | DC65743D1042E7C500BE3D62 /* TagChunk.m in Sources */,
284 | DC65743E1042E7C500BE3D62 /* TxtChunk.m in Sources */,
285 | DC65743F1042E7C500BE3D62 /* URLParser.m in Sources */,
286 | );
287 | runOnlyForDeploymentPostprocessing = 0;
288 | };
289 | /* End PBXSourcesBuildPhase section */
290 |
291 | /* Begin XCBuildConfiguration section */
292 | 1D6058940D05DD3E006BFB54 /* Debug */ = {
293 | isa = XCBuildConfiguration;
294 | buildSettings = {
295 | ALWAYS_SEARCH_USER_PATHS = NO;
296 | COPY_PHASE_STRIP = NO;
297 | GCC_DYNAMIC_NO_PIC = NO;
298 | GCC_OPTIMIZATION_LEVEL = 0;
299 | GCC_PRECOMPILE_PREFIX_HEADER = YES;
300 | GCC_PREFIX_HEADER = Demo_Prefix.pch;
301 | INFOPLIST_FILE = "Demo-Info.plist";
302 | PRODUCT_NAME = Demo;
303 | };
304 | name = Debug;
305 | };
306 | 1D6058950D05DD3E006BFB54 /* Release */ = {
307 | isa = XCBuildConfiguration;
308 | buildSettings = {
309 | ALWAYS_SEARCH_USER_PATHS = NO;
310 | COPY_PHASE_STRIP = YES;
311 | GCC_PRECOMPILE_PREFIX_HEADER = YES;
312 | GCC_PREFIX_HEADER = Demo_Prefix.pch;
313 | INFOPLIST_FILE = "Demo-Info.plist";
314 | PRODUCT_NAME = Demo;
315 | };
316 | name = Release;
317 | };
318 | C01FCF4F08A954540054247B /* Debug */ = {
319 | isa = XCBuildConfiguration;
320 | buildSettings = {
321 | ARCHS = "$(ARCHS_STANDARD_32_BIT)";
322 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
323 | GCC_C_LANGUAGE_STANDARD = c99;
324 | GCC_WARN_ABOUT_RETURN_TYPE = YES;
325 | GCC_WARN_UNUSED_VARIABLE = YES;
326 | PREBINDING = NO;
327 | SDKROOT = iphoneos3.0;
328 | };
329 | name = Debug;
330 | };
331 | C01FCF5008A954540054247B /* Release */ = {
332 | isa = XCBuildConfiguration;
333 | buildSettings = {
334 | ARCHS = "$(ARCHS_STANDARD_32_BIT)";
335 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
336 | GCC_C_LANGUAGE_STANDARD = c99;
337 | GCC_WARN_ABOUT_RETURN_TYPE = YES;
338 | GCC_WARN_UNUSED_VARIABLE = YES;
339 | PREBINDING = NO;
340 | SDKROOT = iphoneos3.0;
341 | };
342 | name = Release;
343 | };
344 | /* End XCBuildConfiguration section */
345 |
346 | /* Begin XCConfigurationList section */
347 | 1D6058960D05DD3E006BFB54 /* Build configuration list for PBXNativeTarget "Demo" */ = {
348 | isa = XCConfigurationList;
349 | buildConfigurations = (
350 | 1D6058940D05DD3E006BFB54 /* Debug */,
351 | 1D6058950D05DD3E006BFB54 /* Release */,
352 | );
353 | defaultConfigurationIsVisible = 0;
354 | defaultConfigurationName = Release;
355 | };
356 | C01FCF4E08A954540054247B /* Build configuration list for PBXProject "Demo" */ = {
357 | isa = XCConfigurationList;
358 | buildConfigurations = (
359 | C01FCF4F08A954540054247B /* Debug */,
360 | C01FCF5008A954540054247B /* Release */,
361 | );
362 | defaultConfigurationIsVisible = 0;
363 | defaultConfigurationName = Release;
364 | };
365 | /* End XCConfigurationList section */
366 | };
367 | rootObject = 29B97313FDCFA39411CA2CEA /* Project object */;
368 | }
369 |
--------------------------------------------------------------------------------
/Demo/Demo_Prefix.pch:
--------------------------------------------------------------------------------
1 | //
2 | // Prefix header for all source files of the 'Demo' target in the 'Demo' project
3 | //
4 |
5 | #ifdef __OBJC__
6 | #import <Foundation/Foundation.h>
7 | #import <UIKit/UIKit.h>
8 | #endif
9 |
--------------------------------------------------------------------------------
/Demo/MainWindow.xib:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 768
5 | 10A288
6 | 715
7 | 1010
8 | 411.00
9 |
10 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin
11 | 46
12 |
13 |
14 | YES
15 |
16 |
17 |
18 | YES
19 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin
20 |
21 |
22 | YES
23 |
24 | YES
25 |
26 |
27 | YES
28 |
29 |
30 |
31 | YES
32 |
33 | IBFilesOwner
34 |
35 |
36 | IBFirstResponder
37 |
38 |
39 |
40 | DemoViewController
41 |
42 |
43 |
44 |
45 | 292
46 | {320, 480}
47 |
48 | 1
49 | MSAxIDEAA
50 |
51 | NO
52 | NO
53 |
54 |
55 |
56 |
57 |
58 | YES
59 |
60 |
61 | delegate
62 |
63 |
64 |
65 | 4
66 |
67 |
68 |
69 | viewController
70 |
71 |
72 |
73 | 11
74 |
75 |
76 |
77 | window
78 |
79 |
80 |
81 | 14
82 |
83 |
84 |
85 |
86 | YES
87 |
88 | 0
89 |
90 |
91 |
92 |
93 |
94 | -1
95 |
96 |
97 | File's Owner
98 |
99 |
100 | 3
101 |
102 |
103 | Demo App Delegate
104 |
105 |
106 | -2
107 |
108 |
109 |
110 |
111 | 10
112 |
113 |
114 |
115 |
116 | 12
117 |
118 |
119 |
120 |
121 |
122 |
123 | YES
124 |
125 | YES
126 | -1.CustomClassName
127 | -2.CustomClassName
128 | 10.CustomClassName
129 | 10.IBEditorWindowLastContentRect
130 | 10.IBPluginDependency
131 | 12.IBEditorWindowLastContentRect
132 | 12.IBPluginDependency
133 | 3.CustomClassName
134 | 3.IBPluginDependency
135 |
136 |
137 | YES
138 | UIApplication
139 | UIResponder
140 | DemoViewController
141 | {{512, 351}, {320, 480}}
142 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin
143 | {{525, 346}, {320, 480}}
144 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin
145 | DemoAppDelegate
146 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin
147 |
148 |
149 |
150 | YES
151 |
152 |
153 | YES
154 |
155 |
156 |
157 |
158 | YES
159 |
160 |
161 | YES
162 |
163 |
164 |
165 | 14
166 |
167 |
168 |
169 | YES
170 |
171 | DemoAppDelegate
172 | NSObject
173 |
174 | YES
175 |
176 | YES
177 | viewController
178 | window
179 |
180 |
181 | YES
182 | DemoViewController
183 | UIWindow
184 |
185 |
186 |
187 | IBProjectSource
188 | Classes/DemoAppDelegate.h
189 |
190 |
191 |
192 | DemoAppDelegate
193 | NSObject
194 |
195 | IBUserSource
196 |
197 |
198 |
199 |
200 | DemoViewController
201 | UIViewController
202 |
203 | IBProjectSource
204 | Classes/DemoViewController.h
205 |
206 |
207 |
208 |
209 | 0
210 |
211 | com.apple.InterfaceBuilder.CocoaTouchPlugin.InterfaceBuilder3
212 |
213 |
214 | YES
215 | Demo.xcodeproj
216 | 3
217 |
218 |
219 |
--------------------------------------------------------------------------------
/Demo/main.m:
--------------------------------------------------------------------------------
1 | //
2 | // main.m
3 | // Demo
4 | //
5 | // Created by Lee Buck on 8/23/09.
6 | // Copyright Blue Bright Ventures 2009. All rights reserved.
7 | //
8 |
9 | #import <UIKit/UIKit.h>
10 |
11 | int main(int argc, char *argv[]) {
12 |     // Process entry point: runs the UIKit application inside a top-level autorelease pool.
13 |     NSAutoreleasePool *topLevelPool = [[NSAutoreleasePool alloc] init];
14 |     int exitStatus = UIApplicationMain(argc, argv, nil, nil);  // no principal/delegate class names are passed
15 |     [topLevelPool release];  // manual reference counting: balance the alloc/init above
16 |     return exitStatus;
17 | }
18 |
--------------------------------------------------------------------------------
/Demo/source2.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | 1
4 | 2
5 | 3
6 | 4
7 | 5
8 | 6
9 | 7
10 | 8
11 | 9
12 | 10
13 | 11
14 | 12
15 | 13
16 | 14
17 | 15
18 | 16
19 | 17
20 | 18
21 | 19
22 | 20
23 | 21
24 | 22
25 | 23
26 | 24
27 | 25
28 | 26
29 | 27
30 | 28
31 | 29
32 | 30
33 | 31
34 | 32
35 | 33
36 | 34
37 | 35
38 | 36
39 | 37
40 | 38
41 | 39
42 | 40
43 | 41
44 | 42
45 | 43
46 | 44
47 | 45
48 | 46
49 | 47
50 | 48
51 | 49
52 | 50
53 | 51
54 | 52
55 | 53
56 | 54
57 | 55
58 | 56
59 | 57
60 | 58
61 | 59
62 | 60
63 |
64 |
--------------------------------------------------------------------------------
/ElementParser.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
1 | // !$*UTF8*$!
2 | {
3 | archiveVersion = 1;
4 | classes = {
5 | };
6 | objectVersion = 45;
7 | objects = {
8 |
9 | /* Begin PBXBuildFile section */
10 | 3F4E18B3102DD2FA00320118 /* CDataChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1891102DD2FA00320118 /* CDataChunk.h */; };
11 | 3F4E18B4102DD2FA00320118 /* CDataChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E1892102DD2FA00320118 /* CDataChunk.m */; };
12 | 3F4E18B5102DD2FA00320118 /* Chunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1893102DD2FA00320118 /* Chunk.h */; };
13 | 3F4E18B6102DD2FA00320118 /* Chunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E1894102DD2FA00320118 /* Chunk.m */; };
14 | 3F4E18B7102DD2FA00320118 /* CommentChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1895102DD2FA00320118 /* CommentChunk.h */; };
15 | 3F4E18B8102DD2FA00320118 /* CommentChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E1896102DD2FA00320118 /* CommentChunk.m */; };
16 | 3F4E18B9102DD2FA00320118 /* CSSPartMatcher.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1897102DD2FA00320118 /* CSSPartMatcher.h */; };
17 | 3F4E18BA102DD2FA00320118 /* CSSPartMatcher.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E1898102DD2FA00320118 /* CSSPartMatcher.m */; };
18 | 3F4E18BB102DD2FA00320118 /* CSSSelector.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1899102DD2FA00320118 /* CSSSelector.h */; };
19 | 3F4E18BC102DD2FA00320118 /* CSSSelector.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E189A102DD2FA00320118 /* CSSSelector.m */; };
20 | 3F4E18BD102DD2FA00320118 /* CSSSelectorMatcher.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E189B102DD2FA00320118 /* CSSSelectorMatcher.h */; };
21 | 3F4E18BE102DD2FA00320118 /* CSSSelectorMatcher.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E189C102DD2FA00320118 /* CSSSelectorMatcher.m */; };
22 | 3F4E18BF102DD2FA00320118 /* CSSSelectorPart.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E189D102DD2FA00320118 /* CSSSelectorPart.h */; };
23 | 3F4E18C0102DD2FA00320118 /* CSSSelectorPart.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E189E102DD2FA00320118 /* CSSSelectorPart.m */; };
24 | 3F4E18C1102DD2FA00320118 /* DoctypeChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E189F102DD2FA00320118 /* DoctypeChunk.h */; };
25 | 3F4E18C2102DD2FA00320118 /* DoctypeChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A0102DD2FA00320118 /* DoctypeChunk.m */; };
26 | 3F4E18C3102DD2FA00320118 /* DocumentRoot.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A1102DD2FA00320118 /* DocumentRoot.h */; };
27 | 3F4E18C4102DD2FA00320118 /* DocumentRoot.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A2102DD2FA00320118 /* DocumentRoot.m */; };
28 | 3F4E18C5102DD2FA00320118 /* Element.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A3102DD2FA00320118 /* Element.h */; };
29 | 3F4E18C6102DD2FA00320118 /* Element.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A4102DD2FA00320118 /* Element.m */; };
30 | 3F4E18C7102DD2FA00320118 /* ElementParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A5102DD2FA00320118 /* ElementParser.h */; };
31 | 3F4E18C8102DD2FA00320118 /* ElementParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A6102DD2FA00320118 /* ElementParser.m */; };
32 | 3F4E18C9102DD2FA00320118 /* EntityChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A7102DD2FA00320118 /* EntityChunk.h */; };
33 | 3F4E18CA102DD2FA00320118 /* EntityChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A8102DD2FA00320118 /* EntityChunk.m */; };
34 | 3F4E18CB102DD2FA00320118 /* NSString_HTML.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A9102DD2FA00320118 /* NSString_HTML.h */; };
35 | 3F4E18CC102DD2FA00320118 /* NSString_HTML.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18AA102DD2FA00320118 /* NSString_HTML.m */; };
36 | 3F4E18CD102DD2FA00320118 /* ProcessingInstructionChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18AB102DD2FA00320118 /* ProcessingInstructionChunk.h */; };
37 | 3F4E18CE102DD2FA00320118 /* ProcessingInstructionChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18AC102DD2FA00320118 /* ProcessingInstructionChunk.m */; };
38 | 3F4E18CF102DD2FA00320118 /* TagChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18AD102DD2FA00320118 /* TagChunk.h */; };
39 | 3F4E18D0102DD2FA00320118 /* TagChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18AE102DD2FA00320118 /* TagChunk.m */; };
40 | 3F4E18D1102DD2FA00320118 /* TxtChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18AF102DD2FA00320118 /* TxtChunk.h */; };
41 | 3F4E18D2102DD2FA00320118 /* TxtChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18B0102DD2FA00320118 /* TxtChunk.m */; };
42 | 3F4E18D3102DD2FA00320118 /* URLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18B1102DD2FA00320118 /* URLParser.h */; };
43 | 3F4E18D4102DD2FA00320118 /* URLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18B2102DD2FA00320118 /* URLParser.m */; };
44 | AA747D9F0F9514B9006C5449 /* ElementParser_Prefix.pch in Headers */ = {isa = PBXBuildFile; fileRef = AA747D9E0F9514B9006C5449 /* ElementParser_Prefix.pch */; };
45 | AACBBE4A0F95108600F1A2B1 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AACBBE490F95108600F1A2B1 /* Foundation.framework */; };
46 | /* End PBXBuildFile section */
47 |
48 | /* Begin PBXFileReference section */
49 | 3F4E1891102DD2FA00320118 /* CDataChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CDataChunk.h; path = Classes/CDataChunk.h; sourceTree = ""; };
50 | 3F4E1892102DD2FA00320118 /* CDataChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CDataChunk.m; path = Classes/CDataChunk.m; sourceTree = SOURCE_ROOT; };
51 | 3F4E1893102DD2FA00320118 /* Chunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Chunk.h; path = Classes/Chunk.h; sourceTree = ""; };
52 | 3F4E1894102DD2FA00320118 /* Chunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Chunk.m; path = Classes/Chunk.m; sourceTree = SOURCE_ROOT; };
53 | 3F4E1895102DD2FA00320118 /* CommentChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CommentChunk.h; path = Classes/CommentChunk.h; sourceTree = ""; };
54 | 3F4E1896102DD2FA00320118 /* CommentChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CommentChunk.m; path = Classes/CommentChunk.m; sourceTree = SOURCE_ROOT; };
55 | 3F4E1897102DD2FA00320118 /* CSSPartMatcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSPartMatcher.h; path = Classes/CSSPartMatcher.h; sourceTree = ""; };
56 | 3F4E1898102DD2FA00320118 /* CSSPartMatcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSPartMatcher.m; path = Classes/CSSPartMatcher.m; sourceTree = SOURCE_ROOT; };
57 | 3F4E1899102DD2FA00320118 /* CSSSelector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelector.h; path = Classes/CSSSelector.h; sourceTree = ""; };
58 | 3F4E189A102DD2FA00320118 /* CSSSelector.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelector.m; path = Classes/CSSSelector.m; sourceTree = SOURCE_ROOT; };
59 | 3F4E189B102DD2FA00320118 /* CSSSelectorMatcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelectorMatcher.h; path = Classes/CSSSelectorMatcher.h; sourceTree = ""; };
60 | 3F4E189C102DD2FA00320118 /* CSSSelectorMatcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelectorMatcher.m; path = Classes/CSSSelectorMatcher.m; sourceTree = SOURCE_ROOT; };
61 | 3F4E189D102DD2FA00320118 /* CSSSelectorPart.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelectorPart.h; path = Classes/CSSSelectorPart.h; sourceTree = ""; };
62 | 3F4E189E102DD2FA00320118 /* CSSSelectorPart.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelectorPart.m; path = Classes/CSSSelectorPart.m; sourceTree = SOURCE_ROOT; };
63 | 3F4E189F102DD2FA00320118 /* DoctypeChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DoctypeChunk.h; path = Classes/DoctypeChunk.h; sourceTree = ""; };
64 | 3F4E18A0102DD2FA00320118 /* DoctypeChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = DoctypeChunk.m; path = Classes/DoctypeChunk.m; sourceTree = SOURCE_ROOT; };
65 | 3F4E18A1102DD2FA00320118 /* DocumentRoot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DocumentRoot.h; path = Classes/DocumentRoot.h; sourceTree = ""; };
66 | 3F4E18A2102DD2FA00320118 /* DocumentRoot.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = DocumentRoot.m; path = Classes/DocumentRoot.m; sourceTree = SOURCE_ROOT; };
67 | 3F4E18A3102DD2FA00320118 /* Element.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Element.h; path = Classes/Element.h; sourceTree = ""; };
68 | 3F4E18A4102DD2FA00320118 /* Element.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Element.m; path = Classes/Element.m; sourceTree = SOURCE_ROOT; };
69 | 3F4E18A5102DD2FA00320118 /* ElementParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ElementParser.h; path = Classes/ElementParser.h; sourceTree = ""; };
70 | 3F4E18A6102DD2FA00320118 /* ElementParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = ElementParser.m; path = Classes/ElementParser.m; sourceTree = SOURCE_ROOT; };
71 | 3F4E18A7102DD2FA00320118 /* EntityChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = EntityChunk.h; path = Classes/EntityChunk.h; sourceTree = ""; };
72 | 3F4E18A8102DD2FA00320118 /* EntityChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = EntityChunk.m; path = Classes/EntityChunk.m; sourceTree = SOURCE_ROOT; };
73 | 3F4E18A9102DD2FA00320118 /* NSString_HTML.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = NSString_HTML.h; path = Classes/NSString_HTML.h; sourceTree = ""; };
74 | 3F4E18AA102DD2FA00320118 /* NSString_HTML.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = NSString_HTML.m; path = Classes/NSString_HTML.m; sourceTree = SOURCE_ROOT; };
75 | 3F4E18AB102DD2FA00320118 /* ProcessingInstructionChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ProcessingInstructionChunk.h; path = Classes/ProcessingInstructionChunk.h; sourceTree = ""; };
76 | 3F4E18AC102DD2FA00320118 /* ProcessingInstructionChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = ProcessingInstructionChunk.m; path = Classes/ProcessingInstructionChunk.m; sourceTree = SOURCE_ROOT; };
77 | 3F4E18AD102DD2FA00320118 /* TagChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TagChunk.h; path = Classes/TagChunk.h; sourceTree = ""; };
78 | 3F4E18AE102DD2FA00320118 /* TagChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TagChunk.m; path = Classes/TagChunk.m; sourceTree = SOURCE_ROOT; };
79 | 3F4E18AF102DD2FA00320118 /* TxtChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TxtChunk.h; path = Classes/TxtChunk.h; sourceTree = ""; };
80 | 3F4E18B0102DD2FA00320118 /* TxtChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TxtChunk.m; path = Classes/TxtChunk.m; sourceTree = SOURCE_ROOT; };
81 | 3F4E18B1102DD2FA00320118 /* URLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = URLParser.h; path = Classes/URLParser.h; sourceTree = ""; };
82 | 3F4E18B2102DD2FA00320118 /* URLParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = URLParser.m; path = Classes/URLParser.m; sourceTree = SOURCE_ROOT; };
83 | AA747D9E0F9514B9006C5449 /* ElementParser_Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ElementParser_Prefix.pch; sourceTree = SOURCE_ROOT; };
84 | AACBBE490F95108600F1A2B1 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
85 | D2AAC07E0554694100DB518D /* libElementParser.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libElementParser.a; sourceTree = BUILT_PRODUCTS_DIR; };
86 | /* End PBXFileReference section */
87 |
88 | /* Begin PBXFrameworksBuildPhase section */
89 | D2AAC07C0554694100DB518D /* Frameworks */ = {
90 | isa = PBXFrameworksBuildPhase;
91 | buildActionMask = 2147483647;
92 | files = (
93 | AACBBE4A0F95108600F1A2B1 /* Foundation.framework in Frameworks */,
94 | );
95 | runOnlyForDeploymentPostprocessing = 0;
96 | };
97 | /* End PBXFrameworksBuildPhase section */
98 |
99 | /* Begin PBXGroup section */
100 | 034768DFFF38A50411DB9C8B /* Products */ = {
101 | isa = PBXGroup;
102 | children = (
103 | D2AAC07E0554694100DB518D /* libElementParser.a */,
104 | );
105 | name = Products;
106 | sourceTree = "";
107 | };
108 | 0867D691FE84028FC02AAC07 /* ElementParser */ = {
109 | isa = PBXGroup;
110 | children = (
111 | 3F4E1923102DD47000320118 /* Headers */,
112 | 08FB77AEFE84172EC02AAC07 /* Classes */,
113 | 32C88DFF0371C24200C91783 /* Other Sources */,
114 | 0867D69AFE84028FC02AAC07 /* Frameworks */,
115 | 034768DFFF38A50411DB9C8B /* Products */,
116 | );
117 | name = ElementParser;
118 | sourceTree = "";
119 | };
120 | 0867D69AFE84028FC02AAC07 /* Frameworks */ = {
121 | isa = PBXGroup;
122 | children = (
123 | AACBBE490F95108600F1A2B1 /* Foundation.framework */,
124 | );
125 | name = Frameworks;
126 | sourceTree = "";
127 | };
128 | 08FB77AEFE84172EC02AAC07 /* Classes */ = {
129 | isa = PBXGroup;
130 | children = (
131 | 3F4E1892102DD2FA00320118 /* CDataChunk.m */,
132 | 3F4E1894102DD2FA00320118 /* Chunk.m */,
133 | 3F4E1896102DD2FA00320118 /* CommentChunk.m */,
134 | 3F4E1898102DD2FA00320118 /* CSSPartMatcher.m */,
135 | 3F4E189A102DD2FA00320118 /* CSSSelector.m */,
136 | 3F4E189C102DD2FA00320118 /* CSSSelectorMatcher.m */,
137 | 3F4E189E102DD2FA00320118 /* CSSSelectorPart.m */,
138 | 3F4E18A0102DD2FA00320118 /* DoctypeChunk.m */,
139 | 3F4E18A2102DD2FA00320118 /* DocumentRoot.m */,
140 | 3F4E18A4102DD2FA00320118 /* Element.m */,
141 | 3F4E18A6102DD2FA00320118 /* ElementParser.m */,
142 | 3F4E18A8102DD2FA00320118 /* EntityChunk.m */,
143 | 3F4E18AA102DD2FA00320118 /* NSString_HTML.m */,
144 | 3F4E18AC102DD2FA00320118 /* ProcessingInstructionChunk.m */,
145 | 3F4E18AE102DD2FA00320118 /* TagChunk.m */,
146 | 3F4E18B0102DD2FA00320118 /* TxtChunk.m */,
147 | 3F4E18B2102DD2FA00320118 /* URLParser.m */,
148 | );
149 | name = Classes;
150 | sourceTree = "";
151 | };
152 | 32C88DFF0371C24200C91783 /* Other Sources */ = {
153 | isa = PBXGroup;
154 | children = (
155 | AA747D9E0F9514B9006C5449 /* ElementParser_Prefix.pch */,
156 | );
157 | name = "Other Sources";
158 | sourceTree = "";
159 | };
160 | 3F4E1923102DD47000320118 /* Headers */ = {
161 | isa = PBXGroup;
162 | children = (
163 | 3F4E1891102DD2FA00320118 /* CDataChunk.h */,
164 | 3F4E1893102DD2FA00320118 /* Chunk.h */,
165 | 3F4E1895102DD2FA00320118 /* CommentChunk.h */,
166 | 3F4E1897102DD2FA00320118 /* CSSPartMatcher.h */,
167 | 3F4E1899102DD2FA00320118 /* CSSSelector.h */,
168 | 3F4E189B102DD2FA00320118 /* CSSSelectorMatcher.h */,
169 | 3F4E189D102DD2FA00320118 /* CSSSelectorPart.h */,
170 | 3F4E189F102DD2FA00320118 /* DoctypeChunk.h */,
171 | 3F4E18A1102DD2FA00320118 /* DocumentRoot.h */,
172 | 3F4E18A3102DD2FA00320118 /* Element.h */,
173 | 3F4E18A5102DD2FA00320118 /* ElementParser.h */,
174 | 3F4E18A7102DD2FA00320118 /* EntityChunk.h */,
175 | 3F4E18A9102DD2FA00320118 /* NSString_HTML.h */,
176 | 3F4E18AB102DD2FA00320118 /* ProcessingInstructionChunk.h */,
177 | 3F4E18AD102DD2FA00320118 /* TagChunk.h */,
178 | 3F4E18AF102DD2FA00320118 /* TxtChunk.h */,
179 | 3F4E18B1102DD2FA00320118 /* URLParser.h */,
180 | );
181 | name = Headers;
182 | sourceTree = "";
183 | };
184 | /* End PBXGroup section */
185 |
186 | /* Begin PBXHeadersBuildPhase section */
187 | D2AAC07A0554694100DB518D /* Headers */ = {
188 | isa = PBXHeadersBuildPhase;
189 | buildActionMask = 2147483647;
190 | files = (
191 | AA747D9F0F9514B9006C5449 /* ElementParser_Prefix.pch in Headers */,
192 | 3F4E18B3102DD2FA00320118 /* CDataChunk.h in Headers */,
193 | 3F4E18B5102DD2FA00320118 /* Chunk.h in Headers */,
194 | 3F4E18B7102DD2FA00320118 /* CommentChunk.h in Headers */,
195 | 3F4E18B9102DD2FA00320118 /* CSSPartMatcher.h in Headers */,
196 | 3F4E18BB102DD2FA00320118 /* CSSSelector.h in Headers */,
197 | 3F4E18BD102DD2FA00320118 /* CSSSelectorMatcher.h in Headers */,
198 | 3F4E18BF102DD2FA00320118 /* CSSSelectorPart.h in Headers */,
199 | 3F4E18C1102DD2FA00320118 /* DoctypeChunk.h in Headers */,
200 | 3F4E18C3102DD2FA00320118 /* DocumentRoot.h in Headers */,
201 | 3F4E18C5102DD2FA00320118 /* Element.h in Headers */,
202 | 3F4E18C7102DD2FA00320118 /* ElementParser.h in Headers */,
203 | 3F4E18C9102DD2FA00320118 /* EntityChunk.h in Headers */,
204 | 3F4E18CB102DD2FA00320118 /* NSString_HTML.h in Headers */,
205 | 3F4E18CD102DD2FA00320118 /* ProcessingInstructionChunk.h in Headers */,
206 | 3F4E18CF102DD2FA00320118 /* TagChunk.h in Headers */,
207 | 3F4E18D1102DD2FA00320118 /* TxtChunk.h in Headers */,
208 | 3F4E18D3102DD2FA00320118 /* URLParser.h in Headers */,
209 | );
210 | runOnlyForDeploymentPostprocessing = 0;
211 | };
212 | /* End PBXHeadersBuildPhase section */
213 |
214 | /* Begin PBXNativeTarget section */
215 | D2AAC07D0554694100DB518D /* ElementParser */ = {
216 | isa = PBXNativeTarget;
217 | buildConfigurationList = 1DEB921E08733DC00010E9CD /* Build configuration list for PBXNativeTarget "ElementParser" */;
218 | buildPhases = (
219 | D2AAC07A0554694100DB518D /* Headers */,
220 | D2AAC07B0554694100DB518D /* Sources */,
221 | D2AAC07C0554694100DB518D /* Frameworks */,
222 | );
223 | buildRules = (
224 | );
225 | dependencies = (
226 | );
227 | name = ElementParser;
228 | productName = ElementParser;
229 | productReference = D2AAC07E0554694100DB518D /* libElementParser.a */;
230 | productType = "com.apple.product-type.library.static";
231 | };
232 | /* End PBXNativeTarget section */
233 |
234 | /* Begin PBXProject section */
235 | 0867D690FE84028FC02AAC07 /* Project object */ = {
236 | isa = PBXProject;
237 | buildConfigurationList = 1DEB922208733DC00010E9CD /* Build configuration list for PBXProject "ElementParser" */;
238 | compatibilityVersion = "Xcode 3.1";
239 | hasScannedForEncodings = 1;
240 | mainGroup = 0867D691FE84028FC02AAC07 /* ElementParser */;
241 | productRefGroup = 034768DFFF38A50411DB9C8B /* Products */;
242 | projectDirPath = "";
243 | projectRoot = "";
244 | targets = (
245 | D2AAC07D0554694100DB518D /* ElementParser */,
246 | );
247 | };
248 | /* End PBXProject section */
249 |
250 | /* Begin PBXSourcesBuildPhase section */
251 | D2AAC07B0554694100DB518D /* Sources */ = {
252 | isa = PBXSourcesBuildPhase;
253 | buildActionMask = 2147483647;
254 | files = (
255 | 3F4E18B4102DD2FA00320118 /* CDataChunk.m in Sources */,
256 | 3F4E18B6102DD2FA00320118 /* Chunk.m in Sources */,
257 | 3F4E18B8102DD2FA00320118 /* CommentChunk.m in Sources */,
258 | 3F4E18BA102DD2FA00320118 /* CSSPartMatcher.m in Sources */,
259 | 3F4E18BC102DD2FA00320118 /* CSSSelector.m in Sources */,
260 | 3F4E18BE102DD2FA00320118 /* CSSSelectorMatcher.m in Sources */,
261 | 3F4E18C0102DD2FA00320118 /* CSSSelectorPart.m in Sources */,
262 | 3F4E18C2102DD2FA00320118 /* DoctypeChunk.m in Sources */,
263 | 3F4E18C4102DD2FA00320118 /* DocumentRoot.m in Sources */,
264 | 3F4E18C6102DD2FA00320118 /* Element.m in Sources */,
265 | 3F4E18C8102DD2FA00320118 /* ElementParser.m in Sources */,
266 | 3F4E18CA102DD2FA00320118 /* EntityChunk.m in Sources */,
267 | 3F4E18CC102DD2FA00320118 /* NSString_HTML.m in Sources */,
268 | 3F4E18CE102DD2FA00320118 /* ProcessingInstructionChunk.m in Sources */,
269 | 3F4E18D0102DD2FA00320118 /* TagChunk.m in Sources */,
270 | 3F4E18D2102DD2FA00320118 /* TxtChunk.m in Sources */,
271 | 3F4E18D4102DD2FA00320118 /* URLParser.m in Sources */,
272 | );
273 | runOnlyForDeploymentPostprocessing = 0;
274 | };
275 | /* End PBXSourcesBuildPhase section */
276 |
277 | /* Begin XCBuildConfiguration section */
278 | 1DEB921F08733DC00010E9CD /* Debug */ = {
279 | isa = XCBuildConfiguration;
280 | buildSettings = {
281 | ALWAYS_SEARCH_USER_PATHS = NO;
282 | ARCHS = "$(ARCHS_STANDARD_32_BIT)";
283 | COPY_PHASE_STRIP = NO;
284 | DSTROOT = /tmp/ElementParser.dst;
285 | GCC_DYNAMIC_NO_PIC = NO;
286 | GCC_ENABLE_FIX_AND_CONTINUE = YES;
287 | GCC_MODEL_TUNING = G5;
288 | GCC_OPTIMIZATION_LEVEL = 0;
289 | GCC_PRECOMPILE_PREFIX_HEADER = YES;
290 | GCC_PREFIX_HEADER = ElementParser_Prefix.pch;
291 | INSTALL_PATH = /usr/local/lib;
292 | PRODUCT_NAME = ElementParser;
293 | };
294 | name = Debug;
295 | };
296 | 1DEB922008733DC00010E9CD /* Release */ = {
297 | isa = XCBuildConfiguration;
298 | buildSettings = {
299 | ALWAYS_SEARCH_USER_PATHS = NO;
300 | ARCHS = "$(ARCHS_STANDARD_32_BIT)";
301 | DSTROOT = /tmp/ElementParser.dst;
302 | GCC_MODEL_TUNING = G5;
303 | GCC_PRECOMPILE_PREFIX_HEADER = YES;
304 | GCC_PREFIX_HEADER = ElementParser_Prefix.pch;
305 | INSTALL_PATH = /usr/local/lib;
306 | PRODUCT_NAME = ElementParser;
307 | };
308 | name = Release;
309 | };
310 | 1DEB922308733DC00010E9CD /* Debug */ = {
311 | isa = XCBuildConfiguration;
312 | buildSettings = {
313 | ARCHS = "$(ARCHS_STANDARD_32_BIT)";
314 | GCC_C_LANGUAGE_STANDARD = c99;
315 | GCC_OPTIMIZATION_LEVEL = 0;
316 | GCC_WARN_ABOUT_RETURN_TYPE = YES;
317 | GCC_WARN_UNUSED_VARIABLE = YES;
318 | OTHER_LDFLAGS = "-ObjC";
319 | PREBINDING = NO;
320 | SDKROOT = macosx10.5;
321 | };
322 | name = Debug;
323 | };
324 | 1DEB922408733DC00010E9CD /* Release */ = {
325 | isa = XCBuildConfiguration;
326 | buildSettings = {
327 | ARCHS = "$(ARCHS_STANDARD_32_BIT)";
328 | GCC_C_LANGUAGE_STANDARD = c99;
329 | GCC_WARN_ABOUT_RETURN_TYPE = YES;
330 | GCC_WARN_UNUSED_VARIABLE = YES;
331 | OTHER_LDFLAGS = "-ObjC";
332 | PREBINDING = NO;
333 | SDKROOT = iphoneos3.0;
334 | };
335 | name = Release;
336 | };
337 | /* End XCBuildConfiguration section */
338 |
339 | /* Begin XCConfigurationList section */
340 | 1DEB921E08733DC00010E9CD /* Build configuration list for PBXNativeTarget "ElementParser" */ = {
341 | isa = XCConfigurationList;
342 | buildConfigurations = (
343 | 1DEB921F08733DC00010E9CD /* Debug */,
344 | 1DEB922008733DC00010E9CD /* Release */,
345 | );
346 | defaultConfigurationIsVisible = 0;
347 | defaultConfigurationName = Release;
348 | };
349 | 1DEB922208733DC00010E9CD /* Build configuration list for PBXProject "ElementParser" */ = {
350 | isa = XCConfigurationList;
351 | buildConfigurations = (
352 | 1DEB922308733DC00010E9CD /* Debug */,
353 | 1DEB922408733DC00010E9CD /* Release */,
354 | );
355 | defaultConfigurationIsVisible = 0;
356 | defaultConfigurationName = Release;
357 | };
358 | /* End XCConfigurationList section */
359 | };
360 | rootObject = 0867D690FE84028FC02AAC07 /* Project object */;
361 | }
362 |
--------------------------------------------------------------------------------
/ElementParser_Prefix.pch:
--------------------------------------------------------------------------------
1 | //
2 | // Prefix header for all source files of the 'ElementParser' target in the 'ElementParser' project.
3 | //
4 |
5 | #ifdef __OBJC__
6 | #import
7 | #endif
8 |
--------------------------------------------------------------------------------
/GPL v3 Liscense.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Objective3/ElementParser/3fa3d82f8ff05a25d40b61592e1cf18115daf306/GPL v3 Liscense.pdf
--------------------------------------------------------------------------------
/HTML Entities.plist:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Objective3/ElementParser/3fa3d82f8ff05a25d40b61592e1cf18115daf306/HTML Entities.plist
--------------------------------------------------------------------------------
/Read Me.txt:
--------------------------------------------------------------------------------
1 | Element Parser
2 | 5/4/09
3 |
4 | Accessing and manipulating HTML and XML in Cocoa can be incredibly frustrating. There are two existing choices (NSXMLParser and libxml2) but neither works with HTML or "real-world" XML documents that are often not "perfect". Their interfaces put all the work on you to map between the document and your program's domain objects. They force you to write code that is hard to write and maintain. Somehow, something that starts out looking straightforward ends up becoming
5 | a science project or worse.
6 |
7 | ElementParser is a lightweight framework that provides easy access to XML and HTML content. Rather than get lost in the complexities of the HTML and XML specifications, it aspires to not obscure their essential simplicity. It doesn't do everything, but aspires to do "just enough".
8 |
9 | I hope you like it.
10 |
11 | Let's begin with some examples.
12 |
13 | document = [Element parseHTML: source];
14 |
15 | Document is a special element that holds the top level element(s) (e.g. <html> or <rss>) of your document. You now have a tree of Element objects which you can walk using methods like firstChild, nextSybling and parent. You can also access the data each contains with methods like tagName, attributes and contentsText. Nice start. And sometimes this is enough. But let's say you don't want to walk the tree all the time to find the data you need. How about:
16 |
17 | linkElement = [element selectElement: @"div.nextLink a"];
18 |
19 | Here we're using a CSS-style selector to locate and return a matching element. Nice. Now we can parse a document and conveniently find elements of interest. (Yes, there is a corresponding selectElements: method that returns all matches.)
20 |
21 | Next, let's bind together your world of objects and the world of elements more closely. To do this, we'll use the ElementParser directly to register callbacks into your code when an element is found (and its contents parsed).
22 |
23 | ElementParser* parser = [[ElementParser alloc] initWithCallbacksDelegate: self];
24 | [parser performSelector:@selector(processFeedElement:) forElementsMatching: @"feed"];
25 | documentRoot = [parser parseXML: source];
26 |
27 | Your code might look like this:
28 |
29 | -(FeedItem*)processFeedElement:(Element*)element{
30 | FeedItem* feedItem = [[[FeedItem alloc] init] autorelease];
31 | feedItem.title = [[element selectElement: @"title"] contentsText];
32 | feedItem.description = [[element selectElement: @"description"] contentsText];
33 | feedItem.enclosure = [[element selectElement: @"enclosure"] contentsText];
34 | return feedItem; // optional, sets this element's domainObject property
35 | }
36 |
37 | Finally, all these html and xml documents often reside on the web. Wouldn't it be nice if we could use the pattern above to process the documents incrementally as soon as they appear? How about:
38 |
39 | URLParser* parser = [[URLParser alloc] initWithCallbackDelegate: self];
40 | [parser performSelector:@selector(processChannelElement:) forElementsMatching: @"channel"];
41 | [parser performSelector:@selector(processFeedElement:) forElementsMatching: @"feed"];
42 | [parser parseURL: myURL];
43 |
44 | There is a lot more under the covers but this may be all you need. Hopefully it's just enough! We'd love your feedback at feedback@touchtankapps.com.
45 |
46 | Terms of Use
47 | The ElementParser framework (and its source code) is free of charge for non-commercial uses. For commercial uses, a fee of $100 is required per product. (That's about 2 hours of your time, right?) Support plans are also available. Please contact sales@touchtankapps.com.
48 |
49 |
--------------------------------------------------------------------------------
/Test/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Objective3/ElementParser/3fa3d82f8ff05a25d40b61592e1cf18115daf306/Test/.DS_Store
--------------------------------------------------------------------------------
/Test/CSSSelectorTest.h:
--------------------------------------------------------------------------------
1 | //
2 | // CSSSelectorTest.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 | #import "CSSSelector.h"
27 |
28 | @interface CSSSelector (Test)
29 | // Test-only additions to CSSSelector; failures are reported through assert().
30 | +(void)testCSSSelector; // checks that sample selector strings produce the expected description
31 | +(void)testAll; // runs every CSSSelector test (currently just testCSSSelector)
32 |
33 | @end
34 |
--------------------------------------------------------------------------------
/Test/CSSSelectorTest.m:
--------------------------------------------------------------------------------
1 | //
2 | // CSSSelectorTest.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "CSSSelectorTest.h"
26 |
27 |
28 | @implementation CSSSelector (Test)
29 |
30 | // Builds a CSSSelector from `source` and asserts that its description
31 | // equals `expected`. The selector is released before returning.
32 | +(void)assertWithCSSSelectorString:(NSString*)source expected:(NSString*)expected{
33 |     CSSSelector* parsed = [[CSSSelector alloc] initWithString: source];
34 |     NSString* result = [parsed description];
35 |     assert([result isEqualToString: expected]);
36 |     [parsed release];
37 | }
38 |
39 | // Round-trip shorthand: the description is expected to reproduce `source`
40 | // exactly as it was written.
41 | +(void)assertWithCSSSelectorString:(NSString*)source{
42 |     [self assertWithCSSSelectorString: source expected: source];
43 | }
44 |
45 | // Runs every selector case in the table below, in order, through the
46 | // assertion helpers above.
47 | +(void)testCSSSelector{
48 |     // Each row is {selector source, expected description}. A nil expectation
49 |     // means the description must reproduce the source unchanged.
50 |     NSString* cases[][2] = {
51 |         // simple selectors: tag, class, multiple classes, id, id + tag, class + tag
52 |         {@"foo", nil},
53 |         {@".class", nil},
54 |         {@".class.another_class", nil},
55 |         {@"#identifier", nil},
56 |         {@"foo#identifier", nil},
57 |         {@"foo.bar", nil},
58 |
59 |         // attribute selectors: bare, unquoted, single-quoted, padded, double-quoted
60 |         {@"foo[bar]", nil},
61 |         {@"foo[bar=23]", @"foo[bar='23']"},
62 |         {@"foo[bar='23']", nil},
63 |         {@"foo[ bar = '23' ]", @"foo[bar='23']"},
64 |         {@"foo[bar=\"23\"]", @"foo[bar='23']"},
65 |
66 |         // chains: descendant, successor, child
67 |         {@"foo bar", nil},
68 |         {@"foo + bar", nil},
69 |         {@"foo > bar", nil},
70 |
71 |         // big and ugly
72 |         {@"foo#ids > bar.huh + img[title]", nil},
73 |     };
74 |     int caseCount = sizeof(cases) / sizeof(cases[0]);
75 |
76 |     for (int i = 0; i < caseCount; i++){
77 |         NSString* selectorSource = cases[i][0];
78 |         NSString* expectedDescription = cases[i][1];
79 |         if (expectedDescription == nil){
80 |             [self assertWithCSSSelectorString: selectorSource];
81 |         } else {
82 |             [self assertWithCSSSelectorString: selectorSource expected: expectedDescription];
83 |         }
84 |     }
85 | }
86 |
87 | // Entry point that runs all CSSSelector tests in this category.
88 | +(void)testAll{
89 |     [self testCSSSelector];
90 | }
91 |
92 | @end
93 |
--------------------------------------------------------------------------------
/Test/ElementParserTest.h:
--------------------------------------------------------------------------------
1 | //
2 | // ElementParserTest.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 | #import "ElementParser.h"
27 |
28 | @interface ElementParser (Test)
29 | // Test-only additions to ElementParser; failures are reported through assert().
30 | +(void)testElementParser; // parses a small HTML sample and checks the resulting descriptions
31 | +(void)testNestedMatches; // checks that the selector div.x a yields exactly one match
32 | +(void)testAll; // runs the feed, parser and empty-element tests; does not call testNestedMatches
33 |
34 |
35 | @end
36 |
--------------------------------------------------------------------------------
/Test/ElementParserTest.m:
--------------------------------------------------------------------------------
1 | //
2 | // ElementParserTest.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "ElementParserTest.h"
26 | #import "NSString_HTML.h"
27 |
28 | @implementation ElementParser (Test)
29 |
30 | +(void)testElementParser{
31 | ElementParser* builder = [[ElementParser alloc] init];
32 | NSString* source = @"some test more text
";
33 | NSString* expect = @"";
34 | Element* root = [builder parseHTML: source];
35 | NSString* result = [builder description];
36 | assert([result isEqualToString: expect]);
37 |
38 | Element* body = [root selectElement: @"body"];
39 | assert([[body description] isEqualToString: @"
"]);
40 | assert([[body contentsSource] isEqualToString: @"some test more text
"]);
41 | }
42 |
43 | +(void)testShouldBeEmpty{
44 | ElementParser* builder = [[ElementParser alloc] init];
45 | assert([builder shouldBeEmptyElement: [@" " element]]);
46 | assert([builder shouldBeEmptyElement: [@" " element]]);
47 | assert(![builder shouldBeEmptyElement: [@"" element]]);
48 | assert(![builder shouldBeEmptyElement: [@"
" element]]);
49 | }
50 |
51 | +(void)testFeedPerf{
52 | NSString* file = [[NSBundle mainBundle] pathForResource: @"gizmodo" ofType: @"xml"];
53 | NSString* source = [NSString stringWithContentsOfFile: file];
54 | assert(source);
55 |
56 | int runs = 10;
57 |
58 | NSTimeInterval start;
59 | start = [NSDate timeIntervalSinceReferenceDate];
60 | for (int i = 0; i < runs; i++){
61 | Element* root = [Element parseXML: source];
62 | NSArray* items = [root selectElements: @"item"];
63 | for (Element* item in items){
64 | [[item selectElement: @"title"] contentsText];
65 | NSString* description = [[item selectElement: @"description"] contentsText];
66 |
67 | Element* descriptionDocument = [Element parseHTML: description];
68 | [descriptionDocument contentsText];
69 | [[descriptionDocument selectElement: @"img"] attribute: @"src"];
70 | }
71 | }
72 |
73 | NSLog(@"%i runs processing feed: %f", runs, [NSDate timeIntervalSinceReferenceDate] - start);
74 | }
75 |
76 | +(void)testNestedMatches{
77 | NSString* source = @"
";
78 | ElementParser* parser = [[[ElementParser alloc] init] autorelease];
79 | DocumentRoot* root = [parser parseHTML: source];
80 | NSArray* result = [root selectElements: @"div.x a"];
81 | assert([result count] == 1);
82 | }
83 |
84 | +(void)testFeed{ // Parses the bundled gizmodo.xml feed, logs a short summary of each "item" element, and expects 40 items.
85 | NSString* file = [[NSBundle mainBundle] pathForResource: @"gizmodo" ofType: @"xml"];
86 | NSString* source = [NSString stringWithContentsOfFile: file];
87 | assert(source); // the feed resource must have been read
88 |
89 | Element* root = [Element parseXML: source];
90 | NSArray* items = [root selectElements: @"item"];
91 | for (Element* item in items){
92 | NSString* title = [[item selectElement: @"title"] contentsText];
93 | NSString* description = [[item selectElement: @"description"] contentsText];
94 |
95 | Element* descriptionDocument = [Element parseHTML: description]; // the description text is itself parsed as HTML
96 | NSString* strippedDescr = [descriptionDocument contentsText];
97 | NSString* descrImg = [[descriptionDocument selectElement: @"img"] attribute: @"src"];
98 | // -length returns NSUInteger, so it is logged with %lu and an explicit cast rather than %i.
99 | NSLog(@"\n\n%@\n%lu chars in descr beginning with: %@\nStripped:%@\nImage: %@", title, (unsigned long)[description length], [description substringToIndex: MIN([description length], 32)], [strippedDescr substringToIndex: MIN([strippedDescr length], 32)], descrImg);
100 | }
101 |
102 | assert([items count] == 40);
103 | }
104 |
105 | +(void)testAll{ // Runs the ElementParser tests listed below. NOTE(review): testNestedMatches is defined in this category but not invoked here -- confirm whether that is intentional.
106 | [self testFeedPerf];
107 | [self testFeed];
108 | [self testElementParser];
109 | [self testShouldBeEmpty];
110 | }
111 |
112 |
113 | @end
114 |
--------------------------------------------------------------------------------
/Test/Element_Test.h:
--------------------------------------------------------------------------------
1 | //
2 | // Element_Test.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 | #import "Element.h"
27 |
28 | @interface Element (Test) // Class-method self-tests for Element, built on assert().
29 |
30 | +(void)testElement; // tagName and attribute parsing of single elements
31 | +(void)testAttribute; // attribute: lookups (missing, present, among others, quoted)
32 | +(void)testContentsOfChildren; // contentsOfChildren keyed by child name
33 | +(void)testSelectElements; // selectElements: returning more than one match
34 | +(void)testElementWithCSSSelector; // selectElement: across a range of selector forms
35 | +(void)testElementContentsWithCSSSelector; // contentsSource of a selected element
36 | +(void)testAll; // runs the six tests above in declaration order
37 |
38 | @end
39 |
--------------------------------------------------------------------------------
/Test/Element_Test.m:
--------------------------------------------------------------------------------
1 | //
2 | // Element_Test.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/21/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "Element_Test.h"
26 | #import "NSString_HTML.h"
27 | #import "DocumentRoot.h"
28 |
29 | @implementation Element (Test)
30 |
31 |
32 | +(void)testElement{ // Checks tagName, attribute: and attributes on elements built from string fixtures via -element.
33 | NSString* result;
34 | Element* element;
35 | // NOTE(review): the fixture literals below are empty in this copy, yet the asserts expect tag "foo" and attributes -- the markup was presumably lost; restore from upstream.
36 | // should handle simple tagname
37 | result = [[@"" element] tagName];
38 | assert([result isEqualToString: @"foo"]);
39 |
40 | // should handle element with attributes
41 | element = [@"" element];
42 | result = [element tagName];
43 | assert([result isEqualToString: @"foo"]);
44 | assert([[element attribute: @"att"] isEqualToString: @"23"]);
45 |
46 | // should handle several attributes on one element (three expected)
47 | element = [@"" element];
48 | assert([[element attributes] count] == 3);
49 | assert([[element attribute: @"att1"] isEqualToString: @"23"]);
50 | assert([[element attribute: @"att2"] isEqualToString: @"red"]);
51 | assert([[element attribute: @"att3"] isEqualToString: @"what"]);
52 | }
53 |
54 | +(void)testAttribute{ // Checks attribute: lookups of "bar" on several single-element fixtures.
55 | NSString* result;
56 | // NOTE(review): every fixture literal below is empty in this copy -- the markup was presumably lost; restore from upstream.
57 | // should handle missing attr (lookup returns nil)
58 | result = [[@"" element] attribute: @"bar"];
59 | assert(result == nil);
60 |
61 | // should handle attr
62 | result = [[@"" element] attribute: @"bar"];
63 | assert([result isEqualToString: @"23"]);
64 |
65 | // should handle attr among others
66 | result = [[@"" element] attribute: @"bar"];
67 | assert([result isEqualToString: @"23"]);
68 |
69 | // should handle attr with quotes
70 | result = [[@"" element] attribute: @"bar"];
71 | assert([result isEqualToString: @"huh"]);
72 |
73 | // should handle attr without values
74 | result = [[@"" element] attribute: @"bar"];
75 | assert([result isEqualToString: @"goo"]);
76 | }
77 |
78 | +(void)testContentsOfChildren{ // Checks that contentsOfChildren maps the keys "name" and "phone" to the expected text for the selected "item" element.
79 | Element* document = [Element parseXML: @"lee 919-971-1377 "]; // NOTE(review): the XML tags appear to be missing from this fixture in this copy -- restore from upstream
80 | Element* item = [document selectElement:@"item"];
81 | NSDictionary* kids = [item contentsOfChildren];
82 | assert([[kids objectForKey: @"name"] isEqualToString: @"lee"]);
83 | assert([[kids objectForKey: @"phone"] isEqualToString: @"919-971-1377"]);
84 | }
85 |
86 | +(void)testSelectElements{ // Checks the first two results of selectElements: for the selector "item". NOTE(review): the fixture and expected literals look damaged in this copy -- restore from upstream.
87 | Element* root = [Element parseXML: @"- goo
- foo
"];
88 | NSArray* found = [root selectElements: @"item"]; // NOTE(review): isEqualToString: is sent directly to the returned objects below -- confirm they respond to it
89 | assert([[found objectAtIndex: 0] isEqualToString: @"- "]);
90 | assert([[found objectAtIndex: 1] isEqualToString: @"
- "]);
91 | }
92 |
93 | +(void)testSelectElement:(NSString*)source selector:(NSString*)sel expect:(NSString*)expect{
94 | // Helper: parses source as HTML and asserts that the description of the element selected by sel equals expect.
95 | Element* match = [[Element parseHTML: source] selectElement: sel];
96 | NSString* actual = [match description];
97 | assert([actual isEqualToString: expect]);
98 | }
99 |
100 | +(void)testElementWithCSSSelector{ // Runs testSelectElement:selector:expect: over a series of selector forms.
101 | // NOTE(review): the HTML fixtures and expected strings below look like they lost their markup in this copy -- restore from upstream.
102 | // should handle a single-part selector
103 | [self testSelectElement: @"
"
104 | selector: @"img"
105 | expect: @" "];
106 |
107 | // should handle multiple parts
108 | [self testSelectElement: @" "
109 | selector: @"body img"
110 | expect: @" "];
111 |
112 | // should handle multiple parts with more complicated parts (attribute test)
113 | [self testSelectElement: @" "
114 | selector: @"body img[src='foo']"
115 | expect: @" "];
116 |
117 | // should handle a class
118 | [self testSelectElement: @" "
119 | selector: @"body a.one"
120 | expect: @""];
121 |
122 | // should handle a class among more than one
123 | [self testSelectElement: @" "
124 | selector: @"body a.two"
125 | expect: @""];
126 |
127 | // should handle multiple classes in one part
128 | [self testSelectElement: @" "
129 | selector: @"body a.one.two"
130 | expect: @""];
131 |
132 | // should handle an id
133 | [self testSelectElement: @" "
134 | selector: @"#bob img"
135 | expect: @" "];
136 |
137 | // should handle multiple parts with misses
138 | [self testSelectElement: @" "
139 | selector: @"body img[src='goo']"
140 | expect: @" "];
141 |
142 | // should handle successor verb (+)
143 | [self testSelectElement: @"here there three "
144 | selector: @"body a + img"
145 | expect: @" "];
146 |
147 | // should handle child verb (>)
148 | [self testSelectElement: @" "
149 | selector: @"body a > img"
150 | expect: @" "];
151 |
152 | }
153 |
154 | +(void)testSelectElemenContents:(NSString*)source selector:(NSString*)sel expect:(NSString*)expect{
155 | // Helper: parses source as HTML and asserts that the contentsSource of the element selected by sel equals expect.
156 | Element* match = [[Element parseHTML: source] selectElement: sel];
157 | assert([[match contentsSource] isEqualToString: expect]);
158 | }
159 |
160 | +(void)testElementContentsWithCSSSelector{ // Checks contentsSource of an element chosen with an attribute selector.
161 | // should return the contents of the anchor whose href is '2' (NOTE(review): fixture markup appears lost in this copy)
162 | [self testSelectElemenContents: @"not this some real text "
163 | selector: @"body a[href='2']"
164 | expect: @"some real text "];
165 | }
166 | +(void)testAll{ // Runs every Element test in this category, one after another.
167 | [self testElement];
168 | [self testAttribute];
169 | [self testContentsOfChildren];
170 | [self testSelectElements];
171 | [self testElementWithCSSSelector];
172 | [self testElementContentsWithCSSSelector];
173 |
174 | }
175 | @end
176 |
--------------------------------------------------------------------------------
/Test/NSString_HTML_Test.h:
--------------------------------------------------------------------------------
1 | //
2 | // NSString_HTML_Test.h
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/16/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import
26 |
27 |
28 | @interface NSString (HTML_Test) // Class-method self-tests for the HTML helpers on NSString, built on assert().
29 | +(void)testStripTags; // exercises stripTags
30 | +(void)testParseHTML; // exercises parseHTML:delegate:selector:context: and its chunk callback
31 | +(void)testStringByReplacingEntities; // exercises stringByReplacingEntities
32 |
33 | +(void)testAllHTMLTest; // runs the tests above plus further tests defined only in the implementation file
34 |
35 | @end
36 |
--------------------------------------------------------------------------------
/Test/NSString_HTML_Test.m:
--------------------------------------------------------------------------------
1 | //
2 | // NSString_HTML_Test.m
3 | // Thumbprint
4 | //
5 | // Created by Lee Buck on 4/16/09.
6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | //
8 | // This program is free software: you can redistribute it and/or modify
9 | // it under the terms of the GNU General Public License as published by
10 | // the Free Software Foundation, either version 3 of the License, or
11 | // (at your option) any later version.
12 |
13 | // This program is distributed in the hope that it will be useful,
14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | // GNU General Public License for more details.
17 |
18 | // Commercial licences without many of the obligations of GPL
19 | // are available for a nominal fee at sales@touchtankapps.com.
20 |
21 | // You should have received a copy of the GNU General Public License
22 | // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 | //
24 |
25 | #import "NSString_HTML_Test.h"
26 | #import "NSString_HTML.h"
27 | #import "NSString_Additions.h"
28 | #import "Element.h"
29 | #import "Chunk.h"
30 | #import "ElementParser.h"
31 |
32 |
33 | @implementation NSString (HTML_Test)
34 |
35 | +(void)testStripTags{ // Checks the text produced by stripTags for a series of HTML fixtures. NOTE(review): the fixtures look like they lost their tags and entities in this copy -- restore from upstream.
36 | NSString* result;
37 | // should strip tags
38 | result = [@"this is some text " stripTags];
39 | assert([result isEqualToString: @"this is some text"]);
40 |
41 | // should handle text outside of tags
42 | result = [@"outside some stuff this is some text " stripTags];
43 | assert([result isEqualToString: @"outside some stuff this is some text"]);
44 |
45 | // should handle entities
46 | result = [@"this is some text & and more " stripTags];
47 | assert([result isEqualToString: @"this is some text & and more"]);
48 |
49 | // should handle bad entities
50 | result = [@"this is some text & and more " stripTags];
51 | assert([result isEqualToString: @"this is some text & and more"]);
52 |
53 | // should replace p tags with returns (and BRs)
54 | result = [@"this is some text
and more " stripTags];
55 | assert([result isEqualToString: @"this is \n\nsome text and more"]);
56 |
57 | // should handle not including script tags
58 | result = [@"this is some text and more
" stripTags];
59 | assert([result isEqualToString: @"this is \n\nsome text and more"]);
60 |
61 | // should handle ill-formed html... (and BRs)
62 | result = [@"this is some text and more
" stripTags];
63 | assert([result isEqualToString: @"this is \n\nsome text \n\nand more"]);
64 | }
65 |
66 |
67 |
68 | +(id)chunk:(Chunk*)chunk context:(NSMutableString*)result{
69 | // Callback passed to parseHTML:delegate:selector:context: by testParseHTML; appends the chunk's interior string followed by "|".
70 | [result appendString: [[chunk interiorString] stringByAppendingString: @"|"]];
71 | return self; // original note: "continue"
72 | }
73 |
74 | +(void)testParseHTML{ // Feeds a fixture through parseHTML:delegate:selector:context: and checks the "|"-separated chunk text collected by chunk:context:.
75 | NSMutableString* result = [[@"|" mutableCopy] autorelease]; // mutableCopy returns an owned object; autorelease it so the test does not leak (manual retain/release file)
76 | NSString* source = @"some text "goes here ";
77 | [NSString parseHTML: source delegate: self selector: @selector(chunk:context:) context: result]; // NOTE(review): the fixture on the previous line appears damaged in this copy (markup and entities lost) -- restore from upstream
78 | assert([result isEqualToString: @"|foo|some |ignoreme|text |quot|goes here|/foo|b class='huh'|c /|/b|"]);
79 | }
80 |
81 | +(void)testStringByReplacingEntities{ // Checks stringByReplacingEntities on inputs with no, known, unknown and unterminated entities.
82 | NSString* result;
83 | // NOTE(review): the "entity at start/end" inputs below equal their expected outputs in this copy -- the entities were presumably decoded during extraction; restore from upstream.
84 | // should handle no entities
85 | result = [@"foo" stringByReplacingEntities];
86 | assert([result isEqualToString: @"foo"]);
87 |
88 | // should handle entity at start
89 | result = [@">foo" stringByReplacingEntities];
90 | assert([result isEqualToString: @">foo"]);
91 |
92 | // should handle entity at end
93 | result = [@"foo<" stringByReplacingEntities];
94 | assert([result isEqualToString: @"foo<"]);
95 |
96 | // should handle unknown entity (left untouched)
97 | result = [@"foo&dddlt;" stringByReplacingEntities];
98 | assert([result isEqualToString: @"foo&dddlt;"]);
99 |
100 | // should handle badly formed entity (no terminating ';', left untouched)
101 | result = [@"foo&dddlt" stringByReplacingEntities];
102 | assert([result isEqualToString: @"foo&dddlt"]);
103 | }
104 |
105 |
106 |
107 | +(void)testStartsWithStr{
108 | // Exercises startsWithStr() at three offsets of a fixed sample string.
109 | NSString* sample = @"foo 23 oa";
110 | CFStringInlineBuffer inlineBuffer;
111 | CFStringInitInlineBuffer((CFStringRef)sample, &inlineBuffer, CFRangeMake(0, [sample length]));
112 |
113 | // "oo" begins at index 1 of the sample
114 | assert(startsWithStr(&inlineBuffer, 1, "oo"));
115 | // ...and at neither index 0 nor index 5
116 | assert(!startsWithStr(&inlineBuffer, 0, "oo"));
117 | assert(!startsWithStr(&inlineBuffer, 5, "oo"));
118 | }
119 |
120 | +(void)testLenEntityName{
121 | // Exercises lenEntityName() at two offsets of a fixed sample string.
122 | NSString* sample = @"#foo;ng";
123 | CFStringInlineBuffer inlineBuffer;
124 | CFStringInitInlineBuffer((CFStringRef)sample, &inlineBuffer, CFRangeMake(0, [sample length]));
125 |
126 | // offset 0: expected length 5 (presumably the span "#foo;")
127 | assert(5 == lenEntityName(&inlineBuffer, 0));
128 | // offset 5 ("ng", with no ';' after it): expected length 0
129 | assert(0 == lenEntityName(&inlineBuffer, 5));
130 | }
131 |
132 | +(void)testLenThruOr{
133 | // Exercises lenThruOr() with several character pairs and start offsets on a fixed sample string.
134 | NSString* sample = @"foo 23 oa";
135 | CFStringInlineBuffer inlineBuffer;
136 | CFStringInitInlineBuffer((CFStringRef)sample, &inlineBuffer, CFRangeMake(0, [sample length]));
137 |
138 | // searches starting at index 0
139 | assert(1 == lenThruOr(&inlineBuffer, 0, 'o', '2'));
140 | assert(4 == lenThruOr(&inlineBuffer, 0, '2', '1'));
141 | assert(4 == lenThruOr(&inlineBuffer, 0, 'w', '2'));
142 | assert(9 == lenThruOr(&inlineBuffer, 0, 'x', 'z')); // neither character occurs in the sample; 9 is its length
143 | // search starting at index 3
144 | assert(1 == lenThruOr(&inlineBuffer, 3, 'o', '2'));
145 | }
146 |
147 | +(void)testLenThru{
148 | // Exercises lenThru() with substrings that do and do not occur in a fixed sample string.
149 | NSString* sample = @"foo 23 oa";
150 | CFStringInlineBuffer inlineBuffer;
151 | CFStringInitInlineBuffer((CFStringRef)sample, &inlineBuffer, CFRangeMake(0, [sample length]));
152 |
153 | // substrings that occur in the sample
154 | assert(4 == lenThru(&inlineBuffer, 2, "23"));
155 | assert(6 == lenThru(&inlineBuffer, 0, "23"));
156 | assert(9 == lenThru(&inlineBuffer, 0, "oa"));
157 | // "XXX" never occurs: expected length 0
158 | assert(0 == lenThru(&inlineBuffer, 0, "XXX"));
159 | }
160 |
161 | +(void)testSkipNonWhitespace{
162 | // Exercises skipNonWhitespace(), which is given the buffer and a cursor that it updates in place.
163 | NSString* sample = @"foo 23 oa";
164 | CFStringInlineBuffer inlineBuffer;
165 | CFStringInitInlineBuffer((CFStringRef)sample, &inlineBuffer, CFRangeMake(0, [sample length]));
166 |
167 | CFIndex cursor = 0;
168 |
169 | // from the start of "foo": returns ' ' and leaves the cursor at index 3
170 | assert(' ' == skipNonWhitespace(&inlineBuffer, &cursor));
171 | assert(3 == cursor);
172 |
173 | // cursor already on whitespace: same result, cursor unchanged
174 | assert(' ' == skipNonWhitespace(&inlineBuffer, &cursor));
175 | assert(3 == cursor);
176 |
177 | // no whitespace from index 7 onward: returns 0 with the cursor at 9, the length of the sample
178 | cursor = 7;
179 | assert(0 == skipNonWhitespace(&inlineBuffer, &cursor));
180 | assert(9 == cursor);
181 | }
182 |
183 | +(void)testSkipWhitespace{
184 | // Exercises skipWhitespace(), which is given the buffer and a cursor that it updates in place.
185 | NSString* sample = @"foo 23 oa";
186 | CFStringInlineBuffer inlineBuffer;
187 | CFStringInitInlineBuffer((CFStringRef)sample, &inlineBuffer, CFRangeMake(0, [sample length]));
188 |
189 | CFIndex cursor = 3;
190 |
191 | // on the space at index 3: returns '2' and moves the cursor to index 4
192 | assert('2' == skipWhitespace(&inlineBuffer, &cursor));
193 | assert(4 == cursor);
194 |
195 | // cursor already on a non-whitespace character: same result, cursor unchanged
196 | assert('2' == skipWhitespace(&inlineBuffer, &cursor));
197 | assert(4 == cursor);
198 |
199 | // NOTE(review): reaching the end of the buffer is not exercised by this test.
200 |
201 | }
202 |
203 | +(void)testLenToken{
204 | // Exercises lenToken() inside and outside a token of a fixed sample string.
205 | NSString* sample = @"foo 23 oa";
206 | CFStringInlineBuffer inlineBuffer;
207 | CFStringInitInlineBuffer((CFStringRef)sample, &inlineBuffer, CFRangeMake(0, [sample length]));
208 |
209 | // index 0 starts the three-character run "foo"
210 | assert(3 == lenToken(&inlineBuffer, 0));
211 | // index 3 is a space, so no token starts there
212 | assert(0 == lenToken(&inlineBuffer, 3));
213 |
214 | }
215 | +(void)testParseEntity{ // Checks the character returned by parseEntity() for several inputs; len receives an out-value that is not inspected here.
216 | NSString* string = @"foo 23 oa";
217 | CFStringInlineBuffer buffer;
218 |
219 | CFRange range = CFRangeMake(0, [string length]);
220 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); // this first buffer is re-initialised below before parseEntity() is ever called
221 |
222 | CFIndex len;
223 | // NOTE(review): the first four inputs below look like already-decoded entities in this copy -- the originals were presumably entity references; restore from upstream.
224 | string = @"&";
225 | range = CFRangeMake(0, [string length]);
226 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
227 | assert(parseEntity(&buffer, 0, &len)=='&');
228 |
229 | string = @"'";
230 | range = CFRangeMake(0, [string length]);
231 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
232 | assert(parseEntity(&buffer, 0, &len)=='\'');
233 |
234 | string = @" ";
235 | range = CFRangeMake(0, [string length]);
236 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
237 | char c = parseEntity(&buffer, 0, &len);
238 | assert(c==' ');
239 |
240 | string = @" ";
241 | range = CFRangeMake(0, [string length]);
242 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
243 | assert(parseEntity(&buffer, 0, &len)==' ');
244 | // an entity name that is not recognised yields 0
245 | string = @"&foo;";
246 | range = CFRangeMake(0, [string length]);
247 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
248 | assert(parseEntity(&buffer, 0, &len)==0);
249 | // an entity with no terminating ';' yields 0
250 | string = @"&mdas";
251 | range = CFRangeMake(0, [string length]);
252 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
253 | assert(parseEntity(&buffer, 0, &len)==0);
254 |
255 | }
256 |
257 | +(void)testLenDoctype{ // Expects lenDoctype() at offset 0 to span the whole input string. NOTE(review): both inputs are empty in this copy -- the doctype markup was presumably lost; restore from upstream.
258 | NSString* string = @"";
259 | CFStringInlineBuffer buffer;
260 | CFRange range = CFRangeMake(0, [string length]);
261 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
262 | assert(lenDoctype(&buffer, 0)==[string length]);
263 | // second fixture, checked the same way
264 | string = @"";
265 | range = CFRangeMake(0, [string length]);
266 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
267 | assert(lenDoctype(&buffer, 0)==[string length]);
268 |
269 | }
270 |
271 | +(void)testHexValue{
272 | // Checks the hexValue string helper against hand-computed values.
273 | // Well-formed input: plain digits, then upper- and lower-case hex letters.
274 | assert(0x12 == [@"12" hexValue]);
275 | assert(0x1A == [@"1A" hexValue]);
276 | assert(0x1c == [@"1c" hexValue]);
277 |
278 | // A bad character ('g') stops the conversion; the digits before it still count.
279 | assert(0x10 == [@"10g34" hexValue]);
280 |
281 | // An empty string, or one with no usable digits, gives 0.
282 | assert(0 == [@"" hexValue]);
283 | assert(0 == [@"wywt" hexValue]);
284 | }
285 |
286 |
287 | +(void)testAllHTMLTest{ // Runs every test in this category inside a local autorelease pool.
288 | NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; // collects objects autoreleased by the tests below
289 | [NSString testStartsWithStr];
290 | [NSString testLenEntityName];
291 | [NSString testLenThruOr];
292 | [NSString testLenThru];
293 | [NSString testSkipNonWhitespace];
294 | [NSString testSkipWhitespace];
295 | [NSString testLenToken];
296 | [NSString testParseEntity];
297 | [NSString testLenDoctype];
298 | [NSString testStripTags];
299 | [NSString testParseHTML];
300 | [NSString testStringByReplacingEntities];
301 | [NSString testHexValue];
302 | [pool release]; // releasing the pool releases the objects it collected
303 | }
304 | @end
305 |
306 |
--------------------------------------------------------------------------------