├── .gitignore ├── Classes ├── CDataChunk.h ├── CDataChunk.m ├── CSSPartMatcher.h ├── CSSPartMatcher.m ├── CSSSelector.h ├── CSSSelector.m ├── CSSSelectorMatcher.h ├── CSSSelectorMatcher.m ├── CSSSelectorPart.h ├── CSSSelectorPart.m ├── Chunk.h ├── Chunk.m ├── CommentChunk.h ├── CommentChunk.m ├── DoctypeChunk.h ├── DoctypeChunk.m ├── DocumentRoot.h ├── DocumentRoot.m ├── Element.h ├── Element.m ├── ElementParser.h ├── ElementParser.m ├── EntityChunk.h ├── EntityChunk.m ├── NSString_HTML.h ├── NSString_HTML.m ├── ProcessingInstructionChunk.h ├── ProcessingInstructionChunk.m ├── TagChunk.h ├── TagChunk.m ├── TxtChunk.h ├── TxtChunk.m ├── URLParser.h └── URLParser.m ├── Code Overview.txt ├── Demo ├── Classes │ ├── DemoAppDelegate.h │ ├── DemoAppDelegate.m │ ├── DemoViewController.h │ └── DemoViewController.m ├── Demo-Info.plist ├── Demo.xcodeproj │ └── project.pbxproj ├── DemoViewController.xib ├── Demo_Prefix.pch ├── MainWindow.xib ├── main.m ├── source.html └── source2.html ├── ElementParser.xcodeproj └── project.pbxproj ├── ElementParser_Prefix.pch ├── GPL v3 Liscense.pdf ├── HTML Entities.plist ├── Read Me.txt └── Test ├── .DS_Store ├── CSSSelectorTest.h ├── CSSSelectorTest.m ├── ElementParserTest.h ├── ElementParserTest.m ├── Element_Test.h ├── Element_Test.m ├── NSString_HTML_Test.h ├── NSString_HTML_Test.m └── gizmodo.xml /.gitignore: -------------------------------------------------------------------------------- 1 | # the build 2 | build 3 | 4 | # temp nibs and swap files 5 | *~.nib 6 | *.swp 7 | 8 | # OS X folder attributes 9 | .DS_Store 10 | 11 | # user-specific XCode stuff 12 | *.mode1v3 13 | *.mode2v3 14 | *.pbxuser 15 | *.perspectivev3 16 | 17 | -------------------------------------------------------------------------------- /Classes/CDataChunk.h: -------------------------------------------------------------------------------- 1 | // 2 | // CDataChunk.h 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

// NOTE(review): the framework name was lost when this listing was extracted;
// Foundation is assumed because the class hierarchy only needs NSObject/NSString.
#import <Foundation/Foundation.h>
#import "Chunk.h"

/**
 CDataChunk is a chunk corresponding to a CDATA section.
 It adds no state of its own; it only specialises the behaviour inherited from Chunk.
 */
@interface CDataChunk : Chunk {

}

@end

--------------------------------------------------------------------------------
/Classes/CDataChunk.m:
--------------------------------------------------------------------------------
//
// CDataChunk.m
// Thumbprint
//
// Created by Lee Buck on 4/21/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "CDataChunk.h"

// Lengths of the delimiters that wrap the character data of a CDATA section:
// the 9-character opening marker and the 3-character closing marker.
static const NSUInteger CDataOpenDelimiterLength = 9;
static const NSUInteger CDataCloseDelimiterLength = 3;

@implementation CDataChunk

/** Identifies this chunk as a CDATA section. */
-(NSString*)kind{
    return ChunkKindCData;
}

/**
 The range of the character data between the opening and closing delimiters.
 A chunk too short to hold both delimiters (for example a truncated section)
 yields an empty range instead of letting the unsigned length wrap around.
 */
-(NSRange)interiorRange{
    NSUInteger delimiterLength = CDataOpenDelimiterLength + CDataCloseDelimiterLength;
    if (range.length < delimiterLength)
        return NSMakeRange(range.location, 0);
    return NSMakeRange(range.location + CDataOpenDelimiterLength, range.length - delimiterLength);
}

/** Human readable name used for debugging. */
+(NSString*)humanName{
    return @"cdata";
}

@end

--------------------------------------------------------------------------------
/Classes/CSSPartMatcher.h:
--------------------------------------------------------------------------------
//
// CSSPartMatcher.h
// Thumbprint
//
// Created by Lee Buck on 4/19/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

// NOTE(review): the framework name was lost when this listing was extracted; Foundation is assumed.
#import <Foundation/Foundation.h>
#import "Element.h"
@class CSSSelectorMatcher;

/**
 * Responsible for representing a successful match on a part.
* It is presented elements in an attempt to complete the next part of the match
 *
 */
@interface CSSPartMatcher : NSObject {
    CSSSelectorMatcher* selectorMatcher; // not retained
    Element* matchedElement;             // the element that satisfied the matched part
    int matchedPartIndex;                // index of the matched part within the selector
    NSMutableArray* matchersForNextPart; // child matchers, created lazily
}

@property (nonatomic, retain) Element* matchedElement;
@property int matchedPartIndex;

/** Creates a matcher for anElement; aSelectorMatcher is referenced but not retained. */
-(id)initWithElement:(Element*)anElement selectorMatcher:(CSSSelectorMatcher*)aSelectorMatcher;

//-(void)pruneMatchesForElement:(Element*)anElement;

/**
 * Offers nextElement as a candidate for the selector part at index.
 * Returns YES when the element completes a match of the whole selector.
 */
-(BOOL)matchNextElement:(Element*)nextElement forIndex:(int)index;

@end

--------------------------------------------------------------------------------
/Classes/CSSPartMatcher.m:
--------------------------------------------------------------------------------
//
// CSSPartMatcher.m
// Thumbprint
//
// Created by Lee Buck on 4/19/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "CSSPartMatcher.h"
#import "CSSSelectorPart.h"
#import "CSSSelectorMatcher.h"

@implementation CSSPartMatcher

@synthesize matchedElement, matchedPartIndex;

/**
 * Records anElement as the element matched by this matcher.
 * anElement is retained; aSelectorMatcher is stored without being retained.
 */
-(id)initWithElement:(Element*) anElement selectorMatcher:(CSSSelectorMatcher*)aSelectorMatcher{
    self = [super init];
    if (self){
        matchedElement = [anElement retain];
        selectorMatcher = aSelectorMatcher; // not retained
    }
    return self;
}

-(void)dealloc{
//  NSLog(@"pruned: %@", [self description]);
    [matchedElement release];
    [matchersForNextPart release];
    [super dealloc];
}

/* we don't do this yet...
-(void)pruneMatchesForElement: (Element*)anElement{
    if (!matchersForNextPart) return;
    for (CSSPartMatcher* match in matchersForNextPart){
        if ([match scopeElement] == anElement)
            [matchersForNextPart removeObject: match];
        else
            [match pruneMatchesForElement: anElement];
    }
}
*/

/**
 * Remembers nextElement as a partial match for the part at index so that later
 * elements can be tested against the part that follows it.
 */
-(void)addNextMatch:(Element*)nextElement withIndex:(int)index{
    CSSPartMatcher* nextMatch = [[CSSPartMatcher alloc] initWithElement: nextElement selectorMatcher: selectorMatcher];
    nextMatch.matchedPartIndex = index;
    if (!matchersForNextPart)
        matchersForNextPart = [[NSMutableArray alloc] initWithCapacity: 4];
    [matchersForNextPart addObject: nextMatch];
    [nextMatch release];
}

/**
 * Tests nextElement against the selector part at index, relative to this matcher's element.
 *
 * The element is also forwarded to the child matchers for the part at index + 1.
 * Returns YES when the element completes the whole selector, either here or through a child.
 * When part and verb match but the selector is not yet complete, the element is
 * recorded as a new child matcher.
 */
-(BOOL)matchNextElement:(Element*) nextElement forIndex: (int) index{
    CSSSelector* selector = [selectorMatcher selector];
    CSSSelectorPart* nextPart = [selector partAtIndex: index];
    CSSVerb nextVerb = [selector verbAtIndex: index];

    BOOL verbMatches = NO;
    if ([nextPart matchesElement: nextElement]){
        // Verbs are strings: compare by value rather than relying on identical
        // literals happening to share one address.
        if ([nextVerb isEqualToString: CSSVerbAny])
            verbMatches = YES;
        else if ([nextVerb isEqualToString: CSSVerbDescendant])
            verbMatches = [nextElement hasAncestor: self.matchedElement];//wasteful to not prune matches as they go out of scope
        else if ([nextVerb isEqualToString: CSSVerbChild])
            verbMatches = nextElement.parent == self.matchedElement;
        else if ([nextVerb isEqualToString: CSSVerbSuccessor])
            verbMatches = nextElement == self.matchedElement.nextSybling;
    }

    BOOL completeMatch = verbMatches && (index == [selector countOfParts] - 1);

    // Enumerating a nil array is a no-op, so no explicit nil check is needed.
    // NOTE(review): once completeMatch is YES the remaining child matchers are not
    // offered the element (short-circuit); kept as in the original - confirm intent.
    for (CSSPartMatcher* match in matchersForNextPart){
        completeMatch = completeMatch || [match matchNextElement: nextElement forIndex: index + 1];
    }

    if (!completeMatch && verbMatches)//actually part and verb match
        [self addNextMatch: nextElement withIndex: index];

    return completeMatch;
}

/** The selector part this matcher's element satisfied. */
-(CSSSelectorPart*)matchedPart{
    return [[selectorMatcher selector] partAtIndex: matchedPartIndex];
}

-(NSString*)description{
    // -count returns NSUInteger; cast so the format specifier is correct on every architecture.
    return [NSString stringWithFormat: @"%@ matched %@ -- %lu matchersForNextPart", [[self matchedPart] description], [matchedElement description], (unsigned long)[matchersForNextPart count]];
}

@end

--------------------------------------------------------------------------------
/Classes/CSSSelector.h:
--------------------------------------------------------------------------------
//
// CSSSelector.h
// Thumbprint
//
// Created by Lee Buck on 4/17/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

// NOTE(review): the framework name was lost when this listing was extracted; Foundation is assumed.
#import <Foundation/Foundation.h>
#import "Element.h"

@class CSSSelectorMatcher;
@class CSSSelectorPart;

#define CSSVerbChild @" > "
#define CSSVerbSuccessor @" + "
#define CSSVerbDescendant @" "
#define CSSVerbAny @""

// A verb is stored as a string. A typedef (rather than a macro) keeps
// declarations such as "CSSVerb a, b;" well-typed.
typedef NSString* CSSVerb;

/**
 * CSSSelector is responsible for modeling a chain of CSSSelectorParts. For example
 *
 *     body a.link
 *
 * is a chain of two parts "body" and "a.link"
 *
 * Parts are joined by "verbs" which correspond to symbols " ", "+", and ">"
 * These verbs define the relative position of the second part to the first
 * Supported verbs are:
 *   space  within    - the second part must match an Element within the
 *                      Element matching the first part
 *
 *   >      child     - the second part must match an Element whose parent is
 *                      the Element matching the first part
 *
 *   +      successor - the second part must match an Element whose previous
 *                      sibling was the Element matching the first part
 */

@interface CSSSelector : NSObject {
    NSMutableArray* chain; // alternating parts and verbs: part, verb, part, ...
}
/** Parses a selector such as "body > a.link" into its chain of parts and verbs. */
-(id)initWithString:(NSString*)string;
-(NSString*)description;

/** Number of parts in the chain (verbs are not counted). */
-(int)countOfParts;
-(CSSSelectorPart*)partAtIndex:(int)index;
/** The verb joining the part at index to the part before it; CSSVerbAny for index 0. */
-(CSSVerb)verbAtIndex:(int)index;
/** The verb joining the part at index to the part after it; CSSVerbAny for the last part. */
-(CSSVerb)verbAfterIndex:(int)index;

@end

--------------------------------------------------------------------------------
/Classes/CSSSelector.m:
--------------------------------------------------------------------------------
//
// CSSSelector.m
// Thumbprint
//
// Created by Lee Buck on 4/17/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "CSSSelector.h"
#import "CSSSelectorPart.h"
#import "NSString_HTML.h"
#import "CSSSelectorMatcher.h"


@implementation CSSSelector


/**
 * Parses string into an alternating chain of CSSSelectorPart objects and verbs.
 * A nil string is treated as an empty selector (no parts).
 */
-(id)initWithString:(NSString*)string{
    self = [super init];
    if (!self) return nil;

    // The inline buffer cannot be initialised from a nil string.
    if (!string) string = @"";

    CFStringInlineBuffer buffer;
    CFRange range = CFRangeMake(0, [string length]);
    CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);

    chain = [[NSMutableArray alloc] initWithCapacity: 10];
    CFIndex index = 0;
    // skipWhitespace is expected to return 0 once the input is exhausted.
    while (skipWhitespace(&buffer, &index) != 0){
        CSSSelectorPart* part = [[CSSSelectorPart alloc] initWithIndex: &index inBuffer: &buffer];
        [chain addObject: part];
        [part release];

        unichar c = skipWhitespace(&buffer, &index);
        if (!c) break;

        if (c=='+'){
            [chain addObject: CSSVerbSuccessor];
            index++;
        }
        else if (c=='>'){
            [chain addObject: CSSVerbChild];
            index++;
        }
        else
            // No explicit symbol: whitespace alone means "descendant", and c
            // already belongs to the next part, so index is not advanced.
            [chain addObject: CSSVerbDescendant];
    }

    return self;
}

-(void)dealloc{
//  NSLog(@"disposing of %@", [self description]);
    [chain release];
    [super dealloc];
}

/** Concatenates the descriptions of every part and verb in chain order. */
-(NSString*)description{
    NSMutableString* result = [NSMutableString string];
    for (id item in chain){
        [result appendString: [item description]];
    }
    return result;
}

/** Parts sit at the even chain positions, so n parts occupy 2n - 1 (or 2n) entries. */
-(int)countOfParts{
    return (int)(([chain count] + 1) / 2);
}

-(CSSSelectorPart*)partAtIndex:(int)index{
    return [chain objectAtIndex: index * 2];
}

/** The verb that precedes the part at index; the first part has no verb, hence CSSVerbAny. */
-(CSSVerb)verbAtIndex:(int)index{
    return (index > 0) ? [chain objectAtIndex: index * 2 - 1] : CSSVerbAny;
}

// sometime we need to access the next verb after an index... see scopingElement
-(CSSVerb)verbAfterIndex:(int)index{
    return (index < [self countOfParts] - 1) ? [self verbAtIndex: index + 1] : CSSVerbAny;
}

@end



--------------------------------------------------------------------------------
/Classes/CSSSelectorMatcher.h:
--------------------------------------------------------------------------------
//
// CSSSelectorMatcher.h
// Thumbprint
//
// Created by Lee Buck on 4/19/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

// NOTE(review): the framework name was lost when this listing was extracted; Foundation is assumed.
#import <Foundation/Foundation.h>
#import "Element.h"
#import "CSSSelector.h"
#import "CSSPartMatcher.h"

/**
 * Responsible for matching a CSSSelector.
 * It does this by minting matching parts and creating
 * CSSPartMatchers for all intermediate potential matches
 *
 */

@interface CSSSelectorMatcher : NSObject {
    CSSSelector* selector;
    CSSPartMatcher* rootMatch; // root of the tree of partial matches
    NSMutableArray* matches;   // elements that completed the selector, in the order found
}
@property (nonatomic, retain) CSSSelector* selector;
@property (nonatomic, retain) NSMutableArray* matches;

-(id)initWithSelector:(CSSSelector*)selector;

/**
 * Offers element to the matcher. Returns YES (and records the element in
 * matches) when it completes the selector. Close tags never match.
 */
-(BOOL)matchElement:(Element*) element;

/** The first element that completed the selector, or nil when none has yet. */
-(Element*)firstMatch;
@end

--------------------------------------------------------------------------------
/Classes/CSSSelectorMatcher.m:
--------------------------------------------------------------------------------
//
// CSSSelectorMatcher.m
// Thumbprint
//
// Created by Lee Buck on 4/19/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "CSSSelectorMatcher.h"
#import "CSSSelectorPart.h"

@implementation CSSSelectorMatcher

@synthesize selector, matches;

/**
 * Retains aSelector and prepares an empty match list plus the root part matcher,
 * which has no matched element of its own.
 */
-(id)initWithSelector:(CSSSelector*)aSelector{
    self = [super init];
    if (self){
        selector = [aSelector retain];
        // The root matcher keeps a non-retained back-reference to self, so no retain cycle is formed.
        rootMatch = [[CSSPartMatcher alloc] initWithElement: nil selectorMatcher: self];
        matches = [[NSMutableArray alloc] initWithCapacity: 1];
    }
    return self;
}

-(void)dealloc{
    [selector release];
    [rootMatch release];
    [matches release];
    [super dealloc];
}

/** The first element that completed the selector, or nil when nothing has matched. */
-(Element*)firstMatch{
    return ([matches count] > 0) ? [matches objectAtIndex: 0] : nil;
}

/**
 * Offers element to the tree of partial matches, starting at the first selector part.
 * Close tags are ignored. A completing element is appended to matches.
 */
-(BOOL)matchElement:(Element*) element{
    if ([element isCloseTag]) return NO;
    BOOL matchComplete = [rootMatch matchNextElement: element forIndex: 0];
    if (matchComplete)
        [matches addObject: element];
    return matchComplete;
}


@end

--------------------------------------------------------------------------------
/Classes/CSSSelectorPart.h:
--------------------------------------------------------------------------------
//
// CSSSelectorPart.h
// Thumbprint
//
// Created by Lee Buck on 4/17/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

// NOTE(review): the framework name was lost when this listing was extracted; Foundation is assumed.
#import <Foundation/Foundation.h>
#import "Element.h"

/**
 * CSSSelectorPart is responsible for modeling one part of CSSSelector. For example
 *
 *     a.link[target]
 *
 * is a part which matches tags which have a link class name and an attribute 'target'
 *
 * A part can consist of one or more of the following:
 *
 *     *           All elements match (used when no tagname is supplied)
 *     tagname     Matching elements have this tag name
 *     #id         Matching elements have this as their id attribute
 *     .class      Matching elements have this as one of their class names
 *     [attr]      Matching elements have this attribute (regardless of its value)
 *     [attr=val]  Matching elements have this attribute with this value
 *
 */


@interface CSSSelectorPart : NSObject {
    NSString* identifier;
    NSString* tag;
    NSMutableArray* classNames;
    NSString* attrName;
    NSString* attrValue;
}


@property (nonatomic, retain) NSString* identifier;
@property (nonatomic, retain) NSString* tag;
@property (nonatomic, retain) NSArray* classNames;
@property (nonatomic, retain) NSString* attrName;
@property (nonatomic, retain) NSString* attrValue;

/** Parses one part from string; on return *index holds the position just past the part. */
-(id)initWithIndex:(int*) index inString:(NSString*)string;

/** Parses one part starting at *index in buffer; *index is advanced past the part. */
-(id)initWithIndex:(CFIndex*) index inBuffer:(CFStringInlineBuffer*)buffer;
-(NSString*)description;

/** YES when element satisfies every constraint (tag, id, classes, attribute) of this part. */
-(BOOL)matchesElement:(Element*)element;

@end

--------------------------------------------------------------------------------
/Classes/CSSSelectorPart.m:
--------------------------------------------------------------------------------
//
// CSSSelectorPart.m
// Thumbprint
//
// Created by Lee Buck on 4/17/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "CSSSelectorPart.h"
#import "NSString_HTML.h"

@implementation CSSSelectorPart

@synthesize identifier, tag, classNames, attrName, attrValue;

/**
 * Convenience initializer that parses a part from the beginning of string.
 * On return *index (when index is not NULL) holds the position just past the part.
 * NOTE(review): parsing always starts at 0 and ignores the incoming *index value;
 * kept as in the original - confirm whether callers expect it to be honoured.
 */
-(id)initWithIndex:(int*) index inString:(NSString*)string{
    // The inline buffer cannot be initialised from a nil string.
    if (!string) string = @"";

    CFStringInlineBuffer buffer;
    CFRange range = CFRangeMake(0, [string length]);
    CFStringInitInlineBuffer((CFStringRef)string, &buffer, range);
    CFIndex i = 0;
    self = [self initWithIndex: &i inBuffer: &buffer];
    if (index)
        *index = (int)i;
    return self;

}

/**
 * Designated initializer. Consumes one selector part (tag, #id, .class, [attr],
 * [attr=value], *) from buffer starting at *index and leaves *index just past it.
 * Parsing stops at the first character that is whitespace or a control character.
 *
 * Strings produced by createStringFromBuffer are owned by the caller (the original
 * code released them after handing them to a retaining setter), so each one is
 * released here once it has been stored.
 */
-(id)initWithIndex:(CFIndex*) index inBuffer:(CFStringInlineBuffer*)buffer{
    self = [super init];
    if (!self) return nil;

    CFIndex len;
    unichar c = skipWhitespace(buffer, index);
    while (c > 32){
        if (c=='#'){
            len = lenToken(buffer, *index + 1);
            assert(len);
            NSString* parsedIdentifier = createStringFromBuffer(buffer, *index + 1, len);
            self.identifier = parsedIdentifier;
            [parsedIdentifier release];
            (*index) += len + 1;
        }
        else if (c == '.'){
            len = lenToken(buffer, *index + 1);
            assert(len);
            NSString* className = createStringFromBuffer(buffer, *index + 1, len);
            if (!classNames)
                classNames = [[NSMutableArray alloc] initWithObjects: className, nil];
            else
                [classNames addObject: className];
            [className release];
            (*index) += len + 1;
        }
        else if (c == '['){
            (*index)++;
            skipWhitespace(buffer, index);
            len = lenToken(buffer, *index);
            assert(len);
            NSString* parsedName = createStringFromBuffer(buffer, *index, len);
            self.attrName = parsedName;
            [parsedName release];
            (*index) += len;

            c = skipWhitespace(buffer, index);

            if (c == '='){
                (*index)++;
                c = skipWhitespace(buffer, index);
                NSString* parsedValue;
                if (c=='\''){
                    // len presumably counts through the closing quote, so the value itself is len-1 long
                    len = lenThru(buffer, (*index) + 1, "'");
                    assert(len);
                    parsedValue = createStringFromBuffer(buffer, *index + 1, len-1);
                    (*index)++; // step over the opening quote
                }
                else if (c == '"'){
                    len = lenThru(buffer, (*index) + 1, "\"");
                    assert(len);
                    parsedValue = createStringFromBuffer(buffer, *index + 1, len-1);
                    (*index)++; // step over the opening quote
                }
                else{
                    len = lenToken(buffer, (*index));
                    assert(len);
                    parsedValue = createStringFromBuffer(buffer, *index, len);
                }
                self.attrValue = parsedValue;
                [parsedValue release];
                (*index) += len;
                c = skipWhitespace(buffer, index);
            }
            assert(c==']');
            (*index) += 1;
        }
        else if (c == '*')
            (*index)++;
        else{
            len = lenToken(buffer, (*index));
            assert(len);
            NSString* parsedTag = createStringFromBuffer(buffer, *index, len);
            self.tag = parsedTag;
            [parsedTag release];
            (*index) += len;
        }
        c = CFStringGetCharacterFromInlineBuffer(buffer, *index);
    }
    return self;
}

-(void)dealloc{
    [identifier release];
    [tag release];
    [classNames release];
    [attrName release];
    [attrValue release];
    [super dealloc];
}

/**
 * YES when element satisfies every constraint this part carries. Constraints that
 * were not specified (nil) are not checked. An element with an empty range never matches.
 */
-(BOOL)matchesElement:(Element*)element{
    if(element.range.length == 0) return NO; //ElementParser's root
    if (tag && ![element tagNameEquals: tag]) return NO;
    if (identifier && ![identifier isEqualToString: [element attribute: @"id"]]) return NO;
    // every listed class name must be present
    for (NSString* className in classNames){
        if (![element hasClassName: className])
            return NO;
    }
    if (attrName && attrValue && ![attrValue isEqualToString: [element attribute: attrName]]) return NO;
    if (attrName && ![element hasAttribute: attrName]) return NO;
    return YES;
}

/** Rebuilds a selector-like string (tag, #id, .classes, [attr='value']) from the parsed pieces. */
-(NSString*)description{
    NSMutableString* result = [NSMutableString string];
    if (tag)
        [result appendString: tag];
    if (identifier)
        [result appendFormat: @"#%@", identifier];
    for (NSString* className in classNames)
        [result appendFormat: @".%@", className];
    if (attrName){
        [result appendFormat: @"[%@", attrName];
        if (attrValue)
            [result appendFormat: @"='%@']", attrValue];
        else
            [result appendString: @"]"];
    }
    return result;
}

@end

--------------------------------------------------------------------------------
/Classes/Chunk.h:
--------------------------------------------------------------------------------
//
// Chunk.h
// Thumbprint
//
// Created by Lee Buck on 4/21/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Commercial licences without many of the obligations of GPL
// are available for a nominal fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

// NOTE(review): the framework name was lost when this listing was extracted; Foundation is assumed.
#import <Foundation/Foundation.h>

#define ChunkKindDocument @"ChunkKindDocument"
#define ChunkKindTag @"ChunkKindTag"
#define ChunkKindPI @"ChunkKindPI"
#define ChunkKindComment @"ChunkKindComment"
#define ChunkKindEntity @"ChunkKindEntity"
#define ChunkKindCData @"ChunkKindCData"
#define ChunkKindDoctype @"ChunkKindDoctype"
#define ChunkKindText @"ChunkKindText"

/**
 Chunk is a range of source text that has been divided into a meaningful "chunk" by
 the NSString_HTML parser. Examples of a chunk include an element, a cdata section, an entity,
 character data, etc. It is an abstract base class that handles basic housekeeping.
 Subclasses include TagChunk, TxtChunk, CommentChunk, EntityChunk, etc.
 */
@interface Chunk : NSObject {
    CFStringInlineBuffer* buffer;
    NSString* source;
    NSRange range;
}



/**
 A human readable name for the chunk. Used for debugging purposes.
 */
+(NSString*)humanName;


/**
 The string that contains the whole source being parsed.
 */
@property (nonatomic, retain) NSString* source;


/**
 The range within the source of this chunk. Includes delimiters like '<' and '>'
 */
@property NSRange range;


/** During the parse (only) this buffer provides faster access to individual characters */
@property CFStringInlineBuffer* buffer;


/**
 Only some of the whole string is buffered... when this chunk is delivered by the parser
 the whole chunk will be available in the buffer
 */
@property (readonly) CFRange rangeInBuffer;


/**
 The interior of a chunk usually excludes the delimiters. This method does the index
 math to point inside the buffer. Currently only used to access the character
 data within a cdata section.
 */
@property (readonly) CFRange interiorRangeInBuffer;


/**
 Creates a new chunk from the range aRange in aSource string
 */
-(id)initWithString: (NSString*)aSource range:(NSRange)aRange;


/**
 The interior of a chunk usually excludes the delimiters of the chunk.
 */
-(NSRange)interiorRange;


/**
 Convenience method that returns a string corresponding to the interior of the chunk.
 */
-(NSString*)interiorString;


/**
 Each chunk has a kind that denotes what type of chunk it is.
 */
-(NSString*)kind;


/**
 Convenience method to test kind
 */
-(BOOL)isKind:(NSString*)aKind;


@end

--------------------------------------------------------------------------------
/Classes/Chunk.m:
--------------------------------------------------------------------------------
//
// Chunk.m
// Thumbprint
//
// Created by Lee Buck on 4/21/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "Chunk.h"


@implementation Chunk

@synthesize source, range, buffer;

/**
 Initializes the chunk to cover aRange of aSource. The source string is retained.
 */
-(id)initWithString: (NSString*)aSource range:(NSRange)aRange{
    // Give the superclass its chance to initialize (and to fail) before
    // touching our own ivars.
    self = [super init];
    if (self){
        source = [aSource retain];
        range = aRange;
    }
    return self;
}

-(void)dealloc{
    [source release];
    [super dealloc];
}

/**
 The chunk's range offset by the buffer's rangeToBuffer.location, or
 {kCFNotFound, 0} when no inline buffer is attached.
 */
-(CFRange)rangeInBuffer{
    if (!buffer)
        return CFRangeMake(kCFNotFound, 0);
    // NOTE(review): the buffer's start offset is *added* to the chunk location
    // here; confirm against the parser which coordinate space `range` uses
    // while a buffer is attached.
    return CFRangeMake(range.location + buffer->rangeToBuffer.location, range.length);
}

/**
 Same as rangeInBuffer but for the interior range (delimiters excluded by
 subclasses). Returns {kCFNotFound, 0} when no inline buffer is attached.
 */
-(CFRange)interiorRangeInBuffer{
    if (!buffer)
        return CFRangeMake(kCFNotFound, 0);
    NSRange inRange = [self interiorRange];
    return CFRangeMake(inRange.location + buffer->rangeToBuffer.location, inRange.length);
}

/**
 The base class has no delimiters, so the interior is the whole range.
 */
-(NSRange)interiorRange{
    return range;
}

-(NSString*)interiorString{
    return [source substringWithRange: [self interiorRange]];
}

/**
 Abstract: subclasses return one of the ChunkKind... constants.
 */
-(NSString*)kind{
    [self doesNotRecognizeSelector: _cmd];
    return nil;
}

/**
 Returns YES when aKind names the receiver's kind.
 */
-(BOOL)isKind:(NSString*)aKind{
    NSString* myKind = [self kind];
    // Pointer identity is the fast path (the kinds are literal constants), but
    // identity of equal literals is not guaranteed across images or for strings
    // built at run time, so fall back to a value comparison.
    return aKind == myKind || [aKind isEqualToString: myKind];
}

/**
 The raw source text covered by the chunk, delimiters included.
 */
-(NSString*)description{
    return [source substringWithRange: range];
}

+(NSString*)humanName{
    return @"generic";
}


@end

--------------------------------------------------------------------------------
/Classes/CommentChunk.h:
--------------------------------------------------------------------------------
//
// CommentChunk.h
// Thumbprint
//
// Created by Lee Buck on 4/21/09.
// Copyright 2009 Blue Bright Ventures.
// All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import <Foundation/Foundation.h>
#import "Chunk.h"


/**
 CommentChunk corresponds to a comment section. It adds no state of its own;
 it only specializes the kind, the human name and the interior range.
 */
@interface CommentChunk : Chunk {

}

@end

--------------------------------------------------------------------------------
/Classes/CommentChunk.m:
--------------------------------------------------------------------------------
//
// CommentChunk.m
// Thumbprint
//
// Created by Lee Buck on 4/21/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "CommentChunk.h"


@implementation CommentChunk

/**
 The comment text without its delimiters: 4 characters are skipped at the
 front and 3 at the back (7 in total).
 */
-(NSRange)interiorRange{
    // range.length is unsigned: a truncated chunk shorter than the two
    // delimiters would wrap around to a huge length. Report an empty interior
    // instead of an out-of-bounds range.
    if (range.length < 7)
        return NSMakeRange(range.location, 0);
    return NSMakeRange(range.location + 4, range.length - 7);
}

-(NSString*)kind{
    return ChunkKindComment;
}

+(NSString*)humanName{
    return @"comment";
}

@end

--------------------------------------------------------------------------------
/Classes/DoctypeChunk.h:
--------------------------------------------------------------------------------
//
// DoctypeChunk.h
// Thumbprint
//
// Created by Lee Buck on 4/25/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import <Foundation/Foundation.h>
#import "Chunk.h"

/**
 DoctypeChunk corresponds to a Doctype section.
Note that the parser does not 30 | extract declarations within an internal subset, and it ignores references to system and external ids. 31 | */ 32 | @interface DoctypeChunk : Chunk { 33 | 34 | } 35 | 36 | @end 37 | -------------------------------------------------------------------------------- /Classes/DoctypeChunk.m: -------------------------------------------------------------------------------- 1 | // 2 | // DoctypeChunk.m 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/25/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 23 | // 24 | 25 | #import "DoctypeChunk.h" 26 | 27 | 28 | @implementation DoctypeChunk 29 | 30 | -(NSString*)kind{ 31 | return ChunkKindDoctype; 32 | } 33 | 34 | -(NSRange)interiorRange{ 35 | return NSMakeRange(range.location + 9, range.length - 10); 36 | } 37 | 38 | +(NSString*)humanName{ 39 | return @"doctype"; 40 | } 41 | 42 | @end 43 | -------------------------------------------------------------------------------- /Classes/DocumentRoot.h: -------------------------------------------------------------------------------- 1 | // 2 | // DocumentRoot.h 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import <Foundation/Foundation.h>
#import "Element.h"

/**
 * DocumentRoot is the Element subclass used for the root of a parsed tree.
 * It declares no additional state or methods beyond Element.
 */
@interface DocumentRoot : Element {
}
@end

--------------------------------------------------------------------------------
/Classes/DocumentRoot.m:
--------------------------------------------------------------------------------
//
// DocumentRoot.m
// Thumbprint
//
// Created by Lee Buck on 4/21/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "DocumentRoot.h"

@implementation DocumentRoot


/**
 * Builds the root with the fixed tag name "DOCUMENT ROOT" and makes its
 * contents span the full length of aSource.
 */
-(id)initWithString: (NSString*)aSource range:(NSRange)aRange{
    self = [super initWithString: aSource
                           range: aRange
                         tagName: @"DOCUMENT ROOT"];
    [self setContentsLength: [aSource length]];
    return self;
}

/** The root always reports the document kind. */
-(NSString*)kind{
    return ChunkKindDocument;
}

/** The root is never an empty tag. */
-(BOOL)isEmptyTag{
    return NO;
}

/** The root is never a close tag. */
-(BOOL)isCloseTag{
    return NO;
}

@end

--------------------------------------------------------------------------------
/Classes/Element.h:
--------------------------------------------------------------------------------
//
// Element.h
// Thumbprint
//
// Created by Lee Buck on 4/18/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import <Foundation/Foundation.h>
#import "TagChunk.h"

@class CSSSelector;
@class DocumentRoot;

/**
 * An Element is the fundamental building block for ElementParser.
 */
@interface Element : TagChunk {
    NSMutableDictionary* attributes;
    BOOL attributesParsed;
    Element* nextElement;
    Element* nextSybling;
    Element* parent;
    int contentsLength;
    NSString* contentsText;
    NSString* key;
    BOOL containsMarkup; // includes entities

    NSObject* domainObject;
}

/**
 * Returns a dictionary of attributes name/values.
 * If an attribute had no value in the source (a bare attribute name) then the value will be NSNull.
 * If the attributes have not yet been parsed, this will parse them first.
 */
@property (nonatomic, readonly) NSDictionary* attributes;


/**
 * The character data inside the element. This text is stripped of tags, whitespace, etc
 * by stripTags. To see the actual source within the element, use contentsSource
 */
@property (nonatomic, retain) NSString* contentsText;


/**
 * A case-normalized version of the tagName when appropriate. Used in situations
 * where the tag name might need to serve as a key into a dictionary
 */
@property (nonatomic, retain) NSString* key;

/**
 * One or more chunks were encountered within this element.
 * Used for more efficient return of contentsText
 */
@property BOOL containsMarkup;


/**
 * The length of the text from the end of the start tag to the start of the end tag
 */
@property int contentsLength;


/**
 * The next Element encountered in the document
 */
@property (nonatomic, retain) Element* nextElement;


/**
 * The next sibling Element (i.e. the Element at the same depth with the same parent)
 */
@property (nonatomic, retain) Element* nextSybling;


/**
 * The parent Element to this Element. Not retained.
 */
@property (nonatomic, assign) Element* parent;


/**
 * Available for developer's use to hang an object onto this Element
 */
@property (nonatomic, retain) NSObject* domainObject;


/**
 * Parses the supplied source and returns an Element tree with the Document element serving as the root
 * of all top level elements. As HTML, Elements shall be considered case insensitive and tag
 * specific heuristics will be used to close tags intelligently. See ElementParser for details.
 */
+(DocumentRoot*)parseHTML:(NSString*)source;


/**
 * Parses the supplied source and returns an Element tree with the Document element serving as the root
 * of all top level elements. As XML, Elements shall be considered case sensitive. See ElementParser for details.
 */
+(DocumentRoot*)parseXML:(NSString*)source;

/**
 * Initializer used by ElementParser. See TagChunk for other initializers
 */
-(id)initWithTag:(TagChunk*)tag caseSensative:(BOOL)aCaseSensative;


/**
 * Returns true if the element contains the specified attribute.
 * If the attributes have not yet been parsed, this will parse them first.
 */
-(BOOL)hasAttribute:(NSString*)attr;


/**
 * Returns the value of a particular attribute (or nil if it doesn't exist)
 * Note: ElementParser does not support default attributes
 * If the attributes have not yet been parsed, this will parse them first.
 */
-(NSString*)attribute:(NSString*)attr;


/**
 * Convenience method that compares the receiver's description (the open tag
 * rebuilt from the tag name and attributes) with the supplied string.
 * NOTE(review): this comment used to describe a tag-name comparison (case
 * sensitive for XML, insensitive for HTML); the implementation compares the
 * description, so confirm which behavior callers expect.
 */
-(BOOL)isEqualToString:(NSString*)string;

/**
 * Convenience methods for getting NSObjects from elements and their children
 */
- (NSString*)contentsTextOfChildElement:(NSString*)selector;

- (NSNumber*)contentsNumber;

- (NSNumber*)contentsNumberOfChildElement:(NSString*)selector;


/**
 * An array of child Elements in document order
 */
-(NSArray*)childElements;

/**
 * An array containing the receiver followed by its next siblings, in document order
 */
-(NSArray*)syblingElements;


/**
 * The first child Element for this element (or nil if none).
 */
-(Element*)firstChild;


/**
 * A dictionary containing the tagnames of children as keys
 * and the contentsText of the children as values.
 * If duplicate children tag names are encountered, only the last will
 * appear in the dictionary.
 */
-(NSDictionary*)contentsOfChildren;

/**
 * Returns true if the supplied Element is a parent of receiver or one of its parents
 */
-(BOOL)hasAncestor:(Element*)ancestor;


/**
 * Returns the nextElement but only if it has the scope Element as an ancestor
 */
-(Element*)nextElementWithinScope:(Element*)scope;


/**
 * Returns true if the class attribute contains the class name (perhaps as one of multiple classes).
 */
-(BOOL)hasClassName:(NSString*)aClassName;


/**
 * Returns true if receiver can be a child of aParent. Used by ElementParser to prevent inappropriate
 * nesting in HTML (e.g. <p><p>)
 */
-(BOOL)acceptsParent:(Element*)aParent;

/**
 * Debugging method
 */
-(NSString*)dumpTree;


/**
 * The source between the end of the open tag and the beginning of the close tag
 */
-(NSString*)contentsSource;


/**
 * Convenience method for using a selector to find the first element within the receiver that matches.
 * See CSSSelector for details.
 */
-(Element*)elementWithCSSSelector:(CSSSelector*)selector;

/**
 * Convenience method for using a selector string to find the first element within the receiver that matches.
 * See CSSSelector for details.
 */
-(Element*)selectElement:(NSString*)cssSelectorString;


/**
 * Convenience method for using a selector to find all elements within the receiver that match.
 * See CSSSelector for details.
 */
-(NSArray*)elementsWithCSSSelector:(CSSSelector*)selector;

/**
 * Convenience method for using a selector string to find all elements within the receiver that match.
 * See CSSSelector for details.
 */
-(NSArray*)selectElements:(NSString*)cssSelectorString;

@end

--------------------------------------------------------------------------------
/Classes/Element.m:
--------------------------------------------------------------------------------
//
// Element.m
// Thumbprint
//
// Created by Lee Buck on 4/18/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import "Element.h"
#import "NSString_HTML.h"
#import "CSSSelectorMatcher.h"
#import "ElementParser.h"

@interface Element ()
-(void)setAttributes:(NSDictionary*)dict;
@end

@implementation Element

@synthesize nextElement, nextSybling, parent, contentsLength, contentsText, key, containsMarkup, domainObject;


/** Parses source as HTML with a throw-away ElementParser and returns the autoreleased root. */
+(DocumentRoot*)parseHTML:(NSString*)source{
    ElementParser* parser = [[ElementParser alloc] init];
    DocumentRoot* root = [parser parseHTML: source];
    // keep the root alive past the parser that owns it
    [[root retain] autorelease];
    [parser release];
    return root;
}

/** Parses source as XML with a throw-away ElementParser and returns the autoreleased root. */
+(DocumentRoot*)parseXML:(NSString*)source{
    ElementParser* parser = [[ElementParser alloc] init];
    DocumentRoot* root = [parser parseXML: source];
    // keep the root alive past the parser that owns it
    [[root retain] autorelease];
    [parser release];
    return root;
}

/** Convenience initializer covering the whole of string. */
-(id)initWithString:(NSString*)string{
    return [self initWithString: string range: NSMakeRange(0, [string length])];
}

/** Copies source, range and tag name from tag and records the case sensitivity. */
-(id)initWithTag:(TagChunk*)tag caseSensative:(BOOL)aCaseSensative{
    self = [self initWithString: tag.source range: tag.range tagName: tag.tagName];
    [self setCaseSensative: aCaseSensative];
    return self;
}

-(void)dealloc{
    [attributes release];
    [contentsText release];
    [nextElement release];
    [nextSybling release];
    [key release];
    // domainObject is a retain property, so it must be balanced here too;
    // previously it leaked. parent is assign and is deliberately not released.
    [domainObject release];
    [super dealloc];
}


/** Changing the range invalidates any attributes parsed from the old range. */
-(void)setRange: (NSRange)aRange{
    attributesParsed = NO;
    // setAttributes: stores whatever dictionary the parser returned, which is
    // not guaranteed to be mutable, so drop the cache rather than emptying it
    // in place. -attributes re-parses lazily because attributesParsed is NO.
    [attributes release];
    attributes = nil;
    [super setRange: aRange];
}

//cleans up nested p tags
-(BOOL)acceptsParent:(Element*)aParent{
    if ([self tagNameEquals: @"p"] && [aParent tagNameEquals: @"p"])
        return NO;
    return YES;
}


-(BOOL)closesTag:(TagChunk*)aTag{
    if (self == aTag || [self isEmptyTag]) //former case is true when shouldBeEmptyTag
        return self == aTag;
    else
        return [super closesTag: aTag];
}

/** YES when the (lazily parsed) attributes contain attr, including valueless ones stored as NSNull. */
-(BOOL)hasAttribute:(NSString*)attr{
    // a direct dictionary lookup instead of a linear scan over allKeys
    return [[self attributes] objectForKey: attr] != nil;
}

-(NSString*)attribute:(NSString*)attr{
    return [[self attributes] objectForKey: attr];
}

// warning, may contain empty classnames
-(NSArray*)classNames{
    id classNames = [self attribute: @"class"];
    // A class attribute written without a value is stored as NSNull (see the
    // attributes documentation), which does not respond to the string API.
    if (![classNames isKindOfClass: [NSString class]]) return [NSArray array];
    return [classNames componentsSeparatedByCharactersInSet: [NSCharacterSet whitespaceCharacterSet]];
}

-(BOOL)hasClassName:(NSString*)aClassName{
    if (![self attribute: @"class"]) return NO;
    for (NSString* className in [self classNames])
        if ([className isEqualToString: aClassName])
            return YES;
    return NO;
}

/** Parses the attributes from the source on first use and caches them. */
-(NSDictionary*)attributes{
    if (!attributesParsed){
        [self setAttributes: [source parseElementAttributesWithRange: range caseSensative: [self caseSensative]]];
        attributesParsed = YES;
    }
    return attributes;
}

-(void)setAttributes:(NSDictionary*)dict{
    // retain before release so that re-setting the same dictionary is safe
    [dict retain];
    [attributes release];
    attributes = (NSMutableDictionary*)dict;
}

/** nextElement is the first child exactly when its parent is the receiver. */
-(Element*)firstChild{
    if ([nextElement parent] == self)
        return nextElement;
    else
        return nil;
}

-(BOOL)hasAncestor:(Element*)ancestor{
    for (Element* p = parent; p; p = p.parent){
        if (p == ancestor)
            return YES;
    }
    return NO;
}

/**
 * Returns nextElement when it is still inside scope, otherwise nil.
 * The receiver is expected to be scope itself or one of its descendants.
 */
-(Element*)nextElementWithinScope:(Element*)scope{
    // A child of the receiver is always in scope.
    if (nextElement.parent == self)
        return nextElement;
    // A sibling of a descendant shares that descendant's (in-scope) parent.
    // This shortcut must not be taken when the receiver *is* the scope: its
    // own sibling lies outside the scope.
    if (self != scope && nextSybling)
        return nextElement;
    return ([nextElement hasAncestor: scope]) ? nextElement : nil;
}

/** Lazily computes the contents text, stripping tags only when markup was seen. */
-(NSString*)contentsText{
    if (!contentsText){
        // NSRange contentsRange = NSMakeRange(NSMaxRange(range), contentsLength);
        self.contentsText = (containsMarkup) ? [[self contentsSource] stripTags] : [self contentsSource];//[source stringByReplacingEntitiesInRange: contentsRange];
    }
    return contentsText;
}

- (NSString*)contentsTextOfChildElement:(NSString*)selector {
    return [[self selectElement:selector] contentsText];
}

- (NSNumber*)contentsNumber {
    return [NSNumber numberWithInt:[[self contentsText] intValue]];
}

- (NSNumber*)contentsNumberOfChildElement:(NSString*)selector {
    return [[self selectElement:selector] contentsNumber];
}

/** The raw source that follows the open tag, contentsLength characters long. */
-(NSString*)contentsSource{
    NSRange contentsRange = NSMakeRange(NSMaxRange(range), contentsLength);
    NSString* result = [source substringWithRange: contentsRange];
    return result;
}

-(NSArray*)selectElements:(NSString*)cssSelectorString{
    if (!cssSelectorString) return [NSArray array];
    CSSSelector* selector = [[CSSSelector alloc] initWithString: cssSelectorString];
    NSArray* result = [self elementsWithCSSSelector: selector];
    [selector release];
    return result;
}

-(Element*)selectElement:(NSString*)cssSelectorString{
    if (!cssSelectorString) return nil;
    CSSSelector* selector = [[CSSSelector alloc] initWithString: cssSelectorString];
    Element* result = [self elementWithCSSSelector: selector];
    [selector release];
    return result;
}

/** Feeds the receiver and every following element in its scope to a matcher; returns all matches. */
-(NSArray*)elementsWithCSSSelector:(CSSSelector*)selector{
    CSSSelectorMatcher* matcher = [[CSSSelectorMatcher alloc] initWithSelector: selector];
    Element* e = self;
    while (e){
        [matcher matchElement: e];
        // e = e.nextElement;
        e = [e nextElementWithinScope: self];
    }
    // keep the result alive past the matcher that owns it
    NSArray* result = [[[matcher matches] retain] autorelease];
    [matcher release];
    return result;
}

/** Like elementsWithCSSSelector: but stops at the first match. */
-(Element*)elementWithCSSSelector:(CSSSelector*)selector{
    CSSSelectorMatcher* matcher = [[CSSSelectorMatcher alloc] initWithSelector: selector];
    Element* e = self;
    BOOL success = NO;
    while (e && !success){
        success = [matcher matchElement: e];
        e = [e nextElementWithinScope: self];
    }
    Element* result = [matcher firstMatch];
    [matcher release];
    return result;
}

-(NSArray*)childElements{
    NSMutableArray* kids = [NSMutableArray array];
    Element* e = [self firstChild];
    while (e){
        [kids addObject: e];
        e = e.nextSybling;
    }
    return kids;
}

/** The receiver followed by each of its next siblings. */
-(NSArray*)syblingElements{
    NSMutableArray* syblings = [NSMutableArray array];
    Element* e = self;
    while (e){
        [syblings addObject: e];
        e = e.nextSybling;
    }
    return syblings;
}

-(NSDictionary*)contentsOfChildren{
    NSMutableDictionary* result = [NSMutableDictionary dictionary];
    Element* e = [self firstChild];
    while (e){
        [result setObject: [e contentsText] forKey: [e key]];
        e = e.nextSybling;
    }
    return result;
}

/** Compares the receiver's description (see -description) with string. */
-(BOOL)isEqualToString:(NSString*)string{
    return [[self description] isEqualToString: string];
}

/** The tag name, lower-cased unless the element is case sensitive; computed once. */
-(NSString*)key{
    if (!key)
        self.key = ([self caseSensative])
            ? [self tagName]
            : [[self tagName] lowercaseString];
    return key;
}

/** Rebuilds an open tag from the tag name and parsed attributes. */
-(NSString*)description{
    NSMutableString* result = [NSMutableString string];
    if (!source) return result;//root element has no source
    [result appendString: @"<"];
    [result appendString: [self tagName]];
    for (NSString* att in [[self attributes] allKeys]){
        [result appendFormat: @" %@='%@'", att, [attributes objectForKey: att]];
    }
    if ([self isEmptyTag])
        [result appendString: @" />"];
    else
        [result appendString: @">"];
    return result;
}

/** One line per element from the receiver onwards, indented one space per nesting level. */
-(NSString*)dumpTree{
    NSMutableString* result = [NSMutableString string];
    Element* e = self;
    while (e){
        for (Element* ee = e; ee; ee = [ee parent])
            [result appendString: @" "];
        [result appendString: [e description]];
        NSString* txt = (e.containsMarkup) ? @"..." : e.contentsText;
        [result appendFormat: @"%@\n", txt];
        e = e.nextElement;
    }
    return result;
}

@end

--------------------------------------------------------------------------------
/Classes/ElementParser.h:
--------------------------------------------------------------------------------
//
// ElementParser.h
// Thumbprint
//
// Created by Lee Buck on 4/20/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Commercial licences without many of the obligations of GPL
// are available for a nomial fee at sales@touchtankapps.com.

// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#import <Foundation/Foundation.h>
#import "Element.h"
#import "DocumentRoot.h"

/** Selects HTML or XML parsing rules. */
typedef enum{
    ElementParserModeHTML,
    ElementParserModeXML
} ElementParserMode;

#define ElementParserErrorDomain 1022
/** Codes passed to warning:description:chunk: */
typedef enum{
    ElementParserTagNotClosedError = -1,
    ElementParserGeneralError = -2
}ElementParserErrors;

@interface ElementParser : NSObject {
    NSMutableArray* tagStack;
    DocumentRoot* root;
    Element* lastOpened; //assigned
    Element* lastClosedBeforeOpen;
    Chunk* lastChunk;

    CFMutableArrayRef callbackMethods;
    NSMutableArray* callbackMatchers;
    id delegate;
    ElementParserMode mode;
}

/**
 * The delegate that is called when selectors match. Not retained.
 */
@property (nonatomic, assign) id delegate;

/**
 * HTML or XML
 */
@property ElementParserMode mode;


/**
 * The source being parsed.
 */
@property (readonly) NSString* source;


/**
 * Parse an HTML document and return a tree of Elements corresponding to the document.
 * The DocumentRoot is a special Element that contains all the top-level Elements in the
 * source. Returns nil when source is nil.
 */
-(DocumentRoot*)parseHTML:(NSString*)source;


/**
 * Parse an XML document and return a tree of Elements corresponding to the document.
 * The DocumentRoot is a special Element that contains all the top-level Elements in the
 * source.
 */
-(DocumentRoot*)parseXML:(NSString*)source;

/**
 * When parsing a document incrementally, begin with a single call to beginParsing,
 * followed by multiple calls to continueParsingString: as text arrives and finally a single
 * call to finishParsing
 */
-(DocumentRoot*)beginParsing;
-(void)continueParsingString:(NSString*)string;
-(void)finishParsing;

/**
 * Registers a callback to be performed whenever the supplied selector matches
 */
-(void)performSelector:(SEL)method forElementsMatching:(NSString*)cssSelector;

/**
 * Returns true for html elements that are expected to be empty.
 * NOTE(review): the example tag in the original comment was lost; presumably
 * tags such as img or br -- confirm against the implementation.
 */
-(BOOL)shouldBeEmptyElement:(Element*)element;

/**
 * internal callback when a warning condition occurs. May be overridden to surface an
 * NSError
 */
-(void)warning:(int)code description:(NSString*)description chunk: (Chunk*)chunk;

/**
 * internal callback when an info condition occurs. May be overridden for debugging purposes
 */
-(void)info:(NSString*)info atIndex:(int)sourceIndex;

@end

--------------------------------------------------------------------------------
/Classes/ElementParser.m:
--------------------------------------------------------------------------------
//
// ElementParser.m
// Thumbprint
//
// Created by Lee Buck on 4/20/09.
// Copyright 2009 Blue Bright Ventures. All rights reserved.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// Lower case names of the HTML elements that never have contents or a close tag.
static NSSet* HTML_TAGS_THAT_SHOULD_BE_EMPTY;


@interface ElementParser()

@property (nonatomic, assign) Element* lastOpened;
@property (nonatomic, assign) Element* lastClosedBeforeOpen;
@property (nonatomic, retain) DocumentRoot* root;
@property (nonatomic, retain) Chunk* lastChunk;

-(void)closeAllTags;
-(void)prepareParseWithString:(NSString*)string;
-(void)parseMoreWithPartial:(BOOL)partial;

@end


@implementation ElementParser

@synthesize root, lastOpened, lastClosedBeforeOpen, lastChunk, delegate, mode;

/**
 * Builds the shared set of empty HTML tags exactly once.
 */
+(void)initialize{
	// +initialize is also sent on behalf of subclasses that do not override it;
	// without this guard the set would be allocated again (and the old one leaked)
	if (self != [ElementParser class])
		return;
	HTML_TAGS_THAT_SHOULD_BE_EMPTY = [[NSSet alloc] initWithObjects: @"img", @"meta", @"br", @"hr", @"area", @"base", @"basefont", @"col", @"frame", @"input", @"isindex", @"link", @"param", nil];
}

/**
 * Creates a parser in HTML mode with an empty tag stack.
 */
-(id)init{
	self = [super init];
	if (self){
		tagStack = [[NSMutableArray alloc] initWithCapacity: 24];
		mode = ElementParserModeHTML;
	}
	return self;
}

-(void)dealloc{
	[tagStack release];
	[root release];
	[lastChunk release];
	if (callbackMethods)
		CFRelease(callbackMethods);
	[callbackMatchers release];
	[super dealloc];
}

/**
 * Parses a complete HTML document. Returns the root of the resulting tree, or nil if
 * source is nil. The root is owned by the parser.
 */
-(DocumentRoot*)parseHTML:(NSString*)source{
	if (!source) return nil;
	self.mode = ElementParserModeHTML;
	[self prepareParseWithString: source];
	[self parseMoreWithPartial: NO];
	[self closeAllTags];
	return root;
}

/**
 * Parses a complete XML document. Returns the root of the resulting tree, or nil if
 * source is nil. The root is owned by the parser.
 */
-(DocumentRoot*)parseXML:(NSString*)source{
	if (!source) return nil;
	self.mode = ElementParserModeXML;
	[self prepareParseWithString: source];
	[self parseMoreWithPartial: NO];
	[self closeAllTags];
	return root;
}


/**
 * Starts an incremental parse over an (initially empty) mutable source string.
 */
-(DocumentRoot*)beginParsing{
	NSMutableString* source = [NSMutableString string];
	[self prepareParseWithString: source];
	return root;
}

/**
 * Appends more text to the source of an incremental parse and parses as far as
 * complete chunks are available.
 */
-(void)continueParsingString:(NSString*)moreString{
	if (!moreString) return; // appendString: raises on nil
	// the source was created as an NSMutableString by beginParsing
	[(NSMutableString*)self.source appendString:moreString];
	[self parseMoreWithPartial: YES];
}

/**
 * Parses whatever is left of an incremental parse and closes all open elements.
 */
-(void)finishParsing{
	[self parseMoreWithPartial: NO];
	[self closeAllTags];
}

-(NSString*)source{
	return root.source;
}

/**
 * Resets all per-document state and installs a fresh DocumentRoot for string.
 */
-(void)prepareParseWithString:(NSString*)string{
	// The previous tree is autoreleased rather than leaked. Autorelease (not release)
	// keeps a root returned by an earlier parse valid for the caller's current scope.
	[root autorelease];
	root = [[DocumentRoot alloc] initWithString: string range: NSMakeRange(0,0)];
	// parseMoreWithPartial: resumes after lastChunk, so a stale chunk from an earlier
	// document would make a reused parser start in the middle of the new source
	self.lastChunk = nil;
	lastOpened = root;
	// must not link an element of the old tree to the first element of the new one
	lastClosedBeforeOpen = nil;
	[tagStack removeAllObjects];
	[tagStack addObject: root];
}

/**
 * Runs the low level chunk parser from the end of the last chunk received.
 * partial == YES stops quietly at a chunk that is not complete yet.
 */
-(void)parseMoreWithPartial:(BOOL)partial{
	int index = lastChunk ? NSMaxRange(lastChunk.range) : 0;
	NSString* source = [root source];
	root.contentsLength = [source length];
	[NSString parseHTML: source delegate: self selector: @selector(buildElementTreeWithChunk:context:) context: self index: &index partial: partial];
}


/**
 * The innermost element that is currently open (nil if nothing has been prepared).
 */
-(Element*)parentElement{
	return [tagStack lastObject];
}

/**
 * Offers element to every registered matcher and performs the callback of each
 * matcher that reports a completed match. A non-nil callback result becomes the
 * element's domainObject.
 */
-(void)matchElement:(Element*)element{
	NSUInteger matcherCount = [callbackMatchers count];
	for (NSUInteger i = 0; i < matcherCount; i++){
		CSSSelectorMatcher* matcher = [callbackMatchers objectAtIndex: i];
		BOOL matchComplete = [matcher matchElement: element];
		if (matchComplete){
			SEL selector = (SEL)CFArrayGetValueAtIndex(callbackMethods, i);
			NSObject* domainObject = [delegate performSelector: selector withObject: element];
			if (domainObject)
				element.domainObject = domainObject;
		}
	}
}

/**
 * Closes the innermost open element that tag closes, together with every element
 * opened inside it. A nil tag closes just the innermost open element. The root
 * (stack index 0) is never closed; a close tag matching nothing is ignored.
 */
-(void)closeElementWithTag:(TagChunk*) tag{
	NSInteger depthIndex;
	for (depthIndex = (NSInteger)[tagStack count] - 1; depthIndex > 0; depthIndex--){
		// crawl up stack to find matching element
		Element* stackElement = [tagStack objectAtIndex: depthIndex];
		if (!tag || [tag closesTag: stackElement])
			break;
	}
	if (depthIndex <= 0)
		return; // orphan close tag - ignore

	// close everything up to found element
	while ((NSInteger)[tagStack count] > depthIndex){
		// hold on to the element: the stack may be its only owner at this point
		Element* closedElement = [[tagStack lastObject] retain];
		closedElement.contentsLength = 
			(tag == nil) ? lastChunk.range.location - NSMaxRange(closedElement.range) :
			(tag == closedElement) ? 0 : 
			tag.range.location - NSMaxRange(closedElement.range);
		if(!tag && closedElement.contentsLength == 0)
			[self warning: ElementParserGeneralError description:@"Contents may not be right" chunk: closedElement];
		self.lastClosedBeforeOpen = closedElement;
		[tagStack removeLastObject];
		if (delegate && callbackMatchers)
			[self matchElement: closedElement];
		[closedElement release];
	}
}

/**
 * Pushes element onto the stack and links it to its parent, to the element opened
 * before it and, if one was closed in between, to its previous sibling.
 */
-(void)openElement:(Element*) element{
	element.parent = [self parentElement];
	lastOpened.nextElement = element;
	self.lastClosedBeforeOpen.nextSybling = element;
	[tagStack addObject: element];
	self.lastOpened = element;
	self.lastClosedBeforeOpen = nil;
}

/**
 * Closes every element still open at the end of the document, warning about each.
 */
-(void)closeAllTags{
	for (NSInteger i = (NSInteger)[tagStack count] - 1; i >= 0; i--){
		Element* stackElement = [tagStack objectAtIndex: i];
		if (i > 0)
			[self warning: ElementParserTagNotClosedError description:@"document left tag open" chunk: stackElement];
		[self closeElementWithTag: nil];
	}
}

-(void)info:(NSString*)info atIndex:(int)sourceIndex{
	NSLog(@"INFO [index: %i]: %@", sourceIndex, info);
}

-(void)warning:(int)code description:(NSString*)description chunk: (Chunk*)chunk{
	// range.location is an NSUInteger; %i would be wrong on 64 bit
	NSLog(@"WARN [index: %lu]: %@\n%@", (unsigned long)chunk.range.location, description, [chunk description]);
	/* subclasses should do this work if they want to do something with the warnings
	NSMutableDictionary* info = [NSMutableDictionary dictionaryWithCapacity: 2];
	if (description)
		[info addObject: description forKey: NSLocalizedDescriptionKey];
	if (chunk)
		[info addObject: chunk forKey: ElementParserErrorChunk];
	NSError* error = [NSError errorWithDomain: ElementParserErrorDomain code: code userInfo: info];
	*/
}

/**
 * YES for HTML elements that never have contents (img, br, ...). Always NO in XML mode.
 */
-(BOOL)shouldBeEmptyElement:(Element*)element{
	if (mode == ElementParserModeXML) return NO;
	return [HTML_TAGS_THAT_SHOULD_BE_EMPTY containsObject: [element key]];
}

/**
 * Callback of the low level parser: receives every chunk and turns tag chunks into
 * open/close operations on the element stack. Returns self so that parsing continues.
 */
-(id)buildElementTreeWithChunk:(Chunk*)chunk context:(void*)builder{
	self.lastChunk = chunk;
	TagChunk* tag = [chunk isKind: ChunkKindTag] ? (TagChunk*) chunk : nil;
	
	// anything but plain text or a close tag counts as markup inside the parent
	if (![chunk isKind: ChunkKindText] && ![tag isCloseTag])
		[self parentElement].containsMarkup = YES;
	
	if (!tag)
		return self;
	
	if ([tag isCloseTag]){
		[self closeElementWithTag: tag];
		return self;
	}
	
	Element* element = [[Element alloc] initWithTag: tag caseSensative: mode == ElementParserModeXML];
	if ([element isEmptyTag] || [self shouldBeEmptyElement: element]){
		[self openElement: element];
		[self closeElementWithTag: element];
	}
	else {
		// e.g. an element that may not nest inside the current parent implicitly closes it
		if (![element acceptsParent: [self parentElement]])
			[self closeElementWithTag: [self parentElement]];
		[self openElement: element];
	}
	[element release];
	return self;//to continue parsing
}

/**
 * Registers method to be sent to the delegate for elements matching cssSelector.
 */
-(void)performSelector:(SEL)method forElementsMatching:(NSString*)cssSelector{
	if (!callbackMethods){
		// SELs are not objects, hence a CFArray without retain/release callbacks
		callbackMethods = CFArrayCreateMutable(NULL, 0, NULL);
		callbackMatchers = [[NSMutableArray alloc] initWithCapacity: 10];
	}
	CFArrayAppendValue(callbackMethods, method);
	CSSSelector* css = [[CSSSelector alloc] initWithString: cssSelector];
	CSSSelectorMatcher* matcher = [[CSSSelectorMatcher alloc] initWithSelector: css];
	[callbackMatchers addObject: matcher];
	[css release];
	[matcher release];
}

/**
 * Concatenated descriptions of all elements in document order.
 */
-(NSString*)description{
	NSMutableString* result = [NSMutableString string];
	for (Element* e = root.nextElement; e; e = e.nextElement)
		[result appendString: [e description]];
	return result;
}

@end
@implementation EntityChunk

/**
 * The range of the entity without its first and last character
 * (an entity chunk starts with '&' and ends with ';').
 */
-(NSRange)interiorRange{
	NSRange interior = range;
	interior.location += 1;
	interior.length -= 2;
	return interior;
}

/**
 * Identifies this chunk as an entity.
 */
-(NSString*)kind{
	NSString* chunkKind = ChunkKindEntity;
	return chunkKind;
}

/**
 * Name used for this kind of chunk in messages meant for humans.
 */
+(NSString*)humanName{
	static NSString* const name = @"entity";
	return name;
}

@end
/**
 * spins through string buffer until character a or b is encountered (or end of buffer).
 * Returns the number of characters before that point.
 */
CFIndex lenThruOr(CFStringInlineBuffer* buffer, CFIndex index, const char a, const char b);

/**
 * spins through an attribute/value pair inside an element
 */

CFIndex lenAttributeAndValue(CFStringInlineBuffer* buffer, CFIndex index, NSString** attrName, NSString**attrValue);

/**
 * spins through string buffer until a white character is encountered, leaving *index on it.
 * Assumes <=32 denotes whitespace. Returns the whitespace character, or 0 if end of
 * buffer encountered.
 */
unichar skipNonWhitespace(CFStringInlineBuffer* buffer, CFIndex* index);


/**
 * spins through string buffer until a non white character is encountered, leaving *index on it.
 * Assumes <=32 denotes whitespace. Returns the non white character, or 0 if end of
 * buffer encountered.
 */
unichar skipWhitespace(CFStringInlineBuffer* buffer, CFIndex* index);


/**
 * spins through string buffer until a non token character is encountered. 
 * Returns length of the token. Used for attributes, class names, identifiers and tag names.
 * Does not accommodate non latin characters. 
 * Accepts '-', '_', ':' even when in first character position
 * Also permits '/' to begin the token (simplifies parsing close tags).
 */
CFIndex lenToken(CFStringInlineBuffer* buffer, CFIndex index);


/**
 * Returns a non-zero value if the characters in the buffer at index begin with the
 * supplied string and 0 otherwise. Treat the result as a boolean.
 */
CFIndex startsWithStr(CFStringInlineBuffer* buffer, CFIndex index, const char* prefix);

/**
 * Parses an entity name and returns its length including the leading '&' and the
 * closing ';'. Returns 0 if end of buffer is encountered or NSNotFound if an invalid
 * entity is encountered.
 */
CFIndex lenEntityName(CFStringInlineBuffer* buffer, CFIndex index);


/**
 * Spins through buffer until the supplied suffix is encountered. Returns the length
 * from index through the end of the suffix, or 0 if end of buffer is encountered
 * before the suffix.
 */
CFIndex lenThru(CFStringInlineBuffer* buffer, CFIndex index, const char* suffix);

/**
 * Spins through buffer until the supplied suffix is encountered. Does not
 * match on characters within single or double quotes.
 * Returns 0 if end of buffer is encountered before the suffix.
 */
CFIndex lenThruRespectingQuotes(CFStringInlineBuffer* buffer, CFIndex index, const char* suffix);

/**
 * Returns the character corresponding to the entity at the supplied index in the buffer
 * (index is on the '&') and stores the length of the entity in *len.
 * Returns 0 if the entity is not recognized.
 */
unichar parseEntity(CFStringInlineBuffer* buffer, CFIndex index, CFIndex* len);


/**
 * Parses the doctype at the supplied index in the buffer and returns its length.
 * Return 0 if end of buffer encountered first
 */
CFIndex lenDoctype(CFStringInlineBuffer* buffer, CFIndex index);


/**
 * Convenience method that creates a string from a range in the buffer.
 * The caller owns the returned string and must release it.
 */
NSString* createStringFromBuffer(CFStringInlineBuffer* buffer, CFIndex index, CFIndex length);

@interface NSString (HTML)

/**
 * converts the string assuming it is a hex number. Conversion stops at the first
 * character that is not a hex digit.
 */
-(int)hexValue;


/**
 * Returns a string in which
 * a) all the tags have been removed
 * b) entities are resolved
 * c) cdata sections are processed
 * d) whitespace is compressed
 * e) html markup like <br> and <p> are used to provide minimal formatting
 */
-(NSString*)stripTags;


/**
 * Convenience method to url encode a string
 */
-(NSString*)stringByAddingPercentEscaping;


/**
 * Convenience method to url decode a string
 */
-(NSString*)stringByRemovingPercentEscaping;


/**
 * Resolves entities in string
 */
-(NSString*)stringByReplacingEntities;


/**
 * Convenience method that replaces entities for a range
 */
-(NSString*)stringByReplacingEntitiesInRange:(NSRange)range;


/**
 * Convenience method to create an element
 */
-(Element*)element;


/**
 * Parses an element returning its attributes. Attributes without a value are stored
 * with NSNull as their value. Attribute names are lower cased unless caseSensative is YES.
 */
-(NSDictionary*)parseElementAttributesWithRange:(NSRange) range caseSensative:(BOOL)caseSensative;


/**
 * Very simpleminded parsing out of character encoding based on an http header contentType
 */
+ (NSStringEncoding) encodingForContentType:(NSString *)contentType;


/**
 * The base parser that spins through a string and calls a delegate for each chunk encountered.
 * Chunks include: tags, entities, comments, cdata, characters and others.
 * ElementParser uses this low level parser to build an Element tree.
 */
+(void)parseHTML:(NSString*) source delegate:(id)delegate selector:(SEL)selector context: (void*) context;

/**
 * The base parser that spins through a string and calls a delegate for each chunk encountered.
 * This version of the method permits partial parsing... ie the parser will stop if
 * it encounters a chunk that extends beyond the end of the string. It can be called 
 * repeatedly as more text arrives and is appended to the string.
 * On return *sourceIndex holds the index at which parsing stopped. Parsing also stops
 * when the delegate returns nil from the callback.
 */
+(void)parseHTML:(NSString*)source delegate:(id)delegate selector:(SEL)selector context: (void*) context index:(int*)sourceIndex partial:(BOOL)partial;

@end
/**
 * Tests whether the characters in the buffer at index begin with prefix.
 * Returns the length of prefix when they do and 0 when they do not (or when prefix
 * is empty), so the result can be used both as a boolean and as a length.
 */
CFIndex startsWithStr(CFStringInlineBuffer* buffer, CFIndex index, const char* prefix){
	CFIndex startIndex = index;
	for (; *prefix; prefix++, index++){
		// reading past the end of the buffer yields 0, which never equals a prefix char
		unichar c = CFStringGetCharacterFromInlineBuffer(buffer, index);
		if (c != *prefix)
			return 0;
	}
	// was startIndex - index, i.e. a negative "length"
	return index - startIndex;
}
/**
 * Resolves the entity that starts at index (the position of its '&').
 * Handles &gt; &lt; &amp; directly, numeric entities in decimal (&#65;) and hex
 * (&#x41; or &#X41;) form, and named entities via the "HTML Entities" plist in the
 * main bundle.
 * On return *len holds the length of the entity including '&' and ';'.
 * Returns the character, or 0 if the text at index is not a recognized entity.
 */
unichar parseEntity(CFStringInlineBuffer* buffer, CFIndex index, CFIndex* len){
	if (startsWithStr(buffer, index+1, "gt;")){
		(*len) = 4;
		return '>';
	}
	if (startsWithStr(buffer, index+1, "lt;")){
		(*len) = 4;
		return '<';
	}
	if (startsWithStr(buffer, index+1, "amp;")){
		(*len) = 5;
		return '&';
	}

	(*len) = lenThru(buffer, index + 1, ";") + 1;
	if (((*len) < 2) || ((*len) > 12)) return 0; // no ';' found, or too long to be an entity

	// Buffer indexes are relative to the start of the buffered range while NSString
	// ranges are absolute, so the buffer's offset has to be added for substrings.
	NSString* string = (NSString*)buffer->theString;
	CFIndex entityStart = buffer->rangeToBuffer.location + index;

	unichar first = CFStringGetCharacterFromInlineBuffer(buffer, index + 1);
	if (first == '#'){
		unichar radix = CFStringGetCharacterFromInlineBuffer(buffer, index + 2);
		if ((radix == 'x') || (radix == 'X')){
			// hex entity: digits sit between "&#x" and ";"
			NSString* hexString = [string substringWithRange: NSMakeRange(entityStart + 3, (*len) - 4)];
			return [hexString hexValue];
		}
		// decimal entity: digits sit between "&#" and ";"
		NSString* decString = [string substringWithRange: NSMakeRange(entityStart + 2, (*len) - 3)];
		return CFStringGetIntValue((CFStringRef)decString);
	}

	// named entity
	if (ENTITIES_MAP == nil)
		ENTITIES_MAP = [[NSDictionary alloc] initWithContentsOfFile: [[NSBundle mainBundle] pathForResource: @"HTML Entities" ofType: @"plist"]];
	if (!ENTITIES_MAP) return 0;
	NSString* key = [string substringWithRange: NSMakeRange(entityStart + 1, (*len) - 2)];
	NSString* result = [ENTITIES_MAP objectForKey: key];
	// an empty plist value must not raise in characterAtIndex:
	return ([result length] > 0) ? [result characterAtIndex: 0] : 0;
}
/**
 * Measures a doctype declaration. index is the position of the '<' of "<!DOCTYPE".
 * Returns the length from index through the closing '>', or 0 if the buffer ends
 * before the declaration is complete.
 * Limitations:
 * 1. the internal subset is only skimmed: markup declarations are assumed not to
 *    contain a '>' and parameter entity references are skipped through their ';'
 * 2. will get confused if public identifier or system id have a '>' or a '[' in them
 */
CFIndex lenDoctype(CFStringInlineBuffer* buffer, CFIndex index){
	CFIndex startIndex = index;
	index += 9; // skip "<!DOCTYPE"
	CFIndex len = lenThruOr(buffer, index, '>', '[');
	unichar c = CFStringGetCharacterFromInlineBuffer(buffer, index + len);
	if (c == '>') //no internal decls
		return len + 10;
	if (c != '[')
		return 0; // ran out of buffer before '>' or '['
	
	index += len + 1; // continue after the '[' that opens the internal subset
	
	// skip thru the internal decls / pe references
	while ((c = skipWhitespace(buffer, &index)) != ']'){
		if (!c) 
			return 0; // ran out of buffer (used to loop forever here)
		if (c == '<')//elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
			len = lenThru(buffer, index, ">");
		else if (c == '%')//PEReference
			len = lenThru(buffer, index, ";");
		else
			len = 1; // unexpected character: step over it
		if (len <= 0) return 0; //end of decl not found, fail
		index += len;
	}
	
	// found end of internal subset, just need the closing '>'
	index++;
	
	c = skipWhitespace(buffer, &index);
	if (c != '>') return 0; // ran out of buffer
	
	return index - startIndex + 1;
}
bufferLength = range.length; 275 | unichar *outBuffer = malloc(sizeof(unichar) * bufferLength); 276 | CFIndex index = 0; 277 | int writeIndex = 0; 278 | CFStringInlineBuffer buffer; 279 | CFStringInitInlineBuffer((CFStringRef)self, &buffer, CFRangeMake(range.location, range.length)); 280 | 281 | while (index < range.length){ 282 | unichar c = CFStringGetCharacterFromInlineBuffer(&buffer, index); 283 | CFIndex len; 284 | unichar entity = (c == '&') ? parseEntity(&buffer, index, &len) : 0; 285 | if (entity){ 286 | outBuffer[writeIndex++] = entity; 287 | index += len; 288 | } 289 | else { 290 | outBuffer[writeIndex++] = c; 291 | index++; 292 | } 293 | } 294 | NSString* result = [NSString stringWithCharacters: outBuffer length: writeIndex]; 295 | free(outBuffer); 296 | return result; 297 | } 298 | 299 | -(NSString*)stringByReplacingEntities{ 300 | return [self stringByReplacingEntitiesInRange: NSMakeRange(0, [self length])]; 301 | } 302 | 303 | 304 | -(NSDictionary*)parseElementAttributesWithRange:(NSRange) range caseSensative:(BOOL)caseSensative{ 305 | NSMutableDictionary* attributes = [[[NSMutableDictionary alloc] initWithCapacity: 8] autorelease]; 306 | 307 | CFStringInlineBuffer localBuffer; 308 | CFStringInitInlineBuffer((CFStringRef)self, &localBuffer, CFRangeMake(range.location, range.length)); 309 | 310 | CFIndex index = 1; // skip the leading '<' 311 | 312 | unichar c = skipNonWhitespace(&localBuffer, &index); 313 | 314 | while (c){ 315 | NSString* attrName; 316 | NSString* attrValue; 317 | 318 | c = skipWhitespace(&localBuffer, &index); 319 | if (c == '/'){ 320 | //the empty tag char at the end 321 | index++; 322 | break; 323 | } 324 | CFIndex tokenLen = lenToken(&localBuffer, index); 325 | if (tokenLen == 0) 326 | break; 327 | attrName = [self substringWithRange: NSMakeRange(index + localBuffer.rangeToBuffer.location, tokenLen)]; 328 | index += [attrName length]; 329 | c = skipWhitespace(&localBuffer, &index); 330 | if (c == '='){ 331 | index++;//skip the = 
/**
 * The base parser: spins through source starting at *sourceIndex and performs selector
 * on delegate (with the chunk and context as arguments) for every chunk found: tags,
 * comments, cdata sections, processing instructions, doctypes, entities and text.
 * The chunk objects are reused between callbacks, so a delegate must not rely on their
 * contents after returning. Parsing stops when the delegate returns nil.
 *
 * When partial is YES the parser stops in front of a chunk whose end has not arrived
 * yet; call again after appending to source. When partial is NO such a chunk is
 * emitted as text and reported through info:atIndex: (if the delegate is an ElementParser).
 * On return *sourceIndex is the index at which parsing stopped.
 */
+(void)parseHTML:(NSString*)source delegate:(id)delegate selector:(SEL)selector context: (void*) context index:(int*)sourceIndex partial:(BOOL)partial{
	if (!source) return; // nothing to parse; the CF calls below would crash on NULL
	
	NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
	ElementParser* parser = ([delegate respondsToSelector:@selector(isKindOfClass:)] && [delegate isKindOfClass: [ElementParser class]]) ? delegate : nil;
	CFIndex index = *sourceIndex;
	CFIndex maxSourceIndex = [source length];
	CFStringInlineBuffer buffer;
	buffer.theString = (CFStringRef)source;
	buffer.rangeToBuffer.location = buffer.rangeToBuffer.length = 0;
	
	// one reusable chunk per kind
	TagChunk* tag = [[TagChunk alloc] initWithString: source range: NSMakeRange(0,0) tagName: nil];
	CommentChunk* comment = [[CommentChunk alloc] initWithString: source range: NSMakeRange(0,0)];
	EntityChunk* entity = [[EntityChunk alloc] initWithString: source range: NSMakeRange(0,0)];
	DoctypeChunk* doctype = [[DoctypeChunk alloc] initWithString: source range: NSMakeRange(0,0)];
	ProcessingInstructionChunk* pi = [[ProcessingInstructionChunk alloc] initWithString: source range: NSMakeRange(0,0)];
	CDataChunk* cdata = [[CDataChunk alloc] initWithString: source range: NSMakeRange(0,0)];
	TxtChunk* text = [[TxtChunk alloc] initWithString: source range: NSMakeRange(0,0)];
	
	BOOL delegateWantsToContinue = YES;
	unichar c;
	
	// outer loop: (re)position the read buffer; inner loop: one chunk per iteration
	while (delegateWantsToContinue && moveBufferToIndex(&buffer, buffer.rangeToBuffer.location + index)){
		index = 0;
		
		while (delegateWantsToContinue && (c = CFStringGetCharacterFromInlineBuffer(&buffer, index))){
			
			CFIndex tagLen;
			CFIndex thru;		// length through a terminator; 0 when the terminator was not found
			CFIndex len = 0;
			Chunk* chunk = nil;
			Chunk* partialChunk = nil;
			
			if (c == '<'){
				if ((tagLen = lenToken(&buffer, index + 1))){
					// Test the terminator search itself: the former "interior > 0" test was
					// also true for an unterminated tag whose name had 2 or more characters.
					thru = lenThruRespectingQuotes(&buffer, index + tagLen + 1, ">");
					if (thru > 0){
						NSString* tagName = createStringFromBuffer(&buffer, index + 1, tagLen);
						tag.tagName = tagName;
						[tagName release];
						chunk = tag;
						len = 1 + tagLen + thru;
					}
					else
						partialChunk = tag;
				}
				else if (startsWithStr(&buffer, index + 1, "!--")){
					thru = lenThru(&buffer, index + 4, "-->");
					if (thru > 0){ // also accepts the empty comment <!---->
						chunk = comment;
						len = thru + 4;
					}
					else
						partialChunk = comment;
				}
				else if (startsWithStr(&buffer, index + 1, "![CDATA[")){
					thru = lenThru(&buffer, index + 9, "]]>");
					if (thru > 0){ // also accepts the empty section <![CDATA[]]>
						chunk = cdata;
						len = thru + 9;
					}
					else
						partialChunk = cdata;
				}
				else if (startsWithStr(&buffer, index + 1, "?")){
					thru = lenThru(&buffer, index + 2, ">");
					if (thru > 1){ // requires at least one character before the '>'
						chunk = pi;
						len = thru + 2;
					}
					else
						partialChunk = pi;
				}
				else if (startsWithStr(&buffer, index + 1, "!DOCTYPE")){
					// lenDoctype expects the position of the '<' and skips "<!DOCTYPE"
					// itself; passing index + 9 skipped 9 further characters and so
					// missed the '>' of short declarations such as <!DOCTYPE html>
					thru = lenDoctype(&buffer, index);
					if (thru > 0){
						chunk = doctype;
						len = thru;
					}
					else
						partialChunk = doctype;
				}
				else
					partialChunk = tag;
			}
			else if (c == '&'){
				// complicated by the fact that what appears to be an entity may in fact just be text
				CFIndex entityLen = lenEntityName(&buffer, index);
				if (entityLen == NSNotFound){
					len = lenThruOr(&buffer, index + 1, '<', '&') + 1;
					chunk = text;
				}
				else if (entityLen > 0){
					chunk = entity;
					len = entityLen;
				}
				else
					partialChunk = entity;
			}
			else{
				len = lenThruOr(&buffer, index + 1, '<', '&') + 1;
				chunk = text;
			}
			
			if (partialChunk){ // recover from a partial chunk
				BOOL bytesLeftBeyondBuffer = maxSourceIndex > (buffer.rangeToBuffer.location + buffer.rangeToBuffer.length);
				if (bytesLeftBeyondBuffer || partial)
					break;	// go get more bytes in the buffer / or exit
				
				// recover by emitting as text
				len = lenThruOr(&buffer, index + 1, '<', '&') + 1;
				chunk = text;
				
				// at most 8 characters, clipped to the end of the source
				// (the unparenthesized subtraction used to produce an out of range substring)
				NSUInteger fragmentStart = buffer.rangeToBuffer.location + index;
				NSUInteger fragmentLength = MIN((NSUInteger)8, [source length] - fragmentStart);
				NSString* fragment = [source substringWithRange: NSMakeRange(fragmentStart, fragmentLength)];
				[parser info: [NSString stringWithFormat: @"Unable to parse '%@' as %@", fragment, [[partialChunk class] humanName]] atIndex: buffer.rangeToBuffer.location + index];
			}
			
			// hand the chunk to the delegate
			chunk.range = NSMakeRange(index + buffer.rangeToBuffer.location, len);
			chunk.buffer = &buffer;
			delegateWantsToContinue = [delegate performSelector: selector withObject: chunk withObject: context] != nil;
			index += len;
			
			assert(index > 0);
		}
	}
	
	if (!delegateWantsToContinue)
		[parser info: @"delegate stopped the parsing" atIndex: buffer.rangeToBuffer.location + index];
	
	[tag release];
	[comment release];
	[entity release];
	[pi release];
	[cdata release];
	[doctype release];
	[text release];
	
	*sourceIndex = index + buffer.rangeToBuffer.location;
	[pool release];
}
/**
 Callback used by -stripTags: receives each parsed chunk and writes its plain-text
 contribution into the staging buffer of `context`.

 Tags write nothing, except that a tag for which -emitsNewLineInContents is true writes a
 blank line (once). Everything between <script> and </script> is dropped. Text and CDATA
 interiors are copied with runs of characters <= 32 collapsed to a single space. Recognised
 entities are written as their character; unrecognised ones are copied through verbatim.

 @return self (non-nil), so the parser keeps going.
 */
-(id)chunk:(Chunk*)chunk context:(StripTagsContext*)context{
	// flush the outBuffer if there isn't enough room for the whole chunk
	if (context->writeIndex + chunk.range.length > context->outBufferLength){
		CFStringAppendCharacters((CFMutableStringRef)context->result, context->outBuffer, context->writeIndex);
		context->writeIndex = 0;
		if (chunk.range.length > context->outBufferLength){
			// need to grow buffer; it was just flushed, so nothing has to be copied over
			free(context->outBuffer);
			context->outBufferLength = chunk.range.length;
			context->outBuffer = malloc(sizeof(unichar) * context->outBufferLength);
		}
	}
	assert(context->writeIndex + chunk.range.length <= context->outBufferLength);

	CFRange bufferRangeToAppend = CFRangeMake(0, 0);
	CFStringInlineBuffer* buffer = chunk.buffer;

	if ([chunk isKind: ChunkKindTag]){
		TagChunk* tag = (TagChunk*)chunk;
		if (context->inScriptElement){
			if ([tag tagNameEquals: @"/script"])
				context->inScriptElement = NO;
		}
		else if ([tag tagNameEquals: @"script"])
			context->inScriptElement = YES;
		else if ([tag emitsNewLineInContents]){
			if (!context->inPara){//dont do double paras
				context->outBuffer[context->writeIndex++] = '\n';
				context->outBuffer[context->writeIndex++] = '\n';
				context->inWhite = YES;
				context->inPara = YES;
			}
		}
	}
	else if (context->inScriptElement)
		; // do nothing: script contents are dropped
	else if ([chunk isKind: ChunkKindText]){
		bufferRangeToAppend = chunk.rangeInBuffer;
	}
	else if ([chunk isKind: ChunkKindCData]){
		bufferRangeToAppend = [chunk interiorRangeInBuffer];
	}
	else if ([chunk isKind: ChunkKindEntity]){
		CFRange rangeInBuffer = [chunk rangeInBuffer];
		unichar entity = parseEntity(chunk.buffer, rangeInBuffer.location, &rangeInBuffer.length);
		if (entity){
			context->outBuffer[context->writeIndex++] = entity;
			context->inWhite = NO;
		}
		else{
			//we regurgitate unrecognized entities
			bufferRangeToAppend = rangeInBuffer;
		}
	}

	// copy the selected range, collapsing whitespace/control characters to single spaces
	int maxBufferIndex = bufferRangeToAppend.location + bufferRangeToAppend.length;
	for (int bufferIndex = bufferRangeToAppend.location; bufferIndex < maxBufferIndex; bufferIndex ++){
		unichar c = CFStringGetCharacterFromInlineBuffer(buffer, bufferIndex);
		if (c <= 32){
			if (!context->inWhite)
				context->outBuffer[context->writeIndex++] = 32;
			context->inWhite = YES;
		}
		else{
			context->outBuffer[context->writeIndex++] = c;
			context->inWhite = NO;
			context->inPara = NO;
		}
	}
	return self;
}

/**
 Builds an autoreleased Element spanning the whole receiver. The tag name is the token that
 starts at character index 1 (presumably just after a leading '<' -- confirm against callers).
 */
-(Element*)element{
	CFStringInlineBuffer buffer;
	CFStringInitInlineBuffer((CFStringRef)self, &buffer, CFRangeMake(0, [self length]));
	int len = lenToken(&buffer, 1);
	NSString* tagName = createStringFromBuffer(&buffer, 1, len);
	Element* result = [[[Element alloc] initWithString: self range: NSMakeRange(0, [self length]) tagName: tagName] autorelease];
	[tagName release];
	return result;
}


/** Returns an autoreleased copy of the receiver with percent escapes added (UTF-8). */
-(NSString*)stringByAddingPercentEscaping{
	return [(NSString*)CFURLCreateStringByAddingPercentEscapes(NULL, (CFStringRef)self, NULL, NULL, kCFStringEncodingUTF8) autorelease];
}

/**
 Returns an autoreleased copy of the receiver with every percent escape replaced by the
 character it stands for.
 */
-(NSString*)stringByRemovingPercentEscaping{
	// The last argument lists the characters to LEAVE escaped: NULL means "replace nothing"
	// (which made this method a no-op), the empty string means "replace everything".
	return [(NSString*)CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)self, CFSTR("")) autorelease];
}
// TODO Handle different encodings
/**
 Maps a Content-Type header value to a string encoding by looking for a known charset name
 anywhere in it (case-insensitively).

 @param contentType header value such as "text/html; charset=utf-8"; may be nil.
 @return NSUTF8StringEncoding, NSISOLatin1StringEncoding or NSWindowsCP1252StringEncoding
         when the matching name is present; NSISOLatin1StringEncoding otherwise.
 */
+ (NSStringEncoding) encodingForContentType:(NSString *)contentType{
	// Messaging nil for an NSRange yields a zero-filled struct whose location (0) is not
	// NSNotFound, so a missing header used to be reported as UTF-8 by accident.
	if ([contentType length] == 0)
		return NSISOLatin1StringEncoding;

	if ([contentType rangeOfString: @"utf-8" options: NSCaseInsensitiveSearch].location != NSNotFound)
		return NSUTF8StringEncoding;
	if ([contentType rangeOfString: @"iso-8859-1" options: NSCaseInsensitiveSearch].location != NSNotFound)
		return NSISOLatin1StringEncoding;
	if ([contentType rangeOfString: @"windows-1252" options: NSCaseInsensitiveSearch].location != NSNotFound)
		return NSWindowsCP1252StringEncoding;

	// An explicit but unrecognised encoding was requested. HTTP headers spell the parameter
	// "charset="; "encoding=" is still accepted as before.
	if ([contentType rangeOfString: @"charset=" options: NSCaseInsensitiveSearch].location != NSNotFound
		|| [contentType rangeOfString: @"encoding=" options: NSCaseInsensitiveSearch].location != NSNotFound)
		NSLog(@"unknown encoding: %@", contentType);
	return NSISOLatin1StringEncoding;
}
20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 23 | // 24 | 25 | #import 26 | #import "Chunk.h" 27 | 28 | /** 29 | ProcessingInstructionChunk corresponds to a Processing Instruction (e.g. ) 30 | */ 31 | 32 | @interface ProcessingInstructionChunk : Chunk { 33 | 34 | } 35 | 36 | @end 37 | -------------------------------------------------------------------------------- /Classes/ProcessingInstructionChunk.m: -------------------------------------------------------------------------------- 1 | // 2 | // ProcessingInstructionChunk.m 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 
@implementation ProcessingInstructionChunk

/**
 The chunk's range without its first two characters and its last character.
 */
-(NSRange)interiorRange{
	NSUInteger interiorStart = range.location + 2;
	NSUInteger interiorLength = range.length - 3;
	return NSMakeRange(interiorStart, interiorLength);
}

/** Kind identifier for processing-instruction chunks. */
-(NSString*)kind{
	return ChunkKindPI;
}

/** Name used for this kind of chunk in human-readable messages. */
+(NSString*)humanName{
	return @"processing instruction";
}

@end
include leading '/' for close tags 41 | */ 42 | @property (nonatomic, retain) NSString* tagName; 43 | 44 | 45 | /** 46 | Determines if tagName comparisons aer case sensative (XML) or not (HTML). 47 | */ 48 | @property BOOL caseSensative; 49 | 50 | /** 51 | Use this initializer when the tagname has already been created as a string to reduce object allocations 52 | */ 53 | -(id)initWithString: (NSString*)aSource range:(NSRange)aRange tagName:(NSString*)aTagName; 54 | 55 | /** 56 | A tag that ends with '/>' 57 | */ 58 | -(BOOL)isEmptyTag; 59 | 60 | 61 | /** 62 | A tag that starts with ' and

tags. 88 | */ 89 | -(BOOL)emitsNewLineInContents; 90 | 91 | @end 92 | -------------------------------------------------------------------------------- /Classes/TagChunk.m: -------------------------------------------------------------------------------- 1 | // 2 | // TagChunk.m 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 
@interface TagChunk()
@property NSStringCompareOptions compareOptions;
@end

@implementation TagChunk

@synthesize tagName, compareOptions;

/**
 Not supported for tags: use -initWithString:range:tagName: instead.
 Asserts in debug builds; releases the receiver and returns nil when asserts are compiled out
 (previously control fell off the end of this non-void method).
 */
-(id)initWithString: (NSString*)aSource range:(NSRange)aRange{
	assert(NO);
	[self release];
	return nil;
}

/**
 Initializes a tag chunk over `aRange` of `aSource` with an already-created tag name.
 Tag-name comparisons start out case-insensitive.
 NOTE(review): no superclass initializer is invoked here; whether that is safe depends on
 Chunk's initializers, which are not visible from this file -- confirm before changing.
 */
-(id)initWithString: (NSString*)aSource range:(NSRange)aRange tagName:(NSString*)aTagName{
	source = [aSource retain];
	range = aRange;
	tagName = [aTagName retain];
	compareOptions = NSCaseInsensitiveSearch;
	return self;
}

-(void)dealloc{
	[tagName release];
	[super dealloc];
}

/** The chunk's range without its first and last character. */
-(NSRange)interiorRange{
	return NSMakeRange(range.location +1, range.length - 2);
}

-(NSString*)kind{
	return ChunkKindTag;
}

/** YES if the character before the chunk's last character is '/'. */
-(BOOL)isEmptyTag{
	if (range.length < 2)	// too short to index safely (e.g. the range has not been set yet)
		return NO;
	return [source characterAtIndex: range.location + range.length - 2] == '/';
}

/** YES if the chunk's second character is '/'. */
-(BOOL)isCloseTag{
	if (range.length < 2)	// too short to index safely (e.g. the range has not been set yet)
		return NO;
	return [source characterAtIndex: range.location + 1] == '/';
}

/**
 YES if the receiver's tag name, ignoring its first character (the '/' of a close tag),
 equals the tag name of `aTag` under the current comparison options.
 */
-(BOOL)closesTag:(TagChunk*)aTag{
	NSString* myName = [self tagName];
	if ([myName length] == 0)	// length - 1 would wrap around and raise a range exception
		return NO;
	NSComparisonResult result = [myName compare: [aTag tagName]
										options: compareOptions
										  range: NSMakeRange(1, [myName length] - 1)];
	return result == NSOrderedSame;
}

/** YES if the tag name equals `anotherTagName` under the current comparison options. */
-(BOOL)tagNameEquals:(NSString*)anotherTagName{
	NSComparisonResult result = [[self tagName] compare: anotherTagName options: compareOptions];
	return result == NSOrderedSame;
}

/** YES for "p" and "br" tags. */
-(BOOL)emitsNewLineInContents{
	return [self tagNameEquals: @"p"] || [self tagNameEquals: @"br"];
}


-(void)setRange: (NSRange)aRange{
	range = aRange;
}

/** YES if tag-name comparisons are literal rather than case-insensitive. */
-(BOOL)caseSensative{
	return compareOptions == NSLiteralSearch;
}

-(void)setCaseSensative:(BOOL)flag{
	compareOptions = (flag) ? NSLiteralSearch : NSCaseInsensitiveSearch;
}

/** The source text covered by the chunk. */
-(NSString*)description{
	return [source substringWithRange: range];
}

/** The tag name; asserts that one has been set. */
-(NSString*)tagName{
	assert(tagName);
	return tagName;
}

+(NSString*)humanName{
	return @"tag";
}

@end
/**
	Represents a chunk of text. Note that we don't distinguish between ignorable whitespace or not...
*/
@interface TxtChunk : Chunk {

}

@end


@implementation TxtChunk

/** Kind identifier for text chunks. */
-(NSString*)kind{
	return ChunkKindText;
}

/** Name used for this kind of chunk in human-readable messages. */
+(NSString*)humanName{
	return @"text";
}

@end
17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 23 | // 24 | 25 | #import 26 | #import "ElementParser.h" 27 | 28 | 29 | @interface URLParser : NSObject { 30 | NSError* lastError; 31 | NSURLConnection* connection; 32 | ElementParser* parser; 33 | NSString* contentType; 34 | NSStringEncoding encoding; 35 | NSObject* connectionDelegate; 36 | NSMutableData* partialStringData; 37 | } 38 | 39 | @property(retain, nonatomic) NSObject* connectionDelegate; 40 | @property(retain, readonly) NSURLConnection* connection; 41 | @property(retain, readonly) ElementParser* parser; 42 | @property(retain, nonatomic) NSError* lastError; 43 | @property(retain, nonatomic) NSString* contentType; 44 | @property NSStringEncoding encoding; 45 | @property (retain, nonatomic) NSMutableData* partialStringData; 46 | 47 | 48 | -(id)initWithCallbackDelegate:(id)delegate; 49 | -(void)performSelector:(SEL)method forElementsMatching:(NSString*)cssSelector; 50 | -(void)parseURL:(NSURL*) url; 51 | -(void)cancelLoading; 52 | 53 | @end 54 | -------------------------------------------------------------------------------- /Classes/URLParser.m: -------------------------------------------------------------------------------- 1 | // 2 | // URLParser.m 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/25/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 
@implementation URLParser

@synthesize parser, connection, lastError, contentType, encoding, connectionDelegate, partialStringData;

/**
 Creates the parser that will receive the downloaded text and registers `delegate` as its
 callback delegate. The encoding defaults to ISO Latin 1 until a response says otherwise.
 */
- (id)initWithCallbackDelegate:(id)delegate{
	self = [super init];
	if (self){
		parser = [[ElementParser alloc] init];
		parser.delegate = delegate;
		encoding = NSISOLatin1StringEncoding;
	}
	return self;
}

-(void) dealloc{
	[connection cancel];
	[connection release];
	[parser release];
	[lastError release];
	[contentType release];			// retained by its property; was leaked
	[connectionDelegate release];	// retained by its property; was leaked
	[partialStringData release];
	[super dealloc];
}

/**
 Starts loading `url` and begins an incremental parse. The receiver acts as the connection's
 delegate and feeds data to the parser as it arrives.
 */
-(void)parseURL:(NSURL*) url{
	// drop any connection left over from an earlier call instead of leaking it
	[connection cancel];
	[connection release];

	NSURLRequest* request = [[NSURLRequest alloc] initWithURL: url];
	connection = [[NSURLConnection alloc] initWithRequest:request delegate:self];
	[request release];
	[parser beginParsing];
}


/** Forwards the selector/CSS-selector registration to the underlying parser. */
-(void)performSelector:(SEL)method forElementsMatching:(NSString*)cssSelector{
	[parser performSelector: method forElementsMatching: cssSelector];
}

-(void)cancelLoading{
	[connection cancel];
}


#pragma mark NSURLConnection Delegate methods

/**
 Records the Content-Type header, derives the text encoding and parser mode (HTML if the
 content type mentions "html", XML otherwise), then forwards the call to connectionDelegate.
 */
- (void)connection:(NSURLConnection *)aConnection didReceiveResponse:(NSURLResponse *)response{
	assert(aConnection == connection);	// was an assignment, which checked nothing
	NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
	if ([response respondsToSelector: @selector(allHeaderFields)]){
		self.contentType = [[(NSHTTPURLResponse*)response allHeaderFields] valueForKey: @"Content-Type"];
		encoding = [NSString encodingForContentType: contentType];
		if ([contentType rangeOfString: @"html" options: NSCaseInsensitiveSearch].location != NSNotFound)
			parser.mode = ElementParserModeHTML;
		else
			parser.mode = ElementParserModeXML;
	}
	if ([connectionDelegate respondsToSelector:@selector(connection:didReceiveResponse:)])
		[connectionDelegate connection:connection didReceiveResponse: response];
	[pool release];
}

/** Stores the error in lastError, cancels the connection and forwards the call. */
- (void)connection:(NSURLConnection *)aConnection didFailWithError:(NSError *)error {
	assert(aConnection == connection);	// was an assignment, which checked nothing
	NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
	self.lastError = error;
	[connection cancel];
	if ([connectionDelegate respondsToSelector:@selector(connection:didFailWithError:)])
		[connectionDelegate connection:connection didFailWithError: error];
	[pool release];
}

/**
 Decodes the newly received bytes (prefixed by any bytes held back from the previous call)
 and hands the resulting text to the parser.

 A network chunk can end in the middle of a multi-byte character, so decoding is retried with
 up to 3 trailing bytes withheld; withheld bytes are kept in partialStringData for the next
 call. connectionDelegate is forwarded the data exactly as the connection delivered it.
 */
- (void)connection:(NSURLConnection *)aConnection didReceiveData:(NSData *)data {
	NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];

	// bytes to decode: the leftovers from last time followed by the new data
	NSData* pending = data;
	if (partialStringData){
		[partialStringData appendData: data];
		pending = partialStringData;
	}
	const char* bytes = (const char*)[pending bytes];
	NSUInteger total = [pending length];

	// find the smallest number of trailing bytes (0..3) to withhold so the rest decodes
	NSUInteger less = 0;
	NSString* moreSource = nil;
	for (; less <= 3 && less <= total; less++){
		moreSource = [[NSString alloc] initWithBytes: bytes length: (total - less) encoding: encoding];
		if (moreSource)
			break;
	}
	NSAssert(moreSource, @"unable to make string from data");

	if (moreSource && less > 0){
		NSLog(@"Partial string received storing %i bytes, first char=%i", (int)less, (int)(unsigned char)bytes[total - less]);
		// copy the tail before the setter releases the buffer that `bytes` may point into
		NSMutableData* leftover = [[NSMutableData alloc] initWithBytes: bytes + (total - less) length: less];
		self.partialStringData = leftover;
		[leftover release]; // setter has retained it
	}
	else{
		// Everything was consumed (or nothing could be decoded): forget the buffered bytes so
		// they are not decoded and parsed a second time on the next call.
		self.partialStringData = nil;
	}

	if (moreSource)
		[parser continueParsingString: moreSource];
	[moreSource release];

	if ([connectionDelegate respondsToSelector:@selector(connection:didReceiveData:)])
		[connectionDelegate connection:connection didReceiveData: data];
	[pool release];
}

/** Tells the parser that the document is complete and forwards the call. */
- (void)connectionDidFinishLoading:(NSURLConnection *)aConnection {
	NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
	[parser finishParsing];
	if ([connectionDelegate respondsToSelector:@selector(connectionDidFinishLoading:)])
		[connectionDelegate connectionDidFinishLoading:connection];
	[pool release];
}


@end
Each of these corresponds to an element (i.e., a tag) in the document. Unlike other DOM builders, only Elements are modeled in the tree... everything else (even attributes) is parsed out only when needed. 17 | 18 | ElementParser supports one-shot parsing like: 19 | 20 | -(DocumentRoot*)parseHTML:(NSString*)source; 21 | 22 | and 23 | 24 | -(DocumentRoot*)parseXML:(NSString*)source; 25 | 26 | and it supports incremental parsing with: 27 | 28 | -(DocumentRoot*)beginParsing; 29 | -(void)continueParsingString:(NSString*)string; 30 | -(void)finishParsing; 31 | 32 | 33 | Note: DocumentRoot is a subclass of Element (see below) that holds all the top-level Elements in the document. 34 | 35 | One of the most powerful parts of ElementParser is its ability to fire high-level callbacks into your code when CSS-style selectors are matched. You register these with the ElementParser before parsing and are then handed matching elements as they are encountered. 36 | 37 | URLParser ties an NSURLConnection and an ElementParser together to provide a very convenient way to parse documents out in the world, leveraging the incremental features of the parser. 38 | 39 | CSSSelector and CSSSelectorPart are used to model a CSS selector. These provide a lightweight way to access elements within an XML or HTML document. 40 | 41 | CSSSelectorMatcher and CSSPartMatcher are used during the evaluation of a CSS selector against an Element tree. 42 | 43 | That's it. Comments welcome at feedback@touchtankapps.com. -------------------------------------------------------------------------------- /Demo/Classes/DemoAppDelegate.h: -------------------------------------------------------------------------------- 1 | // 2 | // DemoAppDelegate.h 3 | // Demo 4 | // 5 | // Created by Lee Buck on 8/23/09. 6 | // Copyright Blue Bright Ventures 2009. All rights reserved.
@implementation DemoAppDelegate

@synthesize window, viewController;


/** Installs the view controller's view in the window and shows the window. */
- (void)applicationDidFinishLaunching:(UIApplication *)application {
    // Override point for customization after app launch
    UIView *rootView = viewController.view;
    [window addSubview:rootView];
    [window makeKeyAndVisible];
}


/** Gives up ownership of the retained outlets. */
- (void)dealloc {
    [viewController release];
    [window release];
    [super dealloc];
}


@end
7 | // 8 | 9 | #import 10 | // View controller for the demo: a single text view shows the HTML source, the CSS pattern, or the match results, depending on the selected segment. 11 | @interface DemoViewController : UIViewController { 12 | 13 | IBOutlet UITextView* textView; 14 | IBOutlet UISegmentedControl* segmentControl; 15 | NSString* source; 16 | NSString* pattern; 17 | NSString* result; 18 | int selectedIndex; 19 | } 20 | // String properties are copied so that a mutable string passed by a caller cannot change underneath the controller. 21 | @property (nonatomic, copy) NSString* source; 22 | @property (nonatomic, copy) NSString* pattern; 23 | @property (nonatomic, copy) NSString* result; 24 | 25 | -(IBAction)updateView:(id)sender; 26 | 27 | @end 28 | 29 | -------------------------------------------------------------------------------- /Demo/Classes/DemoViewController.m: -------------------------------------------------------------------------------- 1 | // 2 | // DemoViewController.m 3 | // Demo 4 | // 5 | // Created by Lee Buck on 8/23/09. 6 | // Copyright Blue Bright Ventures 2009. All rights reserved. 7 | // 8 | 9 | #import "DemoViewController.h" 10 | #import "Element.h" 11 | #import "DocumentRoot.h" 12 | 13 | @implementation DemoViewController 14 | 15 | @synthesize source, pattern, result; 16 | 17 | /* 18 | // The designated initializer. Override to perform setup that is required before the view is loaded. 19 | - (id)initWithNibName:(NSString *)nibNameOrNil bundle:(NSBundle *)nibBundleOrNil { 20 | if (self = [super initWithNibName:nibNameOrNil bundle:nibBundleOrNil]) { 21 | // Custom initialization 22 | } 23 | return self; 24 | } 25 | */ 26 | 27 | /* 28 | // Implement loadView to create a view hierarchy programmatically, without using a nib. 29 | - (void)loadView { 30 | } 31 | */ 32 | 33 | 34 | // Loads the bundled source2.html into the source property and the text view, and sets the default pattern to "*". 35 | // Implement viewDidLoad to do additional setup after loading the view, typically from a nib. 
36 | - (void)viewDidLoad { 37 | [super viewDidLoad]; 38 | /* pre-populate source with the bundled source2.html file */ 39 | NSString* path = [[NSBundle mainBundle] pathForResource: @"source2" ofType: @"html"]; 40 | NSStringEncoding encoding; 41 | self.source = [NSString stringWithContentsOfFile: path usedEncoding: &encoding error: NULL]; 42 | textView.text = self.source; 43 | self.pattern = @"*"; 44 | } 45 | 46 | 47 | 48 | /* 49 | // Override to allow orientations other than the default portrait orientation. 50 | - (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation { 51 | // Return YES for supported orientations 52 | return (interfaceOrientation == UIInterfaceOrientationPortrait); 53 | } 54 | */ 55 | 56 | - (void)didReceiveMemoryWarning { 57 | // Releases the view if it doesn't have a superview. 58 | [super didReceiveMemoryWarning]; 59 | 60 | // Release any cached data, images, etc that aren't in use. 61 | } 62 | 63 | - (void)viewDidUnload { 64 | // Release any retained subviews of the main view. 65 | // e.g. self.myOutlet = nil; 66 | } 67 | 68 | // Gives up ownership of the three string ivars. They must be sent -release; -dealloc is never sent directly to another object (only [super dealloc] is allowed). 69 | - (void)dealloc { 70 | [source release]; 71 | [pattern release]; 72 | [result release]; 73 | [super dealloc]; 74 | } 75 | // Parses source as HTML, selects the elements matching pattern, and returns one entry per element (its description followed by at most 5 characters of its contents source and "..."), joined by a separator line. 76 | -(NSString*)matchResult{ 77 | DocumentRoot* document = [Element parseHTML: source]; 78 | NSArray* elements = [document selectElements: pattern]; 79 | NSMutableArray* results = [NSMutableArray array]; 80 | for (Element* element in elements){ 81 | NSString* snipet = [element contentsSource]; 82 | snipet = ([snipet length] > 5) ? 
[snipet substringToIndex: 5] : snipet; 83 | snipet = [[element description] stringByAppendingFormat: @"%@...", snipet]; 84 | [results addObject: snipet]; 85 | } 86 | return [results componentsJoinedByString: @"\n—————————————————\n"]; 87 | } 88 | // Stores the text view's contents into the previously selected field (0 = source, 1 = pattern), then shows the newly selected segment: 0 = editable source, 1 = editable pattern, 2 = read-only match results. Does nothing if the selection is unchanged. 89 | -(IBAction)updateView:(id)sender{ 90 | if (selectedIndex == [segmentControl selectedSegmentIndex]) return; 91 | 92 | if (selectedIndex == 0){ 93 | self.source = [textView text]; 94 | } 95 | else if (selectedIndex == 1){ 96 | self.pattern = [textView text]; 97 | } 98 | else if (selectedIndex == 2){ 99 | } 100 | 101 | if ([segmentControl selectedSegmentIndex] == 0) { 102 | textView.text = self.source; 103 | textView.editable = YES; 104 | } 105 | else if ([segmentControl selectedSegmentIndex] == 1) { 106 | textView.text = self.pattern; 107 | textView.editable = YES; 108 | } 109 | else if ([segmentControl selectedSegmentIndex] == 2) { 110 | textView.editable = NO; 111 | textView.text = [self matchResult]; 112 | } 113 | selectedIndex = [segmentControl selectedSegmentIndex]; 114 | } 115 | 116 | 117 | @end 118 | -------------------------------------------------------------------------------- /Demo/Demo-Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | English 7 | CFBundleDisplayName 8 | ${PRODUCT_NAME} 9 | CFBundleExecutable 10 | ${EXECUTABLE_NAME} 11 | CFBundleIconFile 12 | 13 | CFBundleIdentifier 14 | com.yourcompany.${PRODUCT_NAME:rfc1034identifier} 15 | CFBundleInfoDictionaryVersion 16 | 6.0 17 | CFBundleName 18 | ${PRODUCT_NAME} 19 | CFBundlePackageType 20 | APPL 21 | CFBundleSignature 22 | ???? 
23 | CFBundleVersion 24 | 1.0 25 | LSRequiresIPhoneOS 26 | 27 | NSMainNibFile 28 | MainWindow 29 | 30 | 31 | -------------------------------------------------------------------------------- /Demo/Demo.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 45; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 1D3623260D0F684500981E51 /* DemoAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 1D3623250D0F684500981E51 /* DemoAppDelegate.m */; }; 11 | 1D60589B0D05DD56006BFB54 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 29B97316FDCFA39411CA2CEA /* main.m */; }; 12 | 1D60589F0D05DD5A006BFB54 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1D30AB110D05D00D00671497 /* Foundation.framework */; }; 13 | 1DF5F4E00D08C38300B7A737 /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 1DF5F4DF0D08C38300B7A737 /* UIKit.framework */; }; 14 | 288765A50DF7441C002DB57D /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 288765A40DF7441C002DB57D /* CoreGraphics.framework */; }; 15 | 2899E5220DE3E06400AC0155 /* DemoViewController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 2899E5210DE3E06400AC0155 /* DemoViewController.xib */; }; 16 | 28AD733F0D9D9553002E5188 /* MainWindow.xib in Resources */ = {isa = PBXBuildFile; fileRef = 28AD733E0D9D9553002E5188 /* MainWindow.xib */; }; 17 | 28D7ACF80DDB3853001CB0EB /* DemoViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 28D7ACF70DDB3853001CB0EB /* DemoViewController.m */; }; 18 | DC65742F1042E7C500BE3D62 /* CDataChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65740E1042E7C500BE3D62 /* CDataChunk.m */; }; 19 | DC6574301042E7C500BE3D62 /* Chunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574101042E7C500BE3D62 /* Chunk.m */; }; 20 | DC6574311042E7C500BE3D62 /* CommentChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574121042E7C500BE3D62 /* CommentChunk.m */; }; 21 | DC6574321042E7C500BE3D62 /* CSSPartMatcher.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574141042E7C500BE3D62 /* CSSPartMatcher.m */; }; 22 | DC6574331042E7C500BE3D62 /* CSSSelector.m in Sources */ = {isa = 
PBXBuildFile; fileRef = DC6574161042E7C500BE3D62 /* CSSSelector.m */; }; 23 | DC6574341042E7C500BE3D62 /* CSSSelectorMatcher.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574181042E7C500BE3D62 /* CSSSelectorMatcher.m */; }; 24 | DC6574351042E7C500BE3D62 /* CSSSelectorPart.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65741A1042E7C500BE3D62 /* CSSSelectorPart.m */; }; 25 | DC6574361042E7C500BE3D62 /* DoctypeChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65741C1042E7C500BE3D62 /* DoctypeChunk.m */; }; 26 | DC6574371042E7C500BE3D62 /* DocumentRoot.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65741E1042E7C500BE3D62 /* DocumentRoot.m */; }; 27 | DC6574381042E7C500BE3D62 /* Element.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574201042E7C500BE3D62 /* Element.m */; }; 28 | DC6574391042E7C500BE3D62 /* ElementParser.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574221042E7C500BE3D62 /* ElementParser.m */; }; 29 | DC65743A1042E7C500BE3D62 /* EntityChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574241042E7C500BE3D62 /* EntityChunk.m */; }; 30 | DC65743B1042E7C500BE3D62 /* NSString_HTML.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574261042E7C500BE3D62 /* NSString_HTML.m */; }; 31 | DC65743C1042E7C500BE3D62 /* ProcessingInstructionChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC6574281042E7C500BE3D62 /* ProcessingInstructionChunk.m */; }; 32 | DC65743D1042E7C500BE3D62 /* TagChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65742A1042E7C500BE3D62 /* TagChunk.m */; }; 33 | DC65743E1042E7C500BE3D62 /* TxtChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65742C1042E7C500BE3D62 /* TxtChunk.m */; }; 34 | DC65743F1042E7C500BE3D62 /* URLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = DC65742E1042E7C500BE3D62 /* URLParser.m */; }; 35 | DC903C0610777B8000F65535 /* source.html in Resources */ = {isa = PBXBuildFile; fileRef = DC903C0510777B8000F65535 /* source.html */; }; 36 | DCBCF2791078D72F00B671ED 
/* source2.html in Resources */ = {isa = PBXBuildFile; fileRef = DCBCF2781078D72F00B671ED /* source2.html */; }; 37 | /* End PBXBuildFile section */ 38 | 39 | /* Begin PBXFileReference section */ 40 | 1D30AB110D05D00D00671497 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; 41 | 1D3623240D0F684500981E51 /* DemoAppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DemoAppDelegate.h; sourceTree = ""; }; 42 | 1D3623250D0F684500981E51 /* DemoAppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = DemoAppDelegate.m; sourceTree = ""; }; 43 | 1D6058910D05DD3D006BFB54 /* Demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Demo.app; sourceTree = BUILT_PRODUCTS_DIR; }; 44 | 1DF5F4DF0D08C38300B7A737 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; }; 45 | 288765A40DF7441C002DB57D /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; }; 46 | 2899E5210DE3E06400AC0155 /* DemoViewController.xib */ = {isa = PBXFileReference; lastKnownFileType = file.xib; path = DemoViewController.xib; sourceTree = ""; }; 47 | 28AD733E0D9D9553002E5188 /* MainWindow.xib */ = {isa = PBXFileReference; lastKnownFileType = file.xib; path = MainWindow.xib; sourceTree = ""; }; 48 | 28D7ACF60DDB3853001CB0EB /* DemoViewController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DemoViewController.h; sourceTree = ""; }; 49 | 28D7ACF70DDB3853001CB0EB /* DemoViewController.m */ = 
{isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = DemoViewController.m; sourceTree = ""; }; 50 | 29B97316FDCFA39411CA2CEA /* main.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; 51 | 32CA4F630368D1EE00C91783 /* Demo_Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Demo_Prefix.pch; sourceTree = ""; }; 52 | 8D1107310486CEB800E47090 /* Demo-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "Demo-Info.plist"; plistStructureDefinitionIdentifier = "com.apple.xcode.plist.structure-definition.iphone.info-plist"; sourceTree = ""; }; 53 | DC65740D1042E7C500BE3D62 /* CDataChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CDataChunk.h; path = ../Classes/CDataChunk.h; sourceTree = SOURCE_ROOT; }; 54 | DC65740E1042E7C500BE3D62 /* CDataChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CDataChunk.m; path = ../Classes/CDataChunk.m; sourceTree = SOURCE_ROOT; }; 55 | DC65740F1042E7C500BE3D62 /* Chunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Chunk.h; path = ../Classes/Chunk.h; sourceTree = SOURCE_ROOT; }; 56 | DC6574101042E7C500BE3D62 /* Chunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Chunk.m; path = ../Classes/Chunk.m; sourceTree = SOURCE_ROOT; }; 57 | DC6574111042E7C500BE3D62 /* CommentChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CommentChunk.h; path = ../Classes/CommentChunk.h; sourceTree = SOURCE_ROOT; }; 58 | DC6574121042E7C500BE3D62 /* CommentChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CommentChunk.m; path = ../Classes/CommentChunk.m; sourceTree = 
SOURCE_ROOT; }; 59 | DC6574131042E7C500BE3D62 /* CSSPartMatcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSPartMatcher.h; path = ../Classes/CSSPartMatcher.h; sourceTree = SOURCE_ROOT; }; 60 | DC6574141042E7C500BE3D62 /* CSSPartMatcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSPartMatcher.m; path = ../Classes/CSSPartMatcher.m; sourceTree = SOURCE_ROOT; }; 61 | DC6574151042E7C500BE3D62 /* CSSSelector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelector.h; path = ../Classes/CSSSelector.h; sourceTree = SOURCE_ROOT; }; 62 | DC6574161042E7C500BE3D62 /* CSSSelector.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelector.m; path = ../Classes/CSSSelector.m; sourceTree = SOURCE_ROOT; }; 63 | DC6574171042E7C500BE3D62 /* CSSSelectorMatcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelectorMatcher.h; path = ../Classes/CSSSelectorMatcher.h; sourceTree = SOURCE_ROOT; }; 64 | DC6574181042E7C500BE3D62 /* CSSSelectorMatcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelectorMatcher.m; path = ../Classes/CSSSelectorMatcher.m; sourceTree = SOURCE_ROOT; }; 65 | DC6574191042E7C500BE3D62 /* CSSSelectorPart.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelectorPart.h; path = ../Classes/CSSSelectorPart.h; sourceTree = SOURCE_ROOT; }; 66 | DC65741A1042E7C500BE3D62 /* CSSSelectorPart.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelectorPart.m; path = ../Classes/CSSSelectorPart.m; sourceTree = SOURCE_ROOT; }; 67 | DC65741B1042E7C500BE3D62 /* DoctypeChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DoctypeChunk.h; path = 
../Classes/DoctypeChunk.h; sourceTree = SOURCE_ROOT; }; 68 | DC65741C1042E7C500BE3D62 /* DoctypeChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = DoctypeChunk.m; path = ../Classes/DoctypeChunk.m; sourceTree = SOURCE_ROOT; }; 69 | DC65741D1042E7C500BE3D62 /* DocumentRoot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DocumentRoot.h; path = ../Classes/DocumentRoot.h; sourceTree = SOURCE_ROOT; }; 70 | DC65741E1042E7C500BE3D62 /* DocumentRoot.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = DocumentRoot.m; path = ../Classes/DocumentRoot.m; sourceTree = SOURCE_ROOT; }; 71 | DC65741F1042E7C500BE3D62 /* Element.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Element.h; path = ../Classes/Element.h; sourceTree = SOURCE_ROOT; }; 72 | DC6574201042E7C500BE3D62 /* Element.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Element.m; path = ../Classes/Element.m; sourceTree = SOURCE_ROOT; }; 73 | DC6574211042E7C500BE3D62 /* ElementParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ElementParser.h; path = ../Classes/ElementParser.h; sourceTree = SOURCE_ROOT; }; 74 | DC6574221042E7C500BE3D62 /* ElementParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = ElementParser.m; path = ../Classes/ElementParser.m; sourceTree = SOURCE_ROOT; }; 75 | DC6574231042E7C500BE3D62 /* EntityChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = EntityChunk.h; path = ../Classes/EntityChunk.h; sourceTree = SOURCE_ROOT; }; 76 | DC6574241042E7C500BE3D62 /* EntityChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = EntityChunk.m; path = ../Classes/EntityChunk.m; sourceTree = SOURCE_ROOT; 
}; 77 | DC6574251042E7C500BE3D62 /* NSString_HTML.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = NSString_HTML.h; path = ../Classes/NSString_HTML.h; sourceTree = SOURCE_ROOT; }; 78 | DC6574261042E7C500BE3D62 /* NSString_HTML.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = NSString_HTML.m; path = ../Classes/NSString_HTML.m; sourceTree = SOURCE_ROOT; }; 79 | DC6574271042E7C500BE3D62 /* ProcessingInstructionChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ProcessingInstructionChunk.h; path = ../Classes/ProcessingInstructionChunk.h; sourceTree = SOURCE_ROOT; }; 80 | DC6574281042E7C500BE3D62 /* ProcessingInstructionChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = ProcessingInstructionChunk.m; path = ../Classes/ProcessingInstructionChunk.m; sourceTree = SOURCE_ROOT; }; 81 | DC6574291042E7C500BE3D62 /* TagChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TagChunk.h; path = ../Classes/TagChunk.h; sourceTree = SOURCE_ROOT; }; 82 | DC65742A1042E7C500BE3D62 /* TagChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TagChunk.m; path = ../Classes/TagChunk.m; sourceTree = SOURCE_ROOT; }; 83 | DC65742B1042E7C500BE3D62 /* TxtChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TxtChunk.h; path = ../Classes/TxtChunk.h; sourceTree = SOURCE_ROOT; }; 84 | DC65742C1042E7C500BE3D62 /* TxtChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TxtChunk.m; path = ../Classes/TxtChunk.m; sourceTree = SOURCE_ROOT; }; 85 | DC65742D1042E7C500BE3D62 /* URLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = URLParser.h; path = ../Classes/URLParser.h; sourceTree = 
SOURCE_ROOT; }; 86 | DC65742E1042E7C500BE3D62 /* URLParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = URLParser.m; path = ../Classes/URLParser.m; sourceTree = SOURCE_ROOT; }; 87 | DC903C0510777B8000F65535 /* source.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = source.html; sourceTree = ""; }; 88 | DCBCF2781078D72F00B671ED /* source2.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = source2.html; sourceTree = ""; }; 89 | /* End PBXFileReference section */ 90 | 91 | /* Begin PBXFrameworksBuildPhase section */ 92 | 1D60588F0D05DD3D006BFB54 /* Frameworks */ = { 93 | isa = PBXFrameworksBuildPhase; 94 | buildActionMask = 2147483647; 95 | files = ( 96 | 1D60589F0D05DD5A006BFB54 /* Foundation.framework in Frameworks */, 97 | 1DF5F4E00D08C38300B7A737 /* UIKit.framework in Frameworks */, 98 | 288765A50DF7441C002DB57D /* CoreGraphics.framework in Frameworks */, 99 | ); 100 | runOnlyForDeploymentPostprocessing = 0; 101 | }; 102 | /* End PBXFrameworksBuildPhase section */ 103 | 104 | /* Begin PBXGroup section */ 105 | 080E96DDFE201D6D7F000001 /* Classes */ = { 106 | isa = PBXGroup; 107 | children = ( 108 | 1D3623240D0F684500981E51 /* DemoAppDelegate.h */, 109 | 1D3623250D0F684500981E51 /* DemoAppDelegate.m */, 110 | 28D7ACF60DDB3853001CB0EB /* DemoViewController.h */, 111 | 28D7ACF70DDB3853001CB0EB /* DemoViewController.m */, 112 | ); 113 | path = Classes; 114 | sourceTree = ""; 115 | }; 116 | 19C28FACFE9D520D11CA2CBB /* Products */ = { 117 | isa = PBXGroup; 118 | children = ( 119 | 1D6058910D05DD3D006BFB54 /* Demo.app */, 120 | ); 121 | name = Products; 122 | sourceTree = ""; 123 | }; 124 | 29B97314FDCFA39411CA2CEA /* CustomTemplate */ = { 125 | isa = PBXGroup; 126 | children = ( 127 | DC65740C1042E7A500BE3D62 /* ElementParser */, 128 | 080E96DDFE201D6D7F000001 /* Classes */, 129 | 29B97315FDCFA39411CA2CEA /* Other Sources */, 130 | 
29B97317FDCFA39411CA2CEA /* Resources */, 131 | 29B97323FDCFA39411CA2CEA /* Frameworks */, 132 | 19C28FACFE9D520D11CA2CBB /* Products */, 133 | ); 134 | name = CustomTemplate; 135 | sourceTree = ""; 136 | }; 137 | 29B97315FDCFA39411CA2CEA /* Other Sources */ = { 138 | isa = PBXGroup; 139 | children = ( 140 | 32CA4F630368D1EE00C91783 /* Demo_Prefix.pch */, 141 | 29B97316FDCFA39411CA2CEA /* main.m */, 142 | ); 143 | name = "Other Sources"; 144 | sourceTree = ""; 145 | }; 146 | 29B97317FDCFA39411CA2CEA /* Resources */ = { 147 | isa = PBXGroup; 148 | children = ( 149 | 2899E5210DE3E06400AC0155 /* DemoViewController.xib */, 150 | DC903C0510777B8000F65535 /* source.html */, 151 | DCBCF2781078D72F00B671ED /* source2.html */, 152 | 28AD733E0D9D9553002E5188 /* MainWindow.xib */, 153 | 8D1107310486CEB800E47090 /* Demo-Info.plist */, 154 | ); 155 | name = Resources; 156 | sourceTree = ""; 157 | }; 158 | 29B97323FDCFA39411CA2CEA /* Frameworks */ = { 159 | isa = PBXGroup; 160 | children = ( 161 | 1DF5F4DF0D08C38300B7A737 /* UIKit.framework */, 162 | 1D30AB110D05D00D00671497 /* Foundation.framework */, 163 | 288765A40DF7441C002DB57D /* CoreGraphics.framework */, 164 | ); 165 | name = Frameworks; 166 | sourceTree = ""; 167 | }; 168 | DC65740C1042E7A500BE3D62 /* ElementParser */ = { 169 | isa = PBXGroup; 170 | children = ( 171 | DC65740D1042E7C500BE3D62 /* CDataChunk.h */, 172 | DC65740E1042E7C500BE3D62 /* CDataChunk.m */, 173 | DC65740F1042E7C500BE3D62 /* Chunk.h */, 174 | DC6574101042E7C500BE3D62 /* Chunk.m */, 175 | DC6574111042E7C500BE3D62 /* CommentChunk.h */, 176 | DC6574121042E7C500BE3D62 /* CommentChunk.m */, 177 | DC6574131042E7C500BE3D62 /* CSSPartMatcher.h */, 178 | DC6574141042E7C500BE3D62 /* CSSPartMatcher.m */, 179 | DC6574151042E7C500BE3D62 /* CSSSelector.h */, 180 | DC6574161042E7C500BE3D62 /* CSSSelector.m */, 181 | DC6574171042E7C500BE3D62 /* CSSSelectorMatcher.h */, 182 | DC6574181042E7C500BE3D62 /* CSSSelectorMatcher.m */, 183 | DC6574191042E7C500BE3D62 /* 
CSSSelectorPart.h */, 184 | DC65741A1042E7C500BE3D62 /* CSSSelectorPart.m */, 185 | DC65741B1042E7C500BE3D62 /* DoctypeChunk.h */, 186 | DC65741C1042E7C500BE3D62 /* DoctypeChunk.m */, 187 | DC65741D1042E7C500BE3D62 /* DocumentRoot.h */, 188 | DC65741E1042E7C500BE3D62 /* DocumentRoot.m */, 189 | DC65741F1042E7C500BE3D62 /* Element.h */, 190 | DC6574201042E7C500BE3D62 /* Element.m */, 191 | DC6574211042E7C500BE3D62 /* ElementParser.h */, 192 | DC6574221042E7C500BE3D62 /* ElementParser.m */, 193 | DC6574231042E7C500BE3D62 /* EntityChunk.h */, 194 | DC6574241042E7C500BE3D62 /* EntityChunk.m */, 195 | DC6574251042E7C500BE3D62 /* NSString_HTML.h */, 196 | DC6574261042E7C500BE3D62 /* NSString_HTML.m */, 197 | DC6574271042E7C500BE3D62 /* ProcessingInstructionChunk.h */, 198 | DC6574281042E7C500BE3D62 /* ProcessingInstructionChunk.m */, 199 | DC6574291042E7C500BE3D62 /* TagChunk.h */, 200 | DC65742A1042E7C500BE3D62 /* TagChunk.m */, 201 | DC65742B1042E7C500BE3D62 /* TxtChunk.h */, 202 | DC65742C1042E7C500BE3D62 /* TxtChunk.m */, 203 | DC65742D1042E7C500BE3D62 /* URLParser.h */, 204 | DC65742E1042E7C500BE3D62 /* URLParser.m */, 205 | ); 206 | name = ElementParser; 207 | path = Classes; 208 | sourceTree = ""; 209 | }; 210 | /* End PBXGroup section */ 211 | 212 | /* Begin PBXNativeTarget section */ 213 | 1D6058900D05DD3D006BFB54 /* Demo */ = { 214 | isa = PBXNativeTarget; 215 | buildConfigurationList = 1D6058960D05DD3E006BFB54 /* Build configuration list for PBXNativeTarget "Demo" */; 216 | buildPhases = ( 217 | 1D60588D0D05DD3D006BFB54 /* Resources */, 218 | 1D60588E0D05DD3D006BFB54 /* Sources */, 219 | 1D60588F0D05DD3D006BFB54 /* Frameworks */, 220 | ); 221 | buildRules = ( 222 | ); 223 | dependencies = ( 224 | ); 225 | name = Demo; 226 | productName = Demo; 227 | productReference = 1D6058910D05DD3D006BFB54 /* Demo.app */; 228 | productType = "com.apple.product-type.application"; 229 | }; 230 | /* End PBXNativeTarget section */ 231 | 232 | /* Begin PBXProject section */ 233 
| 29B97313FDCFA39411CA2CEA /* Project object */ = { 234 | isa = PBXProject; 235 | buildConfigurationList = C01FCF4E08A954540054247B /* Build configuration list for PBXProject "Demo" */; 236 | compatibilityVersion = "Xcode 3.1"; 237 | hasScannedForEncodings = 1; 238 | mainGroup = 29B97314FDCFA39411CA2CEA /* CustomTemplate */; 239 | projectDirPath = ""; 240 | projectRoot = ""; 241 | targets = ( 242 | 1D6058900D05DD3D006BFB54 /* Demo */, 243 | ); 244 | }; 245 | /* End PBXProject section */ 246 | 247 | /* Begin PBXResourcesBuildPhase section */ 248 | 1D60588D0D05DD3D006BFB54 /* Resources */ = { 249 | isa = PBXResourcesBuildPhase; 250 | buildActionMask = 2147483647; 251 | files = ( 252 | 28AD733F0D9D9553002E5188 /* MainWindow.xib in Resources */, 253 | 2899E5220DE3E06400AC0155 /* DemoViewController.xib in Resources */, 254 | DC903C0610777B8000F65535 /* source.html in Resources */, 255 | DCBCF2791078D72F00B671ED /* source2.html in Resources */, 256 | ); 257 | runOnlyForDeploymentPostprocessing = 0; 258 | }; 259 | /* End PBXResourcesBuildPhase section */ 260 | 261 | /* Begin PBXSourcesBuildPhase section */ 262 | 1D60588E0D05DD3D006BFB54 /* Sources */ = { 263 | isa = PBXSourcesBuildPhase; 264 | buildActionMask = 2147483647; 265 | files = ( 266 | 1D60589B0D05DD56006BFB54 /* main.m in Sources */, 267 | 1D3623260D0F684500981E51 /* DemoAppDelegate.m in Sources */, 268 | 28D7ACF80DDB3853001CB0EB /* DemoViewController.m in Sources */, 269 | DC65742F1042E7C500BE3D62 /* CDataChunk.m in Sources */, 270 | DC6574301042E7C500BE3D62 /* Chunk.m in Sources */, 271 | DC6574311042E7C500BE3D62 /* CommentChunk.m in Sources */, 272 | DC6574321042E7C500BE3D62 /* CSSPartMatcher.m in Sources */, 273 | DC6574331042E7C500BE3D62 /* CSSSelector.m in Sources */, 274 | DC6574341042E7C500BE3D62 /* CSSSelectorMatcher.m in Sources */, 275 | DC6574351042E7C500BE3D62 /* CSSSelectorPart.m in Sources */, 276 | DC6574361042E7C500BE3D62 /* DoctypeChunk.m in Sources */, 277 | DC6574371042E7C500BE3D62 /* 
DocumentRoot.m in Sources */, 278 | DC6574381042E7C500BE3D62 /* Element.m in Sources */, 279 | DC6574391042E7C500BE3D62 /* ElementParser.m in Sources */, 280 | DC65743A1042E7C500BE3D62 /* EntityChunk.m in Sources */, 281 | DC65743B1042E7C500BE3D62 /* NSString_HTML.m in Sources */, 282 | DC65743C1042E7C500BE3D62 /* ProcessingInstructionChunk.m in Sources */, 283 | DC65743D1042E7C500BE3D62 /* TagChunk.m in Sources */, 284 | DC65743E1042E7C500BE3D62 /* TxtChunk.m in Sources */, 285 | DC65743F1042E7C500BE3D62 /* URLParser.m in Sources */, 286 | ); 287 | runOnlyForDeploymentPostprocessing = 0; 288 | }; 289 | /* End PBXSourcesBuildPhase section */ 290 | 291 | /* Begin XCBuildConfiguration section */ 292 | 1D6058940D05DD3E006BFB54 /* Debug */ = { 293 | isa = XCBuildConfiguration; 294 | buildSettings = { 295 | ALWAYS_SEARCH_USER_PATHS = NO; 296 | COPY_PHASE_STRIP = NO; 297 | GCC_DYNAMIC_NO_PIC = NO; 298 | GCC_OPTIMIZATION_LEVEL = 0; 299 | GCC_PRECOMPILE_PREFIX_HEADER = YES; 300 | GCC_PREFIX_HEADER = Demo_Prefix.pch; 301 | INFOPLIST_FILE = "Demo-Info.plist"; 302 | PRODUCT_NAME = Demo; 303 | }; 304 | name = Debug; 305 | }; 306 | 1D6058950D05DD3E006BFB54 /* Release */ = { 307 | isa = XCBuildConfiguration; 308 | buildSettings = { 309 | ALWAYS_SEARCH_USER_PATHS = NO; 310 | COPY_PHASE_STRIP = YES; 311 | GCC_PRECOMPILE_PREFIX_HEADER = YES; 312 | GCC_PREFIX_HEADER = Demo_Prefix.pch; 313 | INFOPLIST_FILE = "Demo-Info.plist"; 314 | PRODUCT_NAME = Demo; 315 | }; 316 | name = Release; 317 | }; 318 | C01FCF4F08A954540054247B /* Debug */ = { 319 | isa = XCBuildConfiguration; 320 | buildSettings = { 321 | ARCHS = "$(ARCHS_STANDARD_32_BIT)"; 322 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 323 | GCC_C_LANGUAGE_STANDARD = c99; 324 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 325 | GCC_WARN_UNUSED_VARIABLE = YES; 326 | PREBINDING = NO; 327 | SDKROOT = iphoneos3.0; 328 | }; 329 | name = Debug; 330 | }; 331 | C01FCF5008A954540054247B /* Release */ = { 332 | isa = XCBuildConfiguration; 
333 | buildSettings = { 334 | ARCHS = "$(ARCHS_STANDARD_32_BIT)"; 335 | "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; 336 | GCC_C_LANGUAGE_STANDARD = c99; 337 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 338 | GCC_WARN_UNUSED_VARIABLE = YES; 339 | PREBINDING = NO; 340 | SDKROOT = iphoneos3.0; 341 | }; 342 | name = Release; 343 | }; 344 | /* End XCBuildConfiguration section */ 345 | 346 | /* Begin XCConfigurationList section */ 347 | 1D6058960D05DD3E006BFB54 /* Build configuration list for PBXNativeTarget "Demo" */ = { 348 | isa = XCConfigurationList; 349 | buildConfigurations = ( 350 | 1D6058940D05DD3E006BFB54 /* Debug */, 351 | 1D6058950D05DD3E006BFB54 /* Release */, 352 | ); 353 | defaultConfigurationIsVisible = 0; 354 | defaultConfigurationName = Release; 355 | }; 356 | C01FCF4E08A954540054247B /* Build configuration list for PBXProject "Demo" */ = { 357 | isa = XCConfigurationList; 358 | buildConfigurations = ( 359 | C01FCF4F08A954540054247B /* Debug */, 360 | C01FCF5008A954540054247B /* Release */, 361 | ); 362 | defaultConfigurationIsVisible = 0; 363 | defaultConfigurationName = Release; 364 | }; 365 | /* End XCConfigurationList section */ 366 | }; 367 | rootObject = 29B97313FDCFA39411CA2CEA /* Project object */; 368 | } 369 | -------------------------------------------------------------------------------- /Demo/Demo_Prefix.pch: -------------------------------------------------------------------------------- 1 | // 2 | // Prefix header for all source files of the 'Demo' target in the 'Demo' project 3 | // 4 | 5 | #ifdef __OBJC__ 6 | #import 7 | #import 8 | #endif 9 | -------------------------------------------------------------------------------- /Demo/MainWindow.xib: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 768 5 | 10A288 6 | 715 7 | 1010 8 | 411.00 9 | 10 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin 11 | 46 12 | 13 | 14 | YES 15 | 16 | 17 | 18 | YES 19 | 
com.apple.InterfaceBuilder.IBCocoaTouchPlugin 20 | 21 | 22 | YES 23 | 24 | YES 25 | 26 | 27 | YES 28 | 29 | 30 | 31 | YES 32 | 33 | IBFilesOwner 34 | 35 | 36 | IBFirstResponder 37 | 38 | 39 | 40 | DemoViewController 41 | 42 | 43 | 44 | 45 | 292 46 | {320, 480} 47 | 48 | 1 49 | MSAxIDEAA 50 | 51 | NO 52 | NO 53 | 54 | 55 | 56 | 57 | 58 | YES 59 | 60 | 61 | delegate 62 | 63 | 64 | 65 | 4 66 | 67 | 68 | 69 | viewController 70 | 71 | 72 | 73 | 11 74 | 75 | 76 | 77 | window 78 | 79 | 80 | 81 | 14 82 | 83 | 84 | 85 | 86 | YES 87 | 88 | 0 89 | 90 | 91 | 92 | 93 | 94 | -1 95 | 96 | 97 | File's Owner 98 | 99 | 100 | 3 101 | 102 | 103 | Demo App Delegate 104 | 105 | 106 | -2 107 | 108 | 109 | 110 | 111 | 10 112 | 113 | 114 | 115 | 116 | 12 117 | 118 | 119 | 120 | 121 | 122 | 123 | YES 124 | 125 | YES 126 | -1.CustomClassName 127 | -2.CustomClassName 128 | 10.CustomClassName 129 | 10.IBEditorWindowLastContentRect 130 | 10.IBPluginDependency 131 | 12.IBEditorWindowLastContentRect 132 | 12.IBPluginDependency 133 | 3.CustomClassName 134 | 3.IBPluginDependency 135 | 136 | 137 | YES 138 | UIApplication 139 | UIResponder 140 | DemoViewController 141 | {{512, 351}, {320, 480}} 142 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin 143 | {{525, 346}, {320, 480}} 144 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin 145 | DemoAppDelegate 146 | com.apple.InterfaceBuilder.IBCocoaTouchPlugin 147 | 148 | 149 | 150 | YES 151 | 152 | 153 | YES 154 | 155 | 156 | 157 | 158 | YES 159 | 160 | 161 | YES 162 | 163 | 164 | 165 | 14 166 | 167 | 168 | 169 | YES 170 | 171 | DemoAppDelegate 172 | NSObject 173 | 174 | YES 175 | 176 | YES 177 | viewController 178 | window 179 | 180 | 181 | YES 182 | DemoViewController 183 | UIWindow 184 | 185 | 186 | 187 | IBProjectSource 188 | Classes/DemoAppDelegate.h 189 | 190 | 191 | 192 | DemoAppDelegate 193 | NSObject 194 | 195 | IBUserSource 196 | 197 | 198 | 199 | 200 | DemoViewController 201 | UIViewController 202 | 203 | IBProjectSource 204 | 
Classes/DemoViewController.h 205 | 206 | 207 | 208 | 209 | 0 210 | 211 | com.apple.InterfaceBuilder.CocoaTouchPlugin.InterfaceBuilder3 212 | 213 | 214 | YES 215 | Demo.xcodeproj 216 | 3 217 | 218 | 219 | -------------------------------------------------------------------------------- /Demo/main.m: -------------------------------------------------------------------------------- 1 | // 2 | // main.m 3 | // Demo 4 | // 5 | // Created by Lee Buck on 8/23/09. 6 | // Copyright Blue Bright Ventures 2009. All rights reserved. 7 | // 8 | 9 | #import 10 | 11 | /* Demo entry point: creates an autorelease pool, hands control to UIApplicationMain with nil principal-class and delegate-class names, then releases the pool and returns UIApplicationMain's result. NOTE(review): with a nil delegate class name the app delegate is presumably supplied by the main nib (MainWindow.xib) - confirm against Demo-Info.plist. */ int main(int argc, char *argv[]) { 12 | 13 | NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init]; 14 | int retVal = UIApplicationMain(argc, argv, nil, nil); 15 | [pool release]; 16 | return retVal; 17 | } 18 | -------------------------------------------------------------------------------- /Demo/source2.html: -------------------------------------------------------------------------------- 1 | 2 |
    3 |
  1. 1
  2. 4 |
  3. 2
  4. 5 |
  5. 3
  6. 6 |
  7. 4
  8. 7 |
  9. 5
  10. 8 |
  11. 6
  12. 9 |
  13. 7
  14. 10 |
  15. 8
  16. 11 |
  17. 9
  18. 12 |
  19. 10
  20. 13 |
  21. 11
  22. 14 |
  23. 12
  24. 15 |
  25. 13
  26. 16 |
  27. 14
  28. 17 |
  29. 15
  30. 18 |
  31. 16
  32. 19 |
  33. 17
  34. 20 |
  35. 18
  36. 21 |
  37. 19
  38. 22 |
  39. 20
  40. 23 |
  41. 21
  42. 24 |
  43. 22
  44. 25 |
  45. 23
  46. 26 |
  47. 24
  48. 27 |
  49. 25
  50. 28 |
  51. 26
  52. 29 |
  53. 27
  54. 30 |
  55. 28
  56. 31 |
  57. 29
  58. 32 |
  59. 30
  60. 33 |
  61. 31
  62. 34 |
  63. 32
  64. 35 |
  65. 33
  66. 36 |
  67. 34
  68. 37 |
  69. 35
  70. 38 |
  71. 36
  72. 39 |
  73. 37
  74. 40 |
  75. 38
  76. 41 |
  77. 39
  78. 42 |
  79. 40
  80. 43 |
  81. 41
  82. 44 |
  83. 42
  84. 45 |
  85. 43
  86. 46 |
  87. 44
  88. 47 |
  89. 45
  90. 48 |
  91. 46
  92. 49 |
  93. 47
  94. 50 |
  95. 48
  96. 51 |
  97. 49
  98. 52 |
  99. 50
  100. 53 |
  101. 51
  102. 54 |
  103. 52
  104. 55 |
  105. 53
  106. 56 |
  107. 54
  108. 57 |
  109. 55
  110. 58 |
  111. 56
  112. 59 |
  113. 57
  114. 60 |
  115. 58
  116. 61 |
  117. 59
  118. 62 |
  119. 60
  120. 63 |
64 | -------------------------------------------------------------------------------- /ElementParser.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 45; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 3F4E18B3102DD2FA00320118 /* CDataChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1891102DD2FA00320118 /* CDataChunk.h */; }; 11 | 3F4E18B4102DD2FA00320118 /* CDataChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E1892102DD2FA00320118 /* CDataChunk.m */; }; 12 | 3F4E18B5102DD2FA00320118 /* Chunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1893102DD2FA00320118 /* Chunk.h */; }; 13 | 3F4E18B6102DD2FA00320118 /* Chunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E1894102DD2FA00320118 /* Chunk.m */; }; 14 | 3F4E18B7102DD2FA00320118 /* CommentChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1895102DD2FA00320118 /* CommentChunk.h */; }; 15 | 3F4E18B8102DD2FA00320118 /* CommentChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E1896102DD2FA00320118 /* CommentChunk.m */; }; 16 | 3F4E18B9102DD2FA00320118 /* CSSPartMatcher.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1897102DD2FA00320118 /* CSSPartMatcher.h */; }; 17 | 3F4E18BA102DD2FA00320118 /* CSSPartMatcher.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E1898102DD2FA00320118 /* CSSPartMatcher.m */; }; 18 | 3F4E18BB102DD2FA00320118 /* CSSSelector.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E1899102DD2FA00320118 /* CSSSelector.h */; }; 19 | 3F4E18BC102DD2FA00320118 /* CSSSelector.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E189A102DD2FA00320118 /* CSSSelector.m */; }; 20 | 3F4E18BD102DD2FA00320118 /* CSSSelectorMatcher.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E189B102DD2FA00320118 /* CSSSelectorMatcher.h */; }; 21 | 3F4E18BE102DD2FA00320118 /* CSSSelectorMatcher.m in 
Sources */ = {isa = PBXBuildFile; fileRef = 3F4E189C102DD2FA00320118 /* CSSSelectorMatcher.m */; }; 22 | 3F4E18BF102DD2FA00320118 /* CSSSelectorPart.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E189D102DD2FA00320118 /* CSSSelectorPart.h */; }; 23 | 3F4E18C0102DD2FA00320118 /* CSSSelectorPart.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E189E102DD2FA00320118 /* CSSSelectorPart.m */; }; 24 | 3F4E18C1102DD2FA00320118 /* DoctypeChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E189F102DD2FA00320118 /* DoctypeChunk.h */; }; 25 | 3F4E18C2102DD2FA00320118 /* DoctypeChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A0102DD2FA00320118 /* DoctypeChunk.m */; }; 26 | 3F4E18C3102DD2FA00320118 /* DocumentRoot.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A1102DD2FA00320118 /* DocumentRoot.h */; }; 27 | 3F4E18C4102DD2FA00320118 /* DocumentRoot.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A2102DD2FA00320118 /* DocumentRoot.m */; }; 28 | 3F4E18C5102DD2FA00320118 /* Element.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A3102DD2FA00320118 /* Element.h */; }; 29 | 3F4E18C6102DD2FA00320118 /* Element.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A4102DD2FA00320118 /* Element.m */; }; 30 | 3F4E18C7102DD2FA00320118 /* ElementParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A5102DD2FA00320118 /* ElementParser.h */; }; 31 | 3F4E18C8102DD2FA00320118 /* ElementParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A6102DD2FA00320118 /* ElementParser.m */; }; 32 | 3F4E18C9102DD2FA00320118 /* EntityChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A7102DD2FA00320118 /* EntityChunk.h */; }; 33 | 3F4E18CA102DD2FA00320118 /* EntityChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18A8102DD2FA00320118 /* EntityChunk.m */; }; 34 | 3F4E18CB102DD2FA00320118 /* NSString_HTML.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18A9102DD2FA00320118 /* NSString_HTML.h */; }; 35 | 
3F4E18CC102DD2FA00320118 /* NSString_HTML.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18AA102DD2FA00320118 /* NSString_HTML.m */; }; 36 | 3F4E18CD102DD2FA00320118 /* ProcessingInstructionChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18AB102DD2FA00320118 /* ProcessingInstructionChunk.h */; }; 37 | 3F4E18CE102DD2FA00320118 /* ProcessingInstructionChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18AC102DD2FA00320118 /* ProcessingInstructionChunk.m */; }; 38 | 3F4E18CF102DD2FA00320118 /* TagChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18AD102DD2FA00320118 /* TagChunk.h */; }; 39 | 3F4E18D0102DD2FA00320118 /* TagChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18AE102DD2FA00320118 /* TagChunk.m */; }; 40 | 3F4E18D1102DD2FA00320118 /* TxtChunk.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18AF102DD2FA00320118 /* TxtChunk.h */; }; 41 | 3F4E18D2102DD2FA00320118 /* TxtChunk.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18B0102DD2FA00320118 /* TxtChunk.m */; }; 42 | 3F4E18D3102DD2FA00320118 /* URLParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 3F4E18B1102DD2FA00320118 /* URLParser.h */; }; 43 | 3F4E18D4102DD2FA00320118 /* URLParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 3F4E18B2102DD2FA00320118 /* URLParser.m */; }; 44 | AA747D9F0F9514B9006C5449 /* ElementParser_Prefix.pch in Headers */ = {isa = PBXBuildFile; fileRef = AA747D9E0F9514B9006C5449 /* ElementParser_Prefix.pch */; }; 45 | AACBBE4A0F95108600F1A2B1 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = AACBBE490F95108600F1A2B1 /* Foundation.framework */; }; 46 | /* End PBXBuildFile section */ 47 | 48 | /* Begin PBXFileReference section */ 49 | 3F4E1891102DD2FA00320118 /* CDataChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CDataChunk.h; path = Classes/CDataChunk.h; sourceTree = ""; }; 50 | 3F4E1892102DD2FA00320118 /* CDataChunk.m */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CDataChunk.m; path = Classes/CDataChunk.m; sourceTree = SOURCE_ROOT; }; 51 | 3F4E1893102DD2FA00320118 /* Chunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Chunk.h; path = Classes/Chunk.h; sourceTree = ""; }; 52 | 3F4E1894102DD2FA00320118 /* Chunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Chunk.m; path = Classes/Chunk.m; sourceTree = SOURCE_ROOT; }; 53 | 3F4E1895102DD2FA00320118 /* CommentChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CommentChunk.h; path = Classes/CommentChunk.h; sourceTree = ""; }; 54 | 3F4E1896102DD2FA00320118 /* CommentChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CommentChunk.m; path = Classes/CommentChunk.m; sourceTree = SOURCE_ROOT; }; 55 | 3F4E1897102DD2FA00320118 /* CSSPartMatcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSPartMatcher.h; path = Classes/CSSPartMatcher.h; sourceTree = ""; }; 56 | 3F4E1898102DD2FA00320118 /* CSSPartMatcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSPartMatcher.m; path = Classes/CSSPartMatcher.m; sourceTree = SOURCE_ROOT; }; 57 | 3F4E1899102DD2FA00320118 /* CSSSelector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelector.h; path = Classes/CSSSelector.h; sourceTree = ""; }; 58 | 3F4E189A102DD2FA00320118 /* CSSSelector.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelector.m; path = Classes/CSSSelector.m; sourceTree = SOURCE_ROOT; }; 59 | 3F4E189B102DD2FA00320118 /* CSSSelectorMatcher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelectorMatcher.h; path = Classes/CSSSelectorMatcher.h; 
sourceTree = ""; }; 60 | 3F4E189C102DD2FA00320118 /* CSSSelectorMatcher.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelectorMatcher.m; path = Classes/CSSSelectorMatcher.m; sourceTree = SOURCE_ROOT; }; 61 | 3F4E189D102DD2FA00320118 /* CSSSelectorPart.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CSSSelectorPart.h; path = Classes/CSSSelectorPart.h; sourceTree = ""; }; 62 | 3F4E189E102DD2FA00320118 /* CSSSelectorPart.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = CSSSelectorPart.m; path = Classes/CSSSelectorPart.m; sourceTree = SOURCE_ROOT; }; 63 | 3F4E189F102DD2FA00320118 /* DoctypeChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DoctypeChunk.h; path = Classes/DoctypeChunk.h; sourceTree = ""; }; 64 | 3F4E18A0102DD2FA00320118 /* DoctypeChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = DoctypeChunk.m; path = Classes/DoctypeChunk.m; sourceTree = SOURCE_ROOT; }; 65 | 3F4E18A1102DD2FA00320118 /* DocumentRoot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DocumentRoot.h; path = Classes/DocumentRoot.h; sourceTree = ""; }; 66 | 3F4E18A2102DD2FA00320118 /* DocumentRoot.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = DocumentRoot.m; path = Classes/DocumentRoot.m; sourceTree = SOURCE_ROOT; }; 67 | 3F4E18A3102DD2FA00320118 /* Element.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Element.h; path = Classes/Element.h; sourceTree = ""; }; 68 | 3F4E18A4102DD2FA00320118 /* Element.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Element.m; path = Classes/Element.m; sourceTree = SOURCE_ROOT; }; 69 | 3F4E18A5102DD2FA00320118 /* ElementParser.h */ = {isa 
= PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ElementParser.h; path = Classes/ElementParser.h; sourceTree = ""; }; 70 | 3F4E18A6102DD2FA00320118 /* ElementParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = ElementParser.m; path = Classes/ElementParser.m; sourceTree = SOURCE_ROOT; }; 71 | 3F4E18A7102DD2FA00320118 /* EntityChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = EntityChunk.h; path = Classes/EntityChunk.h; sourceTree = ""; }; 72 | 3F4E18A8102DD2FA00320118 /* EntityChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = EntityChunk.m; path = Classes/EntityChunk.m; sourceTree = SOURCE_ROOT; }; 73 | 3F4E18A9102DD2FA00320118 /* NSString_HTML.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = NSString_HTML.h; path = Classes/NSString_HTML.h; sourceTree = ""; }; 74 | 3F4E18AA102DD2FA00320118 /* NSString_HTML.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = NSString_HTML.m; path = Classes/NSString_HTML.m; sourceTree = SOURCE_ROOT; }; 75 | 3F4E18AB102DD2FA00320118 /* ProcessingInstructionChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ProcessingInstructionChunk.h; path = Classes/ProcessingInstructionChunk.h; sourceTree = ""; }; 76 | 3F4E18AC102DD2FA00320118 /* ProcessingInstructionChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = ProcessingInstructionChunk.m; path = Classes/ProcessingInstructionChunk.m; sourceTree = SOURCE_ROOT; }; 77 | 3F4E18AD102DD2FA00320118 /* TagChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TagChunk.h; path = Classes/TagChunk.h; sourceTree = ""; }; 78 | 3F4E18AE102DD2FA00320118 /* TagChunk.m */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TagChunk.m; path = Classes/TagChunk.m; sourceTree = SOURCE_ROOT; }; 79 | 3F4E18AF102DD2FA00320118 /* TxtChunk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TxtChunk.h; path = Classes/TxtChunk.h; sourceTree = ""; }; 80 | 3F4E18B0102DD2FA00320118 /* TxtChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = TxtChunk.m; path = Classes/TxtChunk.m; sourceTree = SOURCE_ROOT; }; 81 | 3F4E18B1102DD2FA00320118 /* URLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = URLParser.h; path = Classes/URLParser.h; sourceTree = ""; }; 82 | 3F4E18B2102DD2FA00320118 /* URLParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = URLParser.m; path = Classes/URLParser.m; sourceTree = SOURCE_ROOT; }; 83 | AA747D9E0F9514B9006C5449 /* ElementParser_Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ElementParser_Prefix.pch; sourceTree = SOURCE_ROOT; }; 84 | AACBBE490F95108600F1A2B1 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; 85 | D2AAC07E0554694100DB518D /* libElementParser.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libElementParser.a; sourceTree = BUILT_PRODUCTS_DIR; }; 86 | /* End PBXFileReference section */ 87 | 88 | /* Begin PBXFrameworksBuildPhase section */ 89 | D2AAC07C0554694100DB518D /* Frameworks */ = { 90 | isa = PBXFrameworksBuildPhase; 91 | buildActionMask = 2147483647; 92 | files = ( 93 | AACBBE4A0F95108600F1A2B1 /* Foundation.framework in Frameworks */, 94 | ); 95 | runOnlyForDeploymentPostprocessing = 0; 96 | }; 97 | /* End PBXFrameworksBuildPhase section */ 98 | 99 | /* Begin 
PBXGroup section */ 100 | 034768DFFF38A50411DB9C8B /* Products */ = { 101 | isa = PBXGroup; 102 | children = ( 103 | D2AAC07E0554694100DB518D /* libElementParser.a */, 104 | ); 105 | name = Products; 106 | sourceTree = ""; 107 | }; 108 | 0867D691FE84028FC02AAC07 /* ElementParser */ = { 109 | isa = PBXGroup; 110 | children = ( 111 | 3F4E1923102DD47000320118 /* Headers */, 112 | 08FB77AEFE84172EC02AAC07 /* Classes */, 113 | 32C88DFF0371C24200C91783 /* Other Sources */, 114 | 0867D69AFE84028FC02AAC07 /* Frameworks */, 115 | 034768DFFF38A50411DB9C8B /* Products */, 116 | ); 117 | name = ElementParser; 118 | sourceTree = ""; 119 | }; 120 | 0867D69AFE84028FC02AAC07 /* Frameworks */ = { 121 | isa = PBXGroup; 122 | children = ( 123 | AACBBE490F95108600F1A2B1 /* Foundation.framework */, 124 | ); 125 | name = Frameworks; 126 | sourceTree = ""; 127 | }; 128 | 08FB77AEFE84172EC02AAC07 /* Classes */ = { 129 | isa = PBXGroup; 130 | children = ( 131 | 3F4E1892102DD2FA00320118 /* CDataChunk.m */, 132 | 3F4E1894102DD2FA00320118 /* Chunk.m */, 133 | 3F4E1896102DD2FA00320118 /* CommentChunk.m */, 134 | 3F4E1898102DD2FA00320118 /* CSSPartMatcher.m */, 135 | 3F4E189A102DD2FA00320118 /* CSSSelector.m */, 136 | 3F4E189C102DD2FA00320118 /* CSSSelectorMatcher.m */, 137 | 3F4E189E102DD2FA00320118 /* CSSSelectorPart.m */, 138 | 3F4E18A0102DD2FA00320118 /* DoctypeChunk.m */, 139 | 3F4E18A2102DD2FA00320118 /* DocumentRoot.m */, 140 | 3F4E18A4102DD2FA00320118 /* Element.m */, 141 | 3F4E18A6102DD2FA00320118 /* ElementParser.m */, 142 | 3F4E18A8102DD2FA00320118 /* EntityChunk.m */, 143 | 3F4E18AA102DD2FA00320118 /* NSString_HTML.m */, 144 | 3F4E18AC102DD2FA00320118 /* ProcessingInstructionChunk.m */, 145 | 3F4E18AE102DD2FA00320118 /* TagChunk.m */, 146 | 3F4E18B0102DD2FA00320118 /* TxtChunk.m */, 147 | 3F4E18B2102DD2FA00320118 /* URLParser.m */, 148 | ); 149 | name = Classes; 150 | sourceTree = ""; 151 | }; 152 | 32C88DFF0371C24200C91783 /* Other Sources */ = { 153 | isa = PBXGroup; 154 | 
children = ( 155 | AA747D9E0F9514B9006C5449 /* ElementParser_Prefix.pch */, 156 | ); 157 | name = "Other Sources"; 158 | sourceTree = ""; 159 | }; 160 | 3F4E1923102DD47000320118 /* Headers */ = { 161 | isa = PBXGroup; 162 | children = ( 163 | 3F4E1891102DD2FA00320118 /* CDataChunk.h */, 164 | 3F4E1893102DD2FA00320118 /* Chunk.h */, 165 | 3F4E1895102DD2FA00320118 /* CommentChunk.h */, 166 | 3F4E1897102DD2FA00320118 /* CSSPartMatcher.h */, 167 | 3F4E1899102DD2FA00320118 /* CSSSelector.h */, 168 | 3F4E189B102DD2FA00320118 /* CSSSelectorMatcher.h */, 169 | 3F4E189D102DD2FA00320118 /* CSSSelectorPart.h */, 170 | 3F4E189F102DD2FA00320118 /* DoctypeChunk.h */, 171 | 3F4E18A1102DD2FA00320118 /* DocumentRoot.h */, 172 | 3F4E18A3102DD2FA00320118 /* Element.h */, 173 | 3F4E18A5102DD2FA00320118 /* ElementParser.h */, 174 | 3F4E18A7102DD2FA00320118 /* EntityChunk.h */, 175 | 3F4E18A9102DD2FA00320118 /* NSString_HTML.h */, 176 | 3F4E18AB102DD2FA00320118 /* ProcessingInstructionChunk.h */, 177 | 3F4E18AD102DD2FA00320118 /* TagChunk.h */, 178 | 3F4E18AF102DD2FA00320118 /* TxtChunk.h */, 179 | 3F4E18B1102DD2FA00320118 /* URLParser.h */, 180 | ); 181 | name = Headers; 182 | sourceTree = ""; 183 | }; 184 | /* End PBXGroup section */ 185 | 186 | /* Begin PBXHeadersBuildPhase section */ 187 | D2AAC07A0554694100DB518D /* Headers */ = { 188 | isa = PBXHeadersBuildPhase; 189 | buildActionMask = 2147483647; 190 | files = ( 191 | AA747D9F0F9514B9006C5449 /* ElementParser_Prefix.pch in Headers */, 192 | 3F4E18B3102DD2FA00320118 /* CDataChunk.h in Headers */, 193 | 3F4E18B5102DD2FA00320118 /* Chunk.h in Headers */, 194 | 3F4E18B7102DD2FA00320118 /* CommentChunk.h in Headers */, 195 | 3F4E18B9102DD2FA00320118 /* CSSPartMatcher.h in Headers */, 196 | 3F4E18BB102DD2FA00320118 /* CSSSelector.h in Headers */, 197 | 3F4E18BD102DD2FA00320118 /* CSSSelectorMatcher.h in Headers */, 198 | 3F4E18BF102DD2FA00320118 /* CSSSelectorPart.h in Headers */, 199 | 3F4E18C1102DD2FA00320118 /* DoctypeChunk.h in 
Headers */, 200 | 3F4E18C3102DD2FA00320118 /* DocumentRoot.h in Headers */, 201 | 3F4E18C5102DD2FA00320118 /* Element.h in Headers */, 202 | 3F4E18C7102DD2FA00320118 /* ElementParser.h in Headers */, 203 | 3F4E18C9102DD2FA00320118 /* EntityChunk.h in Headers */, 204 | 3F4E18CB102DD2FA00320118 /* NSString_HTML.h in Headers */, 205 | 3F4E18CD102DD2FA00320118 /* ProcessingInstructionChunk.h in Headers */, 206 | 3F4E18CF102DD2FA00320118 /* TagChunk.h in Headers */, 207 | 3F4E18D1102DD2FA00320118 /* TxtChunk.h in Headers */, 208 | 3F4E18D3102DD2FA00320118 /* URLParser.h in Headers */, 209 | ); 210 | runOnlyForDeploymentPostprocessing = 0; 211 | }; 212 | /* End PBXHeadersBuildPhase section */ 213 | 214 | /* Begin PBXNativeTarget section */ 215 | D2AAC07D0554694100DB518D /* ElementParser */ = { 216 | isa = PBXNativeTarget; 217 | buildConfigurationList = 1DEB921E08733DC00010E9CD /* Build configuration list for PBXNativeTarget "ElementParser" */; 218 | buildPhases = ( 219 | D2AAC07A0554694100DB518D /* Headers */, 220 | D2AAC07B0554694100DB518D /* Sources */, 221 | D2AAC07C0554694100DB518D /* Frameworks */, 222 | ); 223 | buildRules = ( 224 | ); 225 | dependencies = ( 226 | ); 227 | name = ElementParser; 228 | productName = ElementParser; 229 | productReference = D2AAC07E0554694100DB518D /* libElementParser.a */; 230 | productType = "com.apple.product-type.library.static"; 231 | }; 232 | /* End PBXNativeTarget section */ 233 | 234 | /* Begin PBXProject section */ 235 | 0867D690FE84028FC02AAC07 /* Project object */ = { 236 | isa = PBXProject; 237 | buildConfigurationList = 1DEB922208733DC00010E9CD /* Build configuration list for PBXProject "ElementParser" */; 238 | compatibilityVersion = "Xcode 3.1"; 239 | hasScannedForEncodings = 1; 240 | mainGroup = 0867D691FE84028FC02AAC07 /* ElementParser */; 241 | productRefGroup = 034768DFFF38A50411DB9C8B /* Products */; 242 | projectDirPath = ""; 243 | projectRoot = ""; 244 | targets = ( 245 | D2AAC07D0554694100DB518D /* ElementParser 
*/, 246 | ); 247 | }; 248 | /* End PBXProject section */ 249 | 250 | /* Begin PBXSourcesBuildPhase section */ 251 | D2AAC07B0554694100DB518D /* Sources */ = { 252 | isa = PBXSourcesBuildPhase; 253 | buildActionMask = 2147483647; 254 | files = ( 255 | 3F4E18B4102DD2FA00320118 /* CDataChunk.m in Sources */, 256 | 3F4E18B6102DD2FA00320118 /* Chunk.m in Sources */, 257 | 3F4E18B8102DD2FA00320118 /* CommentChunk.m in Sources */, 258 | 3F4E18BA102DD2FA00320118 /* CSSPartMatcher.m in Sources */, 259 | 3F4E18BC102DD2FA00320118 /* CSSSelector.m in Sources */, 260 | 3F4E18BE102DD2FA00320118 /* CSSSelectorMatcher.m in Sources */, 261 | 3F4E18C0102DD2FA00320118 /* CSSSelectorPart.m in Sources */, 262 | 3F4E18C2102DD2FA00320118 /* DoctypeChunk.m in Sources */, 263 | 3F4E18C4102DD2FA00320118 /* DocumentRoot.m in Sources */, 264 | 3F4E18C6102DD2FA00320118 /* Element.m in Sources */, 265 | 3F4E18C8102DD2FA00320118 /* ElementParser.m in Sources */, 266 | 3F4E18CA102DD2FA00320118 /* EntityChunk.m in Sources */, 267 | 3F4E18CC102DD2FA00320118 /* NSString_HTML.m in Sources */, 268 | 3F4E18CE102DD2FA00320118 /* ProcessingInstructionChunk.m in Sources */, 269 | 3F4E18D0102DD2FA00320118 /* TagChunk.m in Sources */, 270 | 3F4E18D2102DD2FA00320118 /* TxtChunk.m in Sources */, 271 | 3F4E18D4102DD2FA00320118 /* URLParser.m in Sources */, 272 | ); 273 | runOnlyForDeploymentPostprocessing = 0; 274 | }; 275 | /* End PBXSourcesBuildPhase section */ 276 | 277 | /* Begin XCBuildConfiguration section */ 278 | 1DEB921F08733DC00010E9CD /* Debug */ = { 279 | isa = XCBuildConfiguration; 280 | buildSettings = { 281 | ALWAYS_SEARCH_USER_PATHS = NO; 282 | ARCHS = "$(ARCHS_STANDARD_32_BIT)"; 283 | COPY_PHASE_STRIP = NO; 284 | DSTROOT = /tmp/ElementParser.dst; 285 | GCC_DYNAMIC_NO_PIC = NO; 286 | GCC_ENABLE_FIX_AND_CONTINUE = YES; 287 | GCC_MODEL_TUNING = G5; 288 | GCC_OPTIMIZATION_LEVEL = 0; 289 | GCC_PRECOMPILE_PREFIX_HEADER = YES; 290 | GCC_PREFIX_HEADER = ElementParser_Prefix.pch; 291 | INSTALL_PATH = 
/usr/local/lib; 292 | PRODUCT_NAME = ElementParser; 293 | }; 294 | name = Debug; 295 | }; 296 | 1DEB922008733DC00010E9CD /* Release */ = { 297 | isa = XCBuildConfiguration; 298 | buildSettings = { 299 | ALWAYS_SEARCH_USER_PATHS = NO; 300 | ARCHS = "$(ARCHS_STANDARD_32_BIT)"; 301 | DSTROOT = /tmp/ElementParser.dst; 302 | GCC_MODEL_TUNING = G5; 303 | GCC_PRECOMPILE_PREFIX_HEADER = YES; 304 | GCC_PREFIX_HEADER = ElementParser_Prefix.pch; 305 | INSTALL_PATH = /usr/local/lib; 306 | PRODUCT_NAME = ElementParser; 307 | }; 308 | name = Release; 309 | }; 310 | 1DEB922308733DC00010E9CD /* Debug */ = { 311 | isa = XCBuildConfiguration; 312 | buildSettings = { 313 | ARCHS = "$(ARCHS_STANDARD_32_BIT)"; 314 | GCC_C_LANGUAGE_STANDARD = c99; 315 | GCC_OPTIMIZATION_LEVEL = 0; 316 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 317 | GCC_WARN_UNUSED_VARIABLE = YES; 318 | OTHER_LDFLAGS = "-ObjC"; 319 | PREBINDING = NO; 320 | SDKROOT = macosx10.5; 321 | }; 322 | name = Debug; 323 | }; 324 | 1DEB922408733DC00010E9CD /* Release */ = { 325 | isa = XCBuildConfiguration; 326 | buildSettings = { 327 | ARCHS = "$(ARCHS_STANDARD_32_BIT)"; 328 | GCC_C_LANGUAGE_STANDARD = c99; 329 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 330 | GCC_WARN_UNUSED_VARIABLE = YES; 331 | OTHER_LDFLAGS = "-ObjC"; 332 | PREBINDING = NO; 333 | SDKROOT = iphoneos3.0; 334 | }; 335 | name = Release; 336 | }; 337 | /* End XCBuildConfiguration section */ 338 | 339 | /* Begin XCConfigurationList section */ 340 | 1DEB921E08733DC00010E9CD /* Build configuration list for PBXNativeTarget "ElementParser" */ = { 341 | isa = XCConfigurationList; 342 | buildConfigurations = ( 343 | 1DEB921F08733DC00010E9CD /* Debug */, 344 | 1DEB922008733DC00010E9CD /* Release */, 345 | ); 346 | defaultConfigurationIsVisible = 0; 347 | defaultConfigurationName = Release; 348 | }; 349 | 1DEB922208733DC00010E9CD /* Build configuration list for PBXProject "ElementParser" */ = { 350 | isa = XCConfigurationList; 351 | buildConfigurations = ( 352 | 1DEB922308733DC00010E9CD 
/* Debug */, 353 | 1DEB922408733DC00010E9CD /* Release */, 354 | ); 355 | defaultConfigurationIsVisible = 0; 356 | defaultConfigurationName = Release; 357 | }; 358 | /* End XCConfigurationList section */ 359 | }; 360 | rootObject = 0867D690FE84028FC02AAC07 /* Project object */; 361 | } 362 | -------------------------------------------------------------------------------- /ElementParser_Prefix.pch: -------------------------------------------------------------------------------- 1 | // 2 | // Prefix header for all source files of the 'CocoaTouchStaticLibrary' target in the 'CocoaTouchStaticLibrary' project. 3 | // 4 | 5 | #ifdef __OBJC__ 6 | #import 7 | #endif 8 | -------------------------------------------------------------------------------- /GPL v3 Liscense.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Objective3/ElementParser/3fa3d82f8ff05a25d40b61592e1cf18115daf306/GPL v3 Liscense.pdf -------------------------------------------------------------------------------- /HTML Entities.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Objective3/ElementParser/3fa3d82f8ff05a25d40b61592e1cf18115daf306/HTML Entities.plist -------------------------------------------------------------------------------- /Read Me.txt: -------------------------------------------------------------------------------- 1 | Element Parser 2 | 5/4/09 3 | 4 | Accessing and manipulating HTML and XML in Cocoa can be incredibly frustrating. There are two existing choices (NSXMLParser and libxml2), but neither works with HTML or "real-world" XML documents that are often not "perfect". Their interfaces put all the work on you to map between the document and your program's domain objects. They force you to write code that is hard to write and maintain. Somehow, something that starts out looking straightforward ends up becoming 5 | a science project or worse.
6 | 7 | ElementParser is a lightweight framework to provide easy access to XML and HTML content. Rather than get lost in the complexities of the HTML and XML specifications, it aspires to not obscure their essential simplicity. It doesn't do everything, but aspires to do "just enough". 8 | 9 | I hope you like it. 10 | 11 | Let's begin with some examples. 12 | 13 | document = [Element parseHTML: source]; 14 | 15 | Document is a special element that holds the top-level element(s) (e.g. or ) of your document. You now have a tree of Element objects which you can walk using methods like firstChild, nextSybling and parent. You can also access the data each contains with methods like tagName, attributes and contentsText. Nice start. And sometimes this is enough. But let's say you don't want to walk the tree all the time to find the data you need. How about: 16 | 17 | linkElement = [element selectElement: @"div.nextLink a"]; 18 | 19 | Here we're using a CSS-type selector to locate and return a matching element. Nice. Now we can parse a document and conveniently find elements of interest. (Yes, there is a corresponding selectElements: method that returns all matches.) 20 | 21 | Next, let's bind together your world of objects and the world of elements more closely. To do this, we'll use the ElementParser directly to register callbacks into your code when an element is found (and its contents parsed).
22 | 23 | ElementParser* parser = [[ElementParser alloc] initWithCallbacksDelegate: self]; 24 | [parser performSelector:@selector(processFeedElement:) forElementsMatching: @"feed"]; 25 | documentRoot = [parser parseXML: source]; 26 | 27 | Your code might look like this: 28 | 29 | -(FeedItem*)processFeedElement:(Element*)element{ 30 | FeedItem* feedItem = [[[FeedItem alloc] init] autorelease]; 31 | feedItem.title = [[element selectElement: @"title"] contentsText]; 32 | feedItem.description = [[element selectElement: @"description"] contentsText]; 33 | feedItem.enclosure = [[element selectElement: @"enclosure"] contentsText]; 34 | return feedItem; // optional, sets this element's domainObject property 35 | } 36 | 37 | Finally, all these HTML and XML documents often reside on the web. Wouldn't it be nice if we could use the pattern above to process the documents incrementally as soon as they appear? How about: 38 | 39 | URLParser* parser = [[URLParser alloc] initWithCallbackDelegate: self]; 40 | [parser performSelector:@selector(processChannelElement:) forElementsMatching: @"channel"]; 41 | [parser performSelector:@selector(processFeedElement:) forElementsMatching: @"feed"]; 42 | [parser parseURL: myURL]; 43 | 44 | There is a lot more under the covers, but this may be all you need. Hopefully it's just enough! We'd love your feedback at feedback@touchtankapps.com. 45 | 46 | Terms of Use 47 | The ElementParser framework (and its source code) is free of charge for non-commercial uses. For other commercial uses, a fee of $100 is required per product. (That's about 2 hours of your time, right?) Support plans are also available. Please contact sales@touchtankapps.com.
48 | 49 | -------------------------------------------------------------------------------- /Test/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Objective3/ElementParser/3fa3d82f8ff05a25d40b61592e1cf18115daf306/Test/.DS_Store -------------------------------------------------------------------------------- /Test/CSSSelectorTest.h: -------------------------------------------------------------------------------- 1 | // 2 | // CSSSelectorTest.h 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nominal fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program.  If not, see . 23 | // 24 | 25 | #import 26 | #import "CSSSelector.h" 27 | 28 | /* Test-only category on CSSSelector. Both methods are class methods, so they are sent to the CSSSelector class itself. */ @interface CSSSelector (Test) 29 | 30 | /* Checks selector parsing by comparing each parsed selector's description with an expected string. */ +(void)testCSSSelector; 31 | /* Runs every test declared in this category. */ +(void)testAll; 32 | 33 | @end 34 | -------------------------------------------------------------------------------- /Test/CSSSelectorTest.m: -------------------------------------------------------------------------------- 1 | // 2 | // CSSSelectorTest.m 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nominal fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program.  If not, see . 23 | // 24 | 25 | #import "CSSSelectorTest.h" 26 | 27 | 28 | /* Test helpers and cases for CSSSelector, written as class methods on a category. */ @implementation CSSSelector (Test) 29 | 30 | /* Builds a CSSSelector from `source` and asserts that its -description equals `expected`; the selector is released afterwards (manual retain/release). NOTE(review): uses the C assert() macro, which is compiled out when NDEBUG is defined - confirm the tests run in a build without NDEBUG. */ +(void)assertWithCSSSelectorString:(NSString*)source expected:(NSString*)expected{ 31 | CSSSelector* selector = [[CSSSelector alloc] initWithString: source]; 32 | NSString* result = [selector description]; 33 | assert([result isEqualToString: expected]); 34 | [selector release]; 35 | } 36 | 37 | /* Convenience form: asserts that `source` round-trips unchanged through -description. */ +(void)assertWithCSSSelectorString:(NSString*)source{ 38 | [self assertWithCSSSelectorString: source expected: source]; 39 | } 40 | 41 | /* Covers tags, classes, ids, attribute selectors and the descendant, '+' and '>' combinators. Attribute values given unquoted or double-quoted are expected to be described with single quotes. */ +(void)testCSSSelector{ 42 | //should handle tag 43 | [self assertWithCSSSelectorString: @"foo"]; 44 | 45 | //should handle class 46 | [self assertWithCSSSelectorString: @".class"]; 47 | 48 | //should handle multiple classes 49 | [self assertWithCSSSelectorString: @".class.another_class"]; 50 | 51 | //should handle id 52 | [self assertWithCSSSelectorString: @"#identifier"]; 53 | 54 | //should handle id and tag 55 | [self assertWithCSSSelectorString: @"foo#identifier"]; 56 | 57 | //should handle class and tag 58 | [self assertWithCSSSelectorString: @"foo.bar"]; 59 | 60 | //should handle attr 61 | [self assertWithCSSSelectorString:
@"foo[bar]"]; 62 | 63 | //should handle attr & value w/o quotes 64 | [self assertWithCSSSelectorString: @"foo[bar=23]" expected: @"foo[bar='23']"]; 65 | 66 | //should handle attr & value w/ single quotes 67 | [self assertWithCSSSelectorString: @"foo[bar='23']"]; 68 | 69 | //should handle whitespace in brackets & value w/ single quotes 70 | [self assertWithCSSSelectorString: @"foo[ bar = '23' ]" expected: @"foo[bar='23']"]; 71 | 72 | //should handle attr & value w/ double quotes 73 | [self assertWithCSSSelectorString: @"foo[bar=\"23\"]" expected: @"foo[bar='23']"]; 74 | 75 | //should handle descendant chains 76 | [self assertWithCSSSelectorString: @"foo bar"]; 77 | 78 | //should handle successor chains 79 | [self assertWithCSSSelectorString: @"foo + bar"]; 80 | 81 | //should handle child chains 82 | [self assertWithCSSSelectorString: @"foo > bar"]; 83 | 84 | //should handle big and ugly 85 | [self assertWithCSSSelectorString: @"foo#ids > bar.huh + img[title]"]; 86 | 87 | } 88 | 89 | /* Runs every test in this category (currently only +testCSSSelector). */ +(void)testAll{ 90 | [self testCSSSelector]; 91 | } 92 | @end 93 | -------------------------------------------------------------------------------- /Test/ElementParserTest.h: -------------------------------------------------------------------------------- 1 | // 2 | // ElementParserTest.h 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 16 | // GNU General Public License for more details.
17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 23 | // 24 | 25 | #import 26 | #import "ElementParser.h" 27 | /** Test category adding assert-based self-checks to ElementParser as class methods. */ 28 | @interface ElementParser (Test) 29 | 30 | +(void)testElementParser; 31 | +(void)testNestedMatches; 32 | +(void)testAll; 33 | 34 | 35 | @end 36 | -------------------------------------------------------------------------------- /Test/ElementParserTest.m: -------------------------------------------------------------------------------- 1 | // 2 | // ElementParserTest.m 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 23 | // 24 | 25 | #import "ElementParserTest.h" 26 | #import "NSString_HTML.h" 27 | 28 | @implementation ElementParser (Test) 29 | /** Parses a small HTML sample and asserts on the parser's description and on the body element's description and contentsSource. */ 30 | +(void)testElementParser{ 31 | ElementParser* builder = [[[ElementParser alloc] init] autorelease]; /* autoreleased so the parser is not leaked */ 32 | NSString* source = @"some test

more text

"; 33 | NSString* expect = @"

"; 34 | Element* root = [builder parseHTML: source]; 35 | NSString* result = [builder description]; 36 | assert([result isEqualToString: expect]); 37 | 38 | Element* body = [root selectElement: @"body"]; 39 | assert([[body description] isEqualToString: @""]); 40 | assert([[body contentsSource] isEqualToString: @"some test

more text

"]); 41 | } 42 | /** Asserts that shouldBeEmptyElement: answers YES for the first two sample elements and NO for the last two. */ 43 | +(void)testShouldBeEmpty{ 44 | ElementParser* builder = [[[ElementParser alloc] init] autorelease]; /* autoreleased so the parser is not leaked */ 45 | assert([builder shouldBeEmptyElement: [@"
" element]]); 46 | assert([builder shouldBeEmptyElement: [@"" element]]); 47 | assert(![builder shouldBeEmptyElement: [@"

" element]]); 48 | assert(![builder shouldBeEmptyElement: [@"

" element]]); 49 | } 50 | /** Parses the bundled gizmodo.xml feed 10 times, reading each item's title, description text and an img src, then logs the elapsed time. */ 51 | +(void)testFeedPerf{ 52 | NSString* file = [[NSBundle mainBundle] pathForResource: @"gizmodo" ofType: @"xml"]; 53 | NSString* source = [NSString stringWithContentsOfFile: file]; 54 | assert(source); 55 | 56 | int runs = 10; 57 | 58 | NSTimeInterval start; 59 | start = [NSDate timeIntervalSinceReferenceDate]; 60 | for (int i = 0; i < runs; i++){ 61 | Element* root = [Element parseXML: source]; 62 | NSArray* items = [root selectElements: @"item"]; 63 | for (Element* item in items){ 64 | [[item selectElement: @"title"] contentsText]; 65 | NSString* description = [[item selectElement: @"description"] contentsText]; 66 | 67 | Element* descriptionDocument = [Element parseHTML: description]; 68 | [descriptionDocument contentsText]; 69 | [[descriptionDocument selectElement: @"img"] attribute: @"src"]; 70 | } 71 | } 72 | 73 | NSLog(@"%i runs processing feed: %f", runs, [NSDate timeIntervalSinceReferenceDate] - start); 74 | } 75 | /** Asserts that the selector "div.x a" matches exactly one element of the sample markup. */ 76 | +(void)testNestedMatches{ 77 | NSString* source = @"
"; 78 | ElementParser* parser = [[[ElementParser alloc] init] autorelease]; 79 | DocumentRoot* root = [parser parseHTML: source]; 80 | NSArray* result = [root selectElements: @"div.x a"]; 81 | assert([result count] == 1); 82 | } 83 | /** Parses the bundled gizmodo.xml feed, logs a summary of every item and asserts that the feed holds 40 items. */ 84 | +(void)testFeed{ 85 | NSString* file = [[NSBundle mainBundle] pathForResource: @"gizmodo" ofType: @"xml"]; 86 | NSString* source = [NSString stringWithContentsOfFile: file]; 87 | assert(source); 88 | 89 | Element* root = [Element parseXML: source]; 90 | NSArray* items = [root selectElements: @"item"]; 91 | for (Element* item in items){ 92 | NSString* title = [[item selectElement: @"title"] contentsText]; 93 | NSString* description = [[item selectElement: @"description"] contentsText]; 94 | 95 | Element* descriptionDocument = [Element parseHTML: description]; 96 | NSString* strippedDescr = [descriptionDocument contentsText]; 97 | NSString* descrImg = [[descriptionDocument selectElement: @"img"] attribute: @"src"]; 98 | /* -length is an NSUInteger: cast and print with %lu so the format matches on 32- and 64-bit builds */ 99 | NSLog(@"\n\n%@\n%lu chars in descr beginning with: %@\nStripped:%@\nImage: %@", title, (unsigned long)[description length], [description substringToIndex: MIN([description length], 32)], [strippedDescr substringToIndex: MIN([strippedDescr length], 32)], descrImg); 100 | } 101 | 102 | assert([items count] == 40); 103 | } 104 | /** Runs every test in this category, including the publicly declared testNestedMatches. */ 105 | +(void)testAll{ 106 | [self testFeedPerf]; 107 | [self testFeed]; 108 | [self testElementParser]; 109 | [self testShouldBeEmpty]; 110 | [self testNestedMatches]; 111 | } 112 | 113 | 114 | @end 115 | -------------------------------------------------------------------------------- /Test/Element_Test.h: -------------------------------------------------------------------------------- 1 | // 2 | // Element_Test.h 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved.
7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 23 | // 24 | 25 | #import 26 | #import "Element.h" 27 | /** Test category adding assert-based self-checks to Element as class methods; each method covers one area and testAll runs them in sequence. */ 28 | @interface Element (Test) 29 | 30 | +(void)testElement; 31 | +(void)testAttribute; 32 | +(void)testContentsOfChildren; 33 | +(void)testSelectElements; 34 | +(void)testElementWithCSSSelector; 35 | +(void)testElementContentsWithCSSSelector; 36 | /** Runs the six tests declared above. */ +(void)testAll; 37 | 38 | @end 39 | -------------------------------------------------------------------------------- /Test/Element_Test.m: -------------------------------------------------------------------------------- 1 | // 2 | // Element_Test.m 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/21/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 23 | // 24 | 25 | #import "Element_Test.h" 26 | #import "NSString_HTML.h" 27 | #import "DocumentRoot.h" 28 | 29 | @implementation Element (Test) 30 | 31 | /** Asserts tagName, attribute: and the attribute count for elements built with the NSString -element helper. */ 32 | +(void)testElement{ 33 | NSString* result; 34 | Element* element; 35 | 36 | // should handle simple tagname 37 | result = [[@"" element] tagName]; 38 | assert([result isEqualToString: @"foo"]); 39 | 40 | // should handle element with attributes 41 | element = [@"" element]; 42 | result = [element tagName]; 43 | assert([result isEqualToString: @"foo"]); 44 | assert([[element attribute: @"att"] isEqualToString: @"23"]); 45 | 46 | //should handle attributes 47 | element = [@"" element]; 48 | assert([[element attributes] count] == 3); 49 | assert([[element attribute: @"att1"] isEqualToString: @"23"]); 50 | assert([[element attribute: @"att2"] isEqualToString: @"red"]); 51 | assert([[element attribute: @"att3"] isEqualToString: @"what"]); 52 | } 53 | /** Asserts attribute: for a missing attribute (nil) and for several ways of writing the "bar" attribute. */ 54 | +(void)testAttribute{ 55 | NSString* result; 56 | 57 | // should handle missing attr 58 | result = [[@"" element] attribute: @"bar"]; 59 | assert(result == nil); 60 | 61 | // should handle attr 62 | result = [[@"" element] attribute: @"bar"]; 63 | assert([result isEqualToString: @"23"]); 64 | 65 | // should handle attr among others 66 | result = [[@"" element] attribute: @"bar"]; 67 | assert([result isEqualToString: @"23"]); 68 | 69 | // should handle attr with quotes 70 | result = [[@"" element] attribute: @"bar"]; 71 | assert([result isEqualToString: @"huh"]); 72 | 73 | // should handle attr without values 74 | result = [[@"" element] attribute: @"bar"]; 75 | assert([result isEqualToString: @"goo"]); 76 | } 77 | /** Asserts that contentsOfChildren of the selected item returns "lee" for key "name" and "919-971-1377" for key "phone". */ 78 | 
+(void)testContentsOfChildren{ 79 | Element* document = [Element parseXML: @"lee919-971-1377"]; 80 | Element* item = [document selectElement:@"item"]; 81 | NSDictionary* kids = [item contentsOfChildren]; 82 | assert([[kids objectForKey: @"name"] isEqualToString: @"lee"]); 83 | assert([[kids objectForKey: @"phone"] isEqualToString: @"919-971-1377"]); 84 | } 85 | /** Asserts on the first two results of selectElements: @"item". NOTE(review): isEqualToString: is sent to the results directly rather than to their description -- confirm Element supports that. */ 86 | +(void)testSelectElements{ 87 | Element* root = [Element parseXML: @"goofoo"]; 88 | NSArray* found = [root selectElements: @"item"]; 89 | assert([[found objectAtIndex: 0] isEqualToString: @""]); 90 | assert([[found objectAtIndex: 1] isEqualToString: @""]); 91 | } 92 | /** Helper: parses source as HTML, looks up sel with selectElement: and asserts the result's description equals expect. */ 93 | +(void)testSelectElement:(NSString*)source selector:(NSString*)sel expect:(NSString*)expect{ 94 | Element* root = [Element parseHTML: source]; 95 | // NSLog([root dumpTree]); 96 | Element* found = [root selectElement: sel]; 97 | assert([[found description] isEqualToString: expect]); 98 | } 99 | /** Exercises selectElement: with tag, descendant, attribute, class, id, successor (+) and child (>) selectors. */ 100 | +(void)testElementWithCSSSelector{ 101 | 102 | //should handle a single part 103 | [self testSelectElement: @"" 104 | selector: @"img" 105 | expect: @""]; 106 | 107 | //should handle multiple parts 108 | [self testSelectElement: @"" 109 | selector: @"body img" 110 | expect: @""]; 111 | 112 | //should handle multiple parts with more complicated parts 113 | [self testSelectElement: @"" 114 | selector: @"body img[src='foo']" 115 | expect: @""]; 116 | 117 | //should handle a class 118 | [self testSelectElement: @"" 119 | selector: @"body a.one" 120 | expect: @""]; 121 | 122 | //should handle a class among more than one 123 | [self testSelectElement: @"" 124 | selector: @"body a.two" 125 | expect: @""]; 126 | 127 | //should handle multiple classes together 128 | [self testSelectElement: @"" 129 | selector: @"body a.one.two" 130 | expect: @""]; 131 | 132 | //should handle an id 133 | [self testSelectElement: @"" 134 | selector: @"#bob img" 135 | expect: @""]; 136 | 137 | //should handle multiple parts with misses 138 | [self 
testSelectElement: @"" 139 | selector: @"body img[src='goo']" 140 | expect: @""]; 141 | 142 | //should handle successor verb 143 | [self testSelectElement: @"heretherethree" 144 | selector: @"body a + img" 145 | expect: @""]; 146 | 147 | //should handle child verb 148 | [self testSelectElement: @"" 149 | selector: @"body a > img" 150 | expect: @""]; 151 | 152 | } 153 | /** Helper: parses source as HTML, looks up sel with selectElement: and asserts the result's contentsSource equals expect. */ 154 | +(void)testSelectElemenContents:(NSString*)source selector:(NSString*)sel expect:(NSString*)expect{ 155 | Element* root = [Element parseHTML: source]; 156 | Element* found = [root selectElement: sel]; 157 | assert([[found contentsSource] isEqualToString: expect]); 158 | } 159 | /** Asserts the contentsSource of the element selected by "body a[href='2']". */ 160 | +(void)testElementContentsWithCSSSelector{ 161 | //should return the contents of the element matched by an attribute selector 162 | [self testSelectElemenContents: @"not thissome real text" 163 | selector: @"body a[href='2']" 164 | expect: @"some real text"]; 165 | } 166 | /** Runs every public test in this category. */ +(void)testAll{ 167 | [self testElement]; 168 | [self testAttribute]; 169 | [self testContentsOfChildren]; 170 | [self testSelectElements]; 171 | [self testElementWithCSSSelector]; 172 | [self testElementContentsWithCSSSelector]; 173 | 174 | } 175 | @end 176 | -------------------------------------------------------------------------------- /Test/NSString_HTML_Test.h: -------------------------------------------------------------------------------- 1 | // 2 | // NSString_HTML_Test.h 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/16/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 23 | // 24 | 25 | #import 26 | 27 | /** Test category adding assert-based self-checks for the NSString HTML helpers as class methods. */ 28 | @interface NSString (HTML_Test) 29 | +(void)testStripTags; 30 | +(void)testParseHTML; 31 | +(void)testStringByReplacingEntities; 32 | /** Runs every test of this category inside its own autorelease pool. */ 33 | +(void)testAllHTMLTest; 34 | 35 | @end 36 | -------------------------------------------------------------------------------- /Test/NSString_HTML_Test.m: -------------------------------------------------------------------------------- 1 | // 2 | // NSString_HTML_Test.m 3 | // Thumbprint 4 | // 5 | // Created by Lee Buck on 4/16/09. 6 | // Copyright 2009 Blue Bright Ventures. All rights reserved. 7 | // 8 | // This program is free software: you can redistribute it and/or modify 9 | // it under the terms of the GNU General Public License as published by 10 | // the Free Software Foundation, either version 3 of the License, or 11 | // (at your option) any later version. 12 | 13 | // This program is distributed in the hope that it will be useful, 14 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | // GNU General Public License for more details. 17 | 18 | // Commercial licences without many of the obligations of GPL 19 | // are available for a nomial fee at sales@touchtankapps.com. 20 | 21 | // You should have received a copy of the GNU General Public License 22 | // along with this program. If not, see . 
23 | // 24 | 25 | #import "NSString_HTML_Test.h" 26 | #import "NSString_HTML.h" 27 | #import "NSString_Additions.h" 28 | #import "Element.h" 29 | #import "Chunk.h" 30 | #import "ElementParser.h" 31 | 32 | 33 | @implementation NSString (HTML_Test) 34 | /** Asserts the output of stripTags for tagged text, text outside tags, entities, paragraph/BR breaks, script content and ill-formed markup. */ 35 | +(void)testStripTags{ 36 | NSString* result; 37 | // should strip tags 38 | result = [@"this is some text" stripTags]; 39 | assert([result isEqualToString: @"this is some text"]); 40 | 41 | //should handle text outside of tags 42 | result = [@"outside some stuff this is some text" stripTags]; 43 | assert([result isEqualToString: @"outside some stuff this is some text"]); 44 | 45 | //should handle entities 46 | result = [@"this is some text & and more" stripTags]; 47 | assert([result isEqualToString: @"this is some text & and more"]); 48 | 49 | //should handle bad entities 50 | result = [@"this is some text & and more" stripTags]; 51 | assert([result isEqualToString: @"this is some text & and more"]); 52 | 53 | // should replace p tags with returns (and BRs) 54 | result = [@"this is

some text

and more
" stripTags]; 55 | assert([result isEqualToString: @"this is \n\nsome text and more"]); 56 | 57 | //should handle not including script tags 58 | result = [@"this is

some text and more" stripTags]; 59 | assert([result isEqualToString: @"this is \n\nsome text and more"]); 60 | 61 | //should handle illformed html... (and BRs) 62 | result = [@"this is

some text
and more" stripTags]; 63 | assert([result isEqualToString: @"this is \n\nsome text \n\nand more"]); 64 | } 65 | 66 | 67 | /** Callback used by testParseHTML: appends the chunk's interiorString and a "|" separator to result, then returns self so parsing continues. */ 68 | +(id)chunk:(Chunk*)chunk context:(NSMutableString*)result{ 69 | [result appendString: [chunk interiorString]]; 70 | [result appendString: @"|"]; 71 | return self;//continue 72 | } 73 | /** Feeds a sample through parseHTML:delegate:selector:context: and asserts the "|"-separated chunk interiors collected by chunk:context:. */ 74 | +(void)testParseHTML{ 75 | NSMutableString* result = [[@"|" mutableCopy] autorelease]; /* mutableCopy returns an owned object; autorelease it so it is not leaked */ 76 | NSString* source = @"some text "goes here"; 77 | [NSString parseHTML: source delegate: self selector: @selector(chunk:context:) context: result]; 78 | assert([result isEqualToString: @"|foo|some |ignoreme|text |quot|goes here|/foo|b class='huh'|c /|/b|"]); 79 | } 80 | /** Asserts stringByReplacingEntities for input without entities, entities at either end, and unknown or malformed entities. */ 81 | +(void)testStringByReplacingEntities{ 82 | NSString* result; 83 | 84 | //should handle no entities 85 | result = [@"foo" stringByReplacingEntities]; 86 | assert([result isEqualToString: @"foo"]); 87 | 88 | //should handle entity at start 89 | result = [@">foo" stringByReplacingEntities]; 90 | assert([result isEqualToString: @">foo"]); 91 | 92 | //should handle entity at end 93 | result = [@"foo<" stringByReplacingEntities]; 94 | assert([result isEqualToString: @"foo<"]); 95 | 96 | //should handle unknown entity 97 | result = [@"foo&dddlt;" stringByReplacingEntities]; 98 | assert([result isEqualToString: @"foo&dddlt;"]); 99 | 100 | //should handle badly formed entity 101 | result = [@"foo&dddlt" stringByReplacingEntities]; 102 | assert([result isEqualToString: @"foo&dddlt"]); 103 | } 104 | 105 | 106 | /** Asserts startsWithStr for "oo" at offsets 1 (match), 0 and 5 (no match) of the sample buffer. */ 107 | +(void)testStartsWithStr{ 108 | NSString* string = @"foo 23 oa"; 109 | CFStringInlineBuffer buffer; 110 | 111 | CFRange range = CFRangeMake(0, [string length]); 112 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 113 | 114 | //should match or not 115 | assert(startsWithStr(&buffer, 1, "oo")); 116 | assert(!startsWithStr(&buffer, 0, "oo")); 117 | assert(!startsWithStr(&buffer, 5, "oo")); 118 | } 119 | /** Asserts lenEntityName is 5 at offset 0 and 0 at offset 5 of the sample buffer. */ 120 | +(void)testLenEntityName{ 121 | NSString* string = @"#foo;&#ng"; 122 | 
CFStringInlineBuffer buffer; 123 | 124 | CFRange range = CFRangeMake(0, [string length]); 125 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 126 | 127 | //should match or not 128 | assert(lenEntityName(&buffer, 0)==5); 129 | assert(lenEntityName(&buffer, 5)==0); 130 | } 131 | /** Asserts the lengths lenThruOr reports for several character pairs and start offsets. */ 132 | +(void)testLenThruOr{ 133 | NSString* string = @"foo 23 oa"; 134 | CFStringInlineBuffer buffer; 135 | 136 | CFRange range = CFRangeMake(0, [string length]); 137 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 138 | 139 | //lenThruOr 140 | assert(lenThruOr(&buffer, 0, 'o', '2')==1); 141 | assert(lenThruOr(&buffer, 0, '2', '1')==4); 142 | assert(lenThruOr(&buffer, 0, 'w', '2')==4); 143 | assert(lenThruOr(&buffer, 0, 'x', 'z')==9); 144 | assert(lenThruOr(&buffer, 3, 'o', '2')==1); 145 | } 146 | /** Asserts the lengths lenThru reports for present substrings and 0 for an absent one. */ 147 | +(void)testLenThru{ 148 | NSString* string = @"foo 23 oa"; 149 | CFStringInlineBuffer buffer; 150 | 151 | CFRange range = CFRangeMake(0, [string length]); 152 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 153 | 154 | assert(lenThru(&buffer, 2, "23")==4); 155 | assert(lenThru(&buffer, 0, "23")==6); 156 | assert(lenThru(&buffer, 0, "oa")==9); 157 | assert(lenThru(&buffer, 0, "XXX")==0); 158 | 159 | } 160 | /** Asserts the character returned by skipNonWhitespace and the index it leaves behind, including the end-of-string case. */ 161 | +(void)testSkipNonWhitespace{ 162 | NSString* string = @"foo 23 oa"; 163 | CFStringInlineBuffer buffer; 164 | 165 | CFRange range = CFRangeMake(0, [string length]); 166 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 167 | 168 | CFIndex index = 0; 169 | // skips non white 170 | assert(skipNonWhitespace(&buffer, &index)==' '); 171 | assert(index == 3); 172 | 173 | //don't skip if already at whitespace 174 | assert(skipNonWhitespace(&buffer, &index)==' '); 175 | assert(index == 3); 176 | 177 | //return 0 if hit end of string before white char 178 | index = 7; 179 | assert(skipNonWhitespace(&buffer, &index)==0); 180 | assert(index == 9); 181 | } 182 | /** Asserts the character returned by skipWhitespace and the index it leaves behind. */ 183 | +(void)testSkipWhitespace{ 184 | NSString* string = @"foo 23 oa"; 185 
| CFStringInlineBuffer buffer; 186 | 187 | CFRange range = CFRangeMake(0, [string length]); 188 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 189 | 190 | CFIndex index; 191 | 192 | //skip a white 193 | index = 3; 194 | assert(skipWhitespace(&buffer, &index)=='2'); 195 | assert(index == 4); 196 | 197 | // don't skip if already non white 198 | assert(skipWhitespace(&buffer, &index)=='2'); 199 | assert(index == 4); 200 | 201 | } 202 | /** Asserts lenToken is 3 at offset 0 and 0 at offset 3 of the sample buffer. */ 203 | +(void)testLenToken{ 204 | NSString* string = @"foo 23 oa"; 205 | CFStringInlineBuffer buffer; 206 | 207 | CFRange range = CFRangeMake(0, [string length]); 208 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 209 | 210 | assert(lenToken(&buffer, 0)==3); 211 | // should return 0 if not in a token 212 | assert(lenToken(&buffer, 3)==0); 213 | 214 | } 215 | /** Asserts the character parseEntity returns for several sample strings, and 0 for the last two. */ +(void)testParseEntity{ 216 | NSString* string = @"foo 23 oa"; 217 | CFStringInlineBuffer buffer; 218 | 219 | CFRange range = CFRangeMake(0, [string length]); 220 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 221 | 222 | CFIndex len; 223 | 224 | string = @"&"; 225 | range = CFRangeMake(0, [string length]); 226 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 227 | assert(parseEntity(&buffer, 0, &len)=='&'); 228 | 229 | string = @"'"; 230 | range = CFRangeMake(0, [string length]); 231 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 232 | assert(parseEntity(&buffer, 0, &len)=='\''); 233 | 234 | string = @" "; 235 | range = CFRangeMake(0, [string length]); 236 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 237 | char c = parseEntity(&buffer, 0, &len); 238 | assert(c==' '); 239 | 240 | string = @" "; 241 | range = CFRangeMake(0, [string length]); 242 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 243 | assert(parseEntity(&buffer, 0, &len)==' '); 244 | 245 | string = @"&foo;"; 246 | range = CFRangeMake(0, [string length]); 247 | 
CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 248 | assert(parseEntity(&buffer, 0, &len)==0); 249 | 250 | string = @"&mdas"; 251 | range = CFRangeMake(0, [string length]); 252 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 253 | assert(parseEntity(&buffer, 0, &len)==0); 254 | 255 | } 256 | /** Asserts that lenDoctype at offset 0 equals the full length of each sample string. */ 257 | +(void)testLenDoctype{ 258 | NSString* string = @""; 259 | CFStringInlineBuffer buffer; 260 | CFRange range = CFRangeMake(0, [string length]); 261 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 262 | assert(lenDoctype(&buffer, 0)==[string length]); 263 | 264 | string = @""; 265 | range = CFRangeMake(0, [string length]); 266 | CFStringInitInlineBuffer((CFStringRef)string, &buffer, range); 267 | assert(lenDoctype(&buffer, 0)==[string length]); 268 | 269 | } 270 | /** Asserts hexValue for plain, upper-case and lower-case digits and for invalid or empty input. */ 271 | +(void)testHexValue{ 272 | // should handle vanilla input 273 | assert([@"12" hexValue] == 18); 274 | // should handle upper case digits 275 | assert([@"1A" hexValue] == 26); 276 | // should handle lower case digits 277 | assert([@"1c" hexValue] == 28); 278 | // should handle bad characters by stopping 279 | assert([@"10g34" hexValue] == 16); 280 | // should handle empty string by returning 0 281 | assert([@"" hexValue] == 0); 282 | // should handle bad strings by returning 0 283 | assert([@"wywt" hexValue] == 0); 284 | } 285 | 286 | /** Runs every test of this category inside a local autorelease pool. */ 287 | +(void)testAllHTMLTest{ 288 | NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; 289 | [NSString testStartsWithStr]; 290 | [NSString testLenEntityName]; 291 | [NSString testLenThruOr]; 292 | [NSString testLenThru]; 293 | [NSString testSkipNonWhitespace]; 294 | [NSString testSkipWhitespace]; 295 | [NSString testLenToken]; 296 | [NSString testParseEntity]; 297 | [NSString testLenDoctype]; 298 | [NSString testStripTags]; 299 | [NSString testParseHTML]; 300 | [NSString testStringByReplacingEntities]; 301 | [NSString testHexValue]; 302 | [pool release]; 303 | } 304 | @end 305 | 306 | 
--------------------------------------------------------------------------------