├── README.md ├── TextBookParse.xcodeproj ├── project.pbxproj ├── project.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── TextBookParse.xccheckout ├── xcshareddata │ └── xcbaselines │ │ └── 664422B91990ABC900C26AAE.xcbaseline │ │ ├── A80F59E5-AB6B-4B4F-B88A-F58DB535CFC4.plist │ │ └── Info.plist └── xcuserdata │ └── xxsy-ima001.xcuserdatad │ ├── xcdebugger │ └── Breakpoints_v2.xcbkptlist │ └── xcschemes │ ├── TextBookParse.xcscheme │ └── xcschememanagement.plist ├── TextBookParse ├── AppDelegate.h ├── AppDelegate.m ├── Base.lproj │ └── Main.storyboard ├── DRBookParse.h ├── DRBookParse.m ├── DRParseChapterContent.h ├── DRParseChapterKit.h ├── Images.xcassets │ ├── AppIcon.appiconset │ │ └── Contents.json │ └── LaunchImage.launchimage │ │ └── Contents.json ├── Section0002_0001_0012_0001.xhtml ├── TextBookParse-Info.plist ├── TextBookParse-Prefix.pch ├── ViewController.h ├── ViewController.m ├── bookParse │ ├── DDFileReader.h │ ├── DDFileReader.m │ ├── DRParseChapter+LocalFile.h │ ├── DRParseChapter+LocalFile.m │ ├── DRParseChapter.h │ ├── DRParseChapter.m │ ├── DRTextBookParse.h │ ├── DRTextBookParse.m │ ├── NSData+encoding.h │ ├── NSData+encoding.m │ ├── NSString+replaceNumbers.h │ ├── NSString+replaceNumbers.m │ └── NSString+replaceNumbersTest.m ├── bookTest.txt ├── en.lproj │ └── InfoPlist.strings ├── epubParse │ ├── DREpubBookParse.h │ ├── DREpubBookParse.m │ ├── DREpubBookParseTest.m │ ├── KFEpubConstants.h │ ├── KFEpubConstants.m │ ├── KFEpubParser.h │ ├── KFEpubParser.m │ ├── KFEpubParserTest.m │ ├── KissXML │ │ ├── Additions │ │ │ ├── DDXMLElementAdditions.h │ │ │ └── DDXMLElementAdditions.m │ │ ├── Categories │ │ │ ├── NSString+DDXML.h │ │ │ └── NSString+DDXML.m │ │ ├── DDXML.h │ │ ├── DDXMLDocument.h │ │ ├── DDXMLDocument.m │ │ ├── DDXMLElement.h │ │ ├── DDXMLElement.m │ │ ├── DDXMLNode.h │ │ ├── DDXMLNode.m │ │ └── Private │ │ │ └── DDXMLPrivate.h │ ├── SSZipArchive │ │ ├── SSZipArchive.h │ │ ├── SSZipArchive.m │ │ └── minizip │ │ │ 
├── crypt.h │ │ │ ├── ioapi.c │ │ │ ├── ioapi.h │ │ │ ├── mztools.c │ │ │ ├── mztools.h │ │ │ ├── unzip.c │ │ │ ├── unzip.h │ │ │ ├── zip.c │ │ │ └── zip.h │ └── htmlParse │ │ ├── HTMLComment.h │ │ ├── HTMLComment.m │ │ ├── HTMLDocument.h │ │ ├── HTMLDocument.m │ │ ├── HTMLDocumentType.h │ │ ├── HTMLDocumentType.m │ │ ├── HTMLElement.h │ │ ├── HTMLElement.m │ │ ├── HTMLEntities.h │ │ ├── HTMLEntities.m │ │ ├── HTMLNamespace.h │ │ ├── HTMLNode.h │ │ ├── HTMLNode.m │ │ ├── HTMLOrderedDictionary.h │ │ ├── HTMLOrderedDictionary.m │ │ ├── HTMLParser.h │ │ ├── HTMLParser.m │ │ ├── HTMLPreprocessedInputStream.h │ │ ├── HTMLPreprocessedInputStream.m │ │ ├── HTMLQuirksMode.h │ │ ├── HTMLReader.h │ │ ├── HTMLSelector.h │ │ ├── HTMLSelector.m │ │ ├── HTMLSerialization.h │ │ ├── HTMLSerialization.m │ │ ├── HTMLString.h │ │ ├── HTMLString.m │ │ ├── HTMLSupport.h │ │ ├── HTMLTextNode.h │ │ ├── HTMLTextNode.m │ │ ├── HTMLTokenizer.h │ │ ├── HTMLTokenizer.m │ │ ├── HTMLTokenizerState.h │ │ ├── HTMLTreeEnumerator.h │ │ ├── HTMLTreeEnumerator.m │ │ ├── NSString+HTMLEntities.h │ │ └── NSString+HTMLEntities.m └── main.m └── TextBookParseTests ├── TextBookParseTests-Info.plist ├── TextBookParseTests.m └── en.lproj └── InfoPlist.strings /README.md: -------------------------------------------------------------------------------- 1 | TextBookParse 2 | ============= 3 | 4 | 支持txt文档和epub文档解析 5 | -------------------------------------------------------------------------------- /TextBookParse.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /TextBookParse.xcodeproj/project.xcworkspace/xcshareddata/TextBookParse.xccheckout: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDESourceControlProjectFavoriteDictionaryKey 6 | 7 | 
IDESourceControlProjectIdentifier 8 | C7D61D20-B05E-4BC6-B0C5-9498F83113E9 9 | IDESourceControlProjectName 10 | TextBookParse 11 | IDESourceControlProjectOriginsDictionary 12 | 13 | 2A3442A498E23A9BD95BF5CEEE89FD7AEC77CA48 14 | github.com:david122886/TextBookParse.git 15 | 16 | IDESourceControlProjectPath 17 | TextBookParse.xcodeproj 18 | IDESourceControlProjectRelativeInstallPathDictionary 19 | 20 | 2A3442A498E23A9BD95BF5CEEE89FD7AEC77CA48 21 | ../.. 22 | 23 | IDESourceControlProjectURL 24 | github.com:david122886/TextBookParse.git 25 | IDESourceControlProjectVersion 26 | 111 27 | IDESourceControlProjectWCCIdentifier 28 | 2A3442A498E23A9BD95BF5CEEE89FD7AEC77CA48 29 | IDESourceControlProjectWCConfigurations 30 | 31 | 32 | IDESourceControlRepositoryExtensionIdentifierKey 33 | public.vcs.git 34 | IDESourceControlWCCIdentifierKey 35 | 2A3442A498E23A9BD95BF5CEEE89FD7AEC77CA48 36 | IDESourceControlWCCName 37 | TextBookParse 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /TextBookParse.xcodeproj/xcshareddata/xcbaselines/664422B91990ABC900C26AAE.xcbaseline/A80F59E5-AB6B-4B4F-B88A-F58DB535CFC4.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | classNames 6 | 7 | DREpubBookParseTest 8 | 9 | testPerformanceExample 10 | 11 | com.apple.XCTPerformanceMetric_WallClockTime 12 | 13 | baselineAverage 14 | 2 15 | baselineIntegrationDisplayName 16 | Local Baseline 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /TextBookParse.xcodeproj/xcshareddata/xcbaselines/664422B91990ABC900C26AAE.xcbaseline/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | runDestinationsByUUID 6 | 7 | A80F59E5-AB6B-4B4F-B88A-F58DB535CFC4 8 | 9 | localComputer 10 | 11 | busSpeedInMHz 12 | 100 13 | cpuCount 14 | 1 15 | cpuKind 16 | Intel Core i5 17 | cpuSpeedInMHz 18 | 
2500 19 | logicalCPUCoresPerPackage 20 | 4 21 | modelCode 22 | Macmini6,1 23 | physicalCPUCoresPerPackage 24 | 2 25 | platformIdentifier 26 | com.apple.platform.macosx 27 | 28 | targetArchitecture 29 | i386 30 | targetDevice 31 | 32 | modelCode 33 | iPhone5,1 34 | platformIdentifier 35 | com.apple.platform.iphonesimulator 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /TextBookParse.xcodeproj/xcuserdata/xxsy-ima001.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 8 | 20 | 21 | 22 | 24 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /TextBookParse.xcodeproj/xcuserdata/xxsy-ima001.xcuserdatad/xcschemes/TextBookParse.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 33 | 39 | 40 | 41 | 42 | 43 | 49 | 50 | 51 | 52 | 61 | 62 | 68 | 69 | 70 | 71 | 72 | 73 | 79 | 80 | 86 | 87 | 88 | 89 | 91 | 92 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /TextBookParse.xcodeproj/xcuserdata/xxsy-ima001.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | TextBookParse.xcscheme 8 | 9 | orderHint 10 | 0 11 | 12 | 13 | SuppressBuildableAutocreation 14 | 15 | 664422981990ABC900C26AAE 16 | 17 | primary 18 | 19 | 20 | 664422B91990ABC900C26AAE 21 | 22 | primary 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /TextBookParse/AppDelegate.h: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.h 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-8-5. 
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

// NOTE(review): in this dump the import below had no target and the interface
// had no adopted-protocol list (angle-bracketed text appears to have been
// stripped). Both are restored on the assumption that this is the stock Xcode
// application template — confirm against the repository.
#import <UIKit/UIKit.h>

/// Application delegate. Declares the `window` property.
@interface AppDelegate : UIResponder <UIApplicationDelegate>

@property (strong, nonatomic) UIWindow *window;

@end

// -----------------------------------------------------------------------------
// /TextBookParse/AppDelegate.m
// -----------------------------------------------------------------------------
//
//  AppDelegate.m
//  TextBookParse
//
//  Created by xxsy-ima001 on 14-8-5.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

#import "AppDelegate.h"
#import "DREpubBookParse.h"
#import "HTMLReader.h"

@implementation AppDelegate

#pragma mark - UIApplicationDelegate

/// Launch hook. Performs no custom setup and always reports success.
- (BOOL)application:(UIApplication *)application
    didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {
    return YES;
}

/// Intentionally empty: nothing is paused when the app becomes inactive.
- (void)applicationWillResignActive:(UIApplication *)application {
}

/// Intentionally empty: no state is saved when the app enters the background.
- (void)applicationDidEnterBackground:(UIApplication *)application {
}

/// Intentionally empty: nothing needs to be undone when returning to the foreground.
- (void)applicationWillEnterForeground:(UIApplication *)application {
}

/// Intentionally empty: no tasks are restarted when the app becomes active.
- (void)applicationDidBecomeActive:(UIApplication *)application {
}

/// Intentionally empty: nothing is persisted on termination.
- (void)applicationWillTerminate:(UIApplication *)application {
}

@end

// -----------------------------------------------------------------------------
// /TextBookParse/Base.lproj/Main.storyboard
// (no storyboard content survives in this dump)
// -----------------------------------------------------------------------------

// -----------------------------------------------------------------------------
// /TextBookParse/DRBookParse.h
// -----------------------------------------------------------------------------
//
//  DRBookParse.h
//  TextBookParse
//
//  Created by xxsy-ima001 on 14-10-31.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

// NOTE(review): the target of this import is missing from the dump
// (angle-bracketed text appears stripped); <Foundation/Foundation.h> is
// assumed — confirm against the repository.
#import <Foundation/Foundation.h>
#import "DRParseChapter.h"
#import "DRParseChapterContent.h"

/// Base class for book parsers. The three "parsed" queries below return
/// placeholder values here and are meant to be overridden by subclasses.
@interface DRBookParse : NSObject

/// Whether the book at `filePath` has already been parsed.
/// Subclasses must provide the real implementation; the base class returns NO.
+(BOOL)hasBookParsedWithBookFilePath:(NSString*)filePath;

/// Hidden directory that holds the parse output for the book at `filePath`.
/// Subclasses must provide the real implementation; the base class returns nil.
+(NSString*)getBookParseDicPathWithBookFilePath:(NSString*)filePath;

/// Whether the book at `filePath` is currently being parsed.
/// Subclasses must provide the real implementation; the base class returns NO.
+(BOOL)hasBookParsedIsWorkingWithBookFilePath:(NSString *)filePath;

/// Book format, derived from the (case-insensitive) file extension of `filePath`.
+(BookRootDicType)getParsedBookTypeWithBookFilePath:(NSString *)filePath;

/// URL of the first user-domain Documents directory, or nil if none is reported.
+(NSURL*)getAppDocumentPath;
@end

// -----------------------------------------------------------------------------
// /TextBookParse/DRBookParse.m
// -----------------------------------------------------------------------------
//
//  DRBookParse.m
//  TextBookParse
//
//  Created by xxsy-ima001 on 14-10-31.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

#import "DRBookParse.h"
@implementation DRBookParse

/// Whether the book has been parsed. Placeholder: always NO in the base class.
+(BOOL)hasBookParsedWithBookFilePath:(NSString*)filePath{
    return NO;
}

/// Hidden directory for the parsed book. Placeholder: always nil in the base class.
+(NSString*)getBookParseDicPathWithBookFilePath:(NSString*)filePath{
    return nil;
}

/// Whether the book is being parsed right now. Placeholder: always NO in the base class.
+(BOOL)hasBookParsedIsWorkingWithBookFilePath:(NSString *)filePath{
    return NO;
}

/// Maps the file extension of `filePath` to a book format.
///
/// @param filePath Path of the book file; may be nil.
/// @return `BookRootDicType_UMD` for "umd", `BookRootDicType_EPUB` for "epub",
///         `BookRootDicType_TEXT` for "text"/"txt", and `BookRootDicType_OTHER`
///         for nil or any other extension. The comparison ignores case.
+(BookRootDicType)getParsedBookTypeWithBookFilePath:(NSString *)filePath{
    if (!filePath) {
        return BookRootDicType_OTHER;
    }
    NSString *extension = [[filePath pathExtension] lowercaseString];
    if ([extension isEqualToString:@"umd"]) {
        return BookRootDicType_UMD;
    }
    if ([extension isEqualToString:@"epub"]) {
        return BookRootDicType_EPUB;
    }
    if ([extension isEqualToString:@"text"] || [extension isEqualToString:@"txt"]) {
        // Plain-text books previously fell through to the UMD value, which left
        // BookRootDicType_TEXT unreachable.
        return BookRootDicType_TEXT;
    }
    return BookRootDicType_OTHER;
}

/// Returns the first URL reported for NSDocumentDirectory in the user domain.
+(NSURL*)getAppDocumentPath{
    NSArray *urls = [[NSFileManager defaultManager] URLsForDirectory:NSDocumentDirectory
                                                           inDomains:NSUserDomainMask];
    return [urls firstObject];
}
@end

// -----------------------------------------------------------------------------
// /TextBookParse/DRParseChapterContent.h
// -----------------------------------------------------------------------------
//
//  DRParseChapterContent.h
//  TextBookParse
//
//  Created by xxsy-ima001 on 14/11/6.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

/// Directory names associated with each book type.
#define kbookTypeTEXT @".david"
#define kbookTypeEPUB @".davidEPUB"
#define kbookTypeUMD @".davidUMD"

#define kBookChapterArrayFileName @"chapter_david.plist"

#define kbookPropertiesDic @".davidkbookPropertiesDic"
#pragma mark -
/// Keys used for epub book properties.
#define kBookName @"kepubBookName"
#define kBookCoverPath @"kepubBookCoverPath"
#define kBookAuthor @"kepubBookAuthor"

#pragma mark -

/// Formats of books that can be parsed.
typedef NS_ENUM(NSInteger,BookRootDicType) {
    BookRootDicType_TEXT = 210,
    BookRootDicType_EPUB,
    BookRootDicType_UMD,
    BookRootDicType_OTHER,
};

// -----------------------------------------------------------------------------
// /TextBookParse/DRParseChapterKit.h
// -----------------------------------------------------------------------------
//
//  DRParseChapterKit.h
//  TextBookParse
//
//  Created by xxsy-ima001 on 14/11/7.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
7 | // 8 | 9 | #import "DRParseChapterContent.h" 10 | #import "DRParseChapter.h" 11 | #import "DRParseChapter+LocalFile.h" 12 | #import "DRBookParse.h" 13 | #import "DREpubBookParse.h" 14 | #import "DRTextBookParse.h" 15 | 16 | -------------------------------------------------------------------------------- /TextBookParse/Images.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "29x29", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "40x40", 11 | "scale" : "2x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "60x60", 16 | "scale" : "2x" 17 | } 18 | ], 19 | "info" : { 20 | "version" : 1, 21 | "author" : "xcode" 22 | } 23 | } -------------------------------------------------------------------------------- /TextBookParse/Images.xcassets/LaunchImage.launchimage/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "orientation" : "portrait", 5 | "idiom" : "iphone", 6 | "extent" : "full-screen", 7 | "minimum-system-version" : "7.0", 8 | "scale" : "2x" 9 | }, 10 | { 11 | "orientation" : "portrait", 12 | "idiom" : "iphone", 13 | "subtype" : "retina4", 14 | "extent" : "full-screen", 15 | "minimum-system-version" : "7.0", 16 | "scale" : "2x" 17 | } 18 | ], 19 | "info" : { 20 | "version" : 1, 21 | "author" : "xcode" 22 | } 23 | } -------------------------------------------------------------------------------- /TextBookParse/TextBookParse-Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleDisplayName 8 | ${PRODUCT_NAME} 9 | CFBundleExecutable 10 | ${EXECUTABLE_NAME} 11 | CFBundleIdentifier 12 | xiaoxiangwenxue.${PRODUCT_NAME:rfc1034identifier} 13 | CFBundleInfoDictionaryVersion 14 | 6.0 15 | CFBundleName 16 | ${PRODUCT_NAME} 17 | 
CFBundlePackageType 18 | APPL 19 | CFBundleShortVersionString 20 | 1.0 21 | CFBundleSignature 22 | ???? 23 | CFBundleVersion 24 | 1.0 25 | LSRequiresIPhoneOS 26 | 27 | UIMainStoryboardFile 28 | Main 29 | UIRequiredDeviceCapabilities 30 | 31 | armv7 32 | 33 | UISupportedInterfaceOrientations 34 | 35 | UIInterfaceOrientationPortrait 36 | UIInterfaceOrientationLandscapeLeft 37 | UIInterfaceOrientationLandscapeRight 38 | 39 | UIFileSharingEnabled 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /TextBookParse/TextBookParse-Prefix.pch: -------------------------------------------------------------------------------- 1 | // 2 | // Prefix header 3 | // 4 | // The contents of this file are implicitly included at the beginning of every source file. 5 | // 6 | 7 | #import 8 | 9 | #ifndef __IPHONE_5_0 10 | #warning "This project uses features only available in iOS SDK 5.0 and later." 11 | #endif 12 | 13 | #ifdef __OBJC__ 14 | #import 15 | #import 16 | #endif 17 | -------------------------------------------------------------------------------- /TextBookParse/ViewController.h: -------------------------------------------------------------------------------- 1 | // 2 | // ViewController.h 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-8-5. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved. 7 | // 8 | 9 | #import 10 | 11 | @interface ViewController : UIViewController 12 | 13 | @end 14 | -------------------------------------------------------------------------------- /TextBookParse/ViewController.m: -------------------------------------------------------------------------------- 1 | // 2 | // ViewController.m 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-8-5. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved. 
//

#import "ViewController.h"
#import "bookParse/DRTextBookParse.h"
#import "NSData+encoding.h"
#import "NSString+replaceNumbers.h"

@interface ViewController ()

@end

@implementation ViewController

/// Demo driver: looks for a ".txt" book in the app's Documents directory and
/// hands it to DRTextBookParse.
- (void)viewDidLoad
{
    [super viewDidLoad];
    // NOTE(review): this unconditional return makes everything below
    // unreachable. It is kept as found — confirm whether the demo is meant to
    // stay disabled.
    return;

    // An earlier revision ran the chapter-title regular expression against
    // sample lines at this point; that experiment was commented out.

    NSDate *beginDate = [NSDate date];
    NSFileManager *fileManager = [NSFileManager defaultManager];
    // Resolve the Documents directory inside the app's home directory.
    NSString *documentsDirectory = [NSHomeDirectory() stringByAppendingPathComponent:@"Documents"];
    NSArray *books = [fileManager contentsOfDirectoryAtPath:documentsDirectory error:nil];
    NSString *bookFile = nil;
    // The last entry whose name ends in "txt" wins.
    for (NSString *bookName in books) {
        if ([[bookName lastPathComponent] hasSuffix:@"txt"]) {
            bookFile = [documentsDirectory stringByAppendingPathComponent:bookName];
        }
    }

    if (!bookFile) {
        NSLog(@"没有找到书籍");
        return;
    }

    // An earlier revision called
    // +parseBookWithBookFilePath:findTheChapterBlock:withComplete:withFailure:
    // here and logged the time elapsed since `beginDate`.

    [DRTextBookParse parseBookWithBookFilePath:bookFile withStartChapterIndex:0 loadFirstChapter:^(DRParseChapter *findChapter) {

    } progressBlock:^(unsigned long long readLength, unsigned long long totalLength, DRParseChapter *findChapter) {

    } withComplete:^(NSArray *chaptersArray) {

    } withFailure:^(NSError *error) {
        NSLog(@"%@",error.userInfo[@"msg"]);
    }];
}

- (void)didReceiveMemoryWarning
{
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

@end

// -----------------------------------------------------------------------------
// /TextBookParse/bookParse/DDFileReader.h
// -----------------------------------------------------------------------------
//
//  DDFileReader.h
//  TextBookParse
//
//  Created by xxsy-ima001 on 14-8-5.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

// NOTE(review): the target of this import is missing from the dump
// (angle-bracketed text appears stripped); <Foundation/Foundation.h> is
// assumed — confirm against the repository.
#import <Foundation/Foundation.h>

// Whole-string pattern for a chapter-title line.
#define kChapterNameRegular @"^\\s*[\\S]{0,10}\\s*第\\s*[0-9零一二三四五六七八九十百千万]+\\s*[篇书首集卷回章节部]{1,2}\\s*[\\S]{0,20}\\s*$"
//#define kChapterNameRegular @"[\\S]{0,10}第\\s*[0-9零一二三四五六七八九十百千万]+\\s*[篇书首集卷回章部]{1}[\\S]{0,30}"
// Pattern for just the ordinal part of a chapter title.
#define kChapterNameIndexRegular @"第\\s*[0-9零一二三四五六七八九十百千万]+\\s*[篇书首集卷回章部]{1}"

/// Reads a file line by line in fixed-size chunks, tracking a byte offset.
@interface DDFileReader : NSObject {
    NSString * filePath;

    NSString * lineDelimiter;
    NSUInteger chunkSize;
}

/// Delimiter that terminates a line. Initialised to "\n".
@property (nonatomic, copy) NSString * lineDelimiter;
/// Number of bytes requested per read. Initialised to 10.
@property (nonatomic) NSUInteger chunkSize;
/// Byte offset at which the next -readLine starts.
@property (nonatomic,assign) unsigned long long currentOffset;
/// File length in bytes, measured once in -initWithFilePath:.
@property (nonatomic,assign) unsigned long long totalFileLength;
@property (nonatomic,strong) NSFileHandle * fileHandle;
/// Encoding used to decode lines and to encode the delimiter.
@property (nonatomic,assign) NSStringEncoding stringEncoding;

/// Opens `aPath` for reading. Returns nil when the file cannot be opened.
- (id) initWithFilePath:(NSString *)aPath;

/// Returns the next line including its delimiter, or nil at end of file.
- (NSString *) readLine;
/// Same as -readLine with surrounding whitespace and newlines trimmed.
- (NSString *) readTrimmedLine;

/// Whether the bytes in [fromIndex, toIndex) hold meaningful chapter content:
/// decodable, not only whitespace, and not just a chapter title.
-(BOOL)valuableDataFromIndex:(unsigned long long)fromIndex toIndex:(unsigned long long)toIndex;

/// Whether the two strings name the same chapter.
-(BOOL)hasTheSameChapterName:(NSString*)oneChapterName withAnotherChapterName:(NSString*)anotherChapterName;
/// Whether `lineString` is a chapter title line.
-(BOOL)hasChapterTitleLineData:(NSString*)lineString;
#if NS_BLOCKS_AVAILABLE
/// Calls `block` once per line until the file ends or the block sets *stop.
- (void) enumerateLinesUsingBlock:(void(^)(NSString*, BOOL *))block;
#endif

@end

// -----------------------------------------------------------------------------
// /TextBookParse/bookParse/DDFileReader.m
// -----------------------------------------------------------------------------
//
//  DDFileReader.m
//  TextBookParse
//
//  Created by xxsy-ima001 on 14-8-5.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

//
//  DDFileReader.m
//  PBX2OPML
//
//  Created by michael isbell on 11/6/11.
//  Copyright (c) 2011 BlueSwitch. All rights reserved.
//

//DDFileReader.m

#import "DDFileReader.h"
#import "NSData+encoding.h"
#import "NSString+replaceNumbers.h"
////////////////////////////////////////

@interface NSData (DDAdditions)

- (NSRange) rangeOfData_dd:(NSData *)dataToFind;

@end

@implementation NSData (DDAdditions)

/// Finds the first occurrence of `dataToFind` in the receiver.
///
/// @return The matching range, or {NSNotFound, dataToFind.length} when there is
///         no match (including when `dataToFind` is nil or empty).
- (NSRange) rangeOfData_dd:(NSData *)dataToFind {
    NSUInteger searchLength = [dataToFind length];
    NSRange notFound = {NSNotFound, searchLength};
    if (searchLength == 0 || searchLength > [self length]) {
        return notFound;
    }
    // The previous hand-rolled scan dropped the current byte after a failed
    // partial match, so "\r\n" was not found inside "\r\r\n".
    NSRange found = [self rangeOfData:dataToFind
                              options:0
                                range:NSMakeRange(0, [self length])];
    return (found.location == NSNotFound) ? notFound : found;
}

@end

@implementation DDFileReader
@synthesize lineDelimiter, chunkSize;

- (id) initWithFilePath:(NSString *)aPath {
    if (self = [super init]) {
        self.fileHandle = [NSFileHandle fileHandleForReadingAtPath:aPath];
        if (self.fileHandle == nil) {
            return nil;
        }

        lineDelimiter = @"\n";
        self.currentOffset = 0ULL;
        chunkSize = 10;
        // Seeking to the end is how the total length is measured.
        [self.fileHandle seekToEndOfFile];
        self.totalFileLength = [self.fileHandle offsetInFile];
        self.stringEncoding = [self getcontentEncodingWithFilePath:aPath];
        //we don't need to seek back, since readLine will do that.
    }
    return self;
}

/// Reads a short sample from the start of the file and asks the NSData
/// encoding category to determine the text encoding for `path`.
-(NSStringEncoding)getcontentEncodingWithFilePath:(NSString*)path{
    [self.fileHandle seekToFileOffset:0];
    unsigned long long total = self.totalFileLength;
    // Same sample size as before (10 bytes, or length-1 for short files), but
    // an empty file no longer relies on unsigned wrap-around of `0 - 1`.
    NSUInteger sampleLength = 0;
    if (total > 10ULL) {
        sampleLength = 10;
    } else if (total > 0ULL) {
        sampleLength = (NSUInteger)(total - 1ULL);
    }
    NSData *data = [self.fileHandle readDataOfLength:sampleLength];
    if (data) {
        return [data getCharEncodingWithFilePath:path];
    }
    return NSUTF8StringEncoding;
}

- (void) dealloc {
    [_fileHandle closeFile];
}

/// Returns the next line, delimiter included, decoded with `stringEncoding`.
///
/// @return nil at end of file, when no bytes could be read, or when the bytes
///         cannot be decoded with `stringEncoding`.
- (NSString *) readLine {
    if (self.currentOffset >= self.totalFileLength) { return nil; }

    NSData * newLineData = [lineDelimiter dataUsingEncoding:self.stringEncoding];
    NSUInteger delimiterLength = [newLineData length];
    unsigned long long lineStart = self.currentOffset;
    [self.fileHandle seekToFileOffset:lineStart];

    NSMutableData * currentData = [[NSMutableData alloc] init];
    NSUInteger searchFrom = 0;

    while (lineStart + [currentData length] < self.totalFileLength) {
        @autoreleasepool {
            NSData * chunk = [self.fileHandle readDataOfLength:chunkSize];
            // A zero-byte read (chunkSize of 0, or a file that shrank) would
            // otherwise spin forever because the offset never advances.
            if ([chunk length] == 0) { break; }
            [currentData appendData:chunk];

            if (delimiterLength > 0) {
                // Search the accumulated bytes rather than the chunk alone, so
                // a multi-byte delimiter split across two chunks is found.
                NSRange searchRange = NSMakeRange(searchFrom, [currentData length] - searchFrom);
                NSRange newLineRange = [currentData rangeOfData:newLineData
                                                        options:0
                                                          range:searchRange];
                if (newLineRange.location != NSNotFound) {
                    // Keep the delimiter; drop whatever was read past it.
                    [currentData setLength:NSMaxRange(newLineRange)];
                    break;
                }
                // Resume just far enough back to catch a straddling delimiter.
                if ([currentData length] >= delimiterLength) {
                    searchFrom = [currentData length] - (delimiterLength - 1);
                }
            }
        }
    }

    if ([currentData length] == 0) { return nil; }
    self.currentOffset = lineStart + [currentData length];

    NSString * line = [[NSString alloc] initWithData:currentData encoding:self.stringEncoding];
    return line;
}

- (NSString *) readTrimmedLine {
    return [[self readLine] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
}

/// Whether the byte span [fromIndex, toIndex) contains meaningful content.
/// The file handle is returned to `currentOffset` after the span is read.
-(BOOL)valuableDataFromIndex:(unsigned long long)fromIndex toIndex:(unsigned long long)toIndex{
    if (toIndex <= fromIndex || fromIndex >= self.totalFileLength) {
        return NO;
    }
    unsigned long long tmpIndex = self.currentOffset;
    [self.fileHandle seekToFileOffset:fromIndex];
    NSData *data = [self.fileHandle readDataOfLength:toIndex-fromIndex];
    [self.fileHandle seekToFileOffset:tmpIndex];
    if (!data || data.length <= 0) {
        return NO;
    }
    NSString *tmpString = [[NSString alloc] initWithData:data encoding:self.stringEncoding];
    if (!tmpString || [[tmpString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]] isEqualToString:@""]) {
        return NO;
    }
    // A span that is itself only a chapter title carries no content.
    if ([self hasChapterTitleLineData:tmpString]) {
        return NO;
    }
    return YES;
}

/// Whether two strings name the same chapter: either the normalised, trimmed
/// strings are equal, or both contain the same chapter-ordinal part.
/// Returns NO when either argument is nil.
-(BOOL)hasTheSameChapterName:(NSString*)oneChapterName withAnotherChapterName:(NSString*)anotherChapterName{
    // The second operand used to re-test oneChapterName, leaving
    // anotherChapterName unchecked.
    if (!oneChapterName || !anotherChapterName) {
        return NO;
    }
    NSCharacterSet *blanks = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSString *tmpOne = [[oneChapterName replaceNumberStringWhenAllDiscoverToChineseChar] stringByTrimmingCharactersInSet:blanks];
    NSString *tmpTwo = [[anotherChapterName replaceNumberStringWhenAllDiscoverToChineseChar] stringByTrimmingCharactersInSet:blanks];

    if ([tmpOne isEqualToString:tmpTwo]) {
        return YES;
    }
    NSRange oneRange = [tmpOne rangeOfString:kChapterNameIndexRegular options:NSRegularExpressionSearch];
    NSRange anotherRange = [tmpTwo rangeOfString:kChapterNameIndexRegular options:NSRegularExpressionSearch];
    if (oneRange.location != NSNotFound && anotherRange.location != NSNotFound) {
        if ([[tmpOne substringWithRange:oneRange] isEqualToString:[tmpTwo substringWithRange:anotherRange]]) {
            return YES;
        }
    }
    return NO;
}

/// Whether `lineString` matches the chapter-title pattern with a match longer
/// than two characters.
-(BOOL)hasChapterTitleLineData:(NSString*)lineString{
    NSRange range = [lineString rangeOfString:kChapterNameRegular options:NSRegularExpressionSearch];
    return range.length > 2;
}

#if NS_BLOCKS_AVAILABLE
- (void) enumerateLinesUsingBlock:(void(^)(NSString*, BOOL*))block {
    NSString * line = nil;
    BOOL stop = NO;
    while (stop == NO && (line = [self readLine])) {
        block(line, &stop);
    }
}
#endif

@end

// -----------------------------------------------------------------------------
// /TextBookParse/bookParse/DRParseChapter+LocalFile.h
// -----------------------------------------------------------------------------
//
//  DRParseChapter+LocalFile.h
//  TextBookParse
//
//  Created by xxsy-ima001 on 14-9-23.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
7 | // 8 | 9 | #import 10 | #import "DRParseChapter.h" 11 | @interface DRParseChapter (LocalFile) 12 | +(void)writeDRParseChaptersArray:(NSArray*)parseChapterArr ToPlistFileWithPlistFilePath:(NSString*)plistFilePath; 13 | 14 | +(void)writeDRParseChaptersArray:(NSArray*)parseChapterArr withCoverFilePath:(NSString*)coverPath withBookName:(NSString*)bookName withAuthor:(NSString*)author ToPlistFileWithPlistFilePath:(NSString*)plistFilePath; 15 | 16 | +(NSArray*)parseChapterArrayFromPlistFilePath:(NSString*)plistFilePath; 17 | 18 | +(NSDictionary*)parseChapterDicFromPlistFilePath:(NSString*)plistFilePath; 19 | @end 20 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/DRParseChapter+LocalFile.m: -------------------------------------------------------------------------------- 1 | // 2 | // DRParseChapter+LocalFile.m 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-9-23. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved. 
7 | //

#import "DRParseChapter+LocalFile.h"

/// Makes sure the directory that will contain `filePath` exists.
/// Best effort: a failure is logged and the caller still attempts its write,
/// which keeps relative paths without a directory component working.
static void DREnsureParentDirectory(NSString *filePath) {
    NSString *directory = [filePath stringByDeletingLastPathComponent];
    if (directory.length == 0) {
        return;
    }
    NSError *error = nil;
    // withIntermediateDirectories:YES also succeeds when the directory already exists.
    if (![[NSFileManager defaultManager] createDirectoryAtPath:directory
                                   withIntermediateDirectories:YES
                                                    attributes:nil
                                                         error:&error]) {
        NSLog(@"DRParseChapter: could not create directory %@: %@", directory, error);
    }
}

@implementation DRParseChapter (LocalFile)

/// Serializes the chapters to an array of dictionaries and writes it to
/// `plistFilePath`. No-op for a nil/empty array or a nil/empty path.
+(void)writeDRParseChaptersArray:(NSArray*)parseChapterArr ToPlistFileWithPlistFilePath:(NSString*)plistFilePath{
    if (parseChapterArr.count == 0 || plistFilePath.length == 0) {
        return;
    }
    NSMutableArray *chapterPropertiesArr = [NSMutableArray arrayWithCapacity:parseChapterArr.count];
    for (DRParseChapter *chapter in parseChapterArr) {
        [chapterPropertiesArr addObject:[chapter getPropertiesDic]];
    }
    DREnsureParentDirectory(plistFilePath);
    // The method returns void, so a failed write can only be reported in the log.
    if (![chapterPropertiesArr writeToFile:plistFilePath atomically:YES]) {
        NSLog(@"DRParseChapter: could not write chapter list to %@", plistFilePath);
    }
}

/// Writes the book properties (name, cover path, author; each only when non-nil)
/// to `<dir>/<kbookPropertiesDic>/property.plist` and then the chapter list to
/// `plistFilePath`, where `<dir>` is the directory containing `plistFilePath`.
+(void)writeDRParseChaptersArray:(NSArray*)parseChapterArr withCoverFilePath:(NSString*)coverPath withBookName:(NSString*)bookName withAuthor:(NSString*)author ToPlistFileWithPlistFilePath:(NSString*)plistFilePath{
    if (parseChapterArr.count == 0 || plistFilePath.length == 0) {
        return;
    }

    NSString *bookPropertiesPath = [[[plistFilePath stringByDeletingLastPathComponent] stringByAppendingPathComponent:kbookPropertiesDic] stringByAppendingPathComponent:@"property.plist"];
    NSMutableDictionary *propertiesDic = [NSMutableDictionary dictionary];
    if (bookName) propertiesDic[kBookName] = bookName;
    if (coverPath) propertiesDic[kBookCoverPath] = coverPath;
    if (author) propertiesDic[kBookAuthor] = author;

    DREnsureParentDirectory(bookPropertiesPath);
    if (![propertiesDic writeToFile:bookPropertiesPath atomically:YES]) {
        NSLog(@"DRParseChapter: could not write book properties to %@", bookPropertiesPath);
    }
    [self writeDRParseChaptersArray:parseChapterArr ToPlistFileWithPlistFilePath:plistFilePath];
}

/// Loads the chapter list stored at `plistFilePath`.
/// Entries that are not dictionaries, or that are empty, are skipped instead of
/// being inserted as nil (which would raise).
/// @return Array of DRParseChapter, or nil when nothing usable was found.
+(NSArray*)parseChapterArrayFromPlistFilePath:(NSString*)plistFilePath{
    if (!plistFilePath) {
        return nil;
    }
    NSArray *chapterPropertiesArr = [NSArray arrayWithContentsOfFile:plistFilePath];
    if (chapterPropertiesArr.count == 0) {
        return nil;
    }
    NSMutableArray *chapterArr = [NSMutableArray arrayWithCapacity:chapterPropertiesArr.count];
    for (id entry in chapterPropertiesArr) {
        if (![entry isKindOfClass:[NSDictionary class]]) {
            continue;
        }
        DRParseChapter *chapter = [self getParseChapterFromProperties:entry];
        if (chapter) {
            [chapterArr addObject:chapter];
        }
    }
    return chapterArr.count > 0 ? chapterArr : nil;
}

/// Loads the chapter list plus the book properties stored beside it.
/// @return @{@"chapters": ..., @"properties": ...} (the latter only when the
///         properties file could be read), or nil when there are no chapters.
+(NSDictionary*)parseChapterDicFromPlistFilePath:(NSString*)plistFilePath{
    if (!plistFilePath) {
        return nil;
    }
    NSString *bookPropertiesPath = [[[plistFilePath stringByDeletingLastPathComponent] stringByAppendingPathComponent:kbookPropertiesDic] stringByAppendingPathComponent:@"property.plist"];
    NSDictionary *propertiesDic = [NSDictionary dictionaryWithContentsOfFile:bookPropertiesPath];
    NSArray *chapters = [self parseChapterArrayFromPlistFilePath:plistFilePath];

    if (!chapters) {
        return nil;
    }
    if (propertiesDic) {
        return @{@"chapters":chapters,@"properties":propertiesDic};
    }
    return @{@"chapters":chapters};
}

/// Dictionary form of the receiver used for plist storage. epub chapters store
/// the epub-specific fields, every other type stores the start/end indexes.
-(NSDictionary*)getPropertiesDic{
    NSMutableDictionary *dic = [NSMutableDictionary dictionaryWithCapacity:5];
    // index is an NSInteger; numberWithInt: would truncate it on 64-bit.
    dic[@"index"] = [NSNumber numberWithInteger:self.index];
    if (self.chapterName) dic[@"chapterName"] = self.chapterName;
    dic[@"isEndChapter"] = [NSNumber numberWithBool:self.isEndChapter];
    dic[@"bookFileType"] = [NSNumber numberWithInteger:self.bookFileType];
    if (self.bookFileType == BookRootDicType_EPUB) {
        dic[@"isEpubCatalog"] = [NSNumber numberWithBool:self.isEpubCatalog];
        if (self.epubChapterFilePath) dic[@"epubChapterFilePath"] = self.epubChapterFilePath;
    }else{
        // Both properties are unsigned long long.
        dic[@"chapterStartIndex"] = [NSNumber numberWithUnsignedLongLong:self.chapterStartIndex];
        dic[@"chapterEndIndex"] = [NSNumber numberWithUnsignedLongLong:self.chapterEndIndex];
    }
    return dic;
}

/// Rebuilds a chapter from the dictionary produced by -getPropertiesDic.
/// @return nil for a nil or empty dictionary.
+(DRParseChapter*)getParseChapterFromProperties:(NSDictionary*)dic{
    if (dic.count == 0) {
        return nil;
    }
    DRParseChapter *chapter = [[DRParseChapter alloc] init];
    chapter.index = [dic[@"index"] integerValue];
    chapter.chapterName = dic[@"chapterName"];
    chapter.chapterStartIndex = [dic[@"chapterStartIndex"] unsignedLongLongValue];
    chapter.chapterEndIndex = [dic[@"chapterEndIndex"] unsignedLongLongValue];
    chapter.isEndChapter = [dic[@"isEndChapter"] boolValue];

    chapter.bookFileType = [dic[@"bookFileType"] integerValue];
    chapter.isEpubCatalog = [dic[@"isEpubCatalog"] boolValue];
    chapter.epubChapterFilePath = dic[@"epubChapterFilePath"];
    return chapter;
}

/// Debug description.
/// NOTE(review): this overrides NSObject's -description from a category; moving
/// it into the main @implementation of DRParseChapter would be safer.
-(NSString *)description{
    if (self.bookFileType == BookRootDicType_EPUB) {
        return [NSString stringWithFormat:@"index:%ld,chapterName:%@,bookFileType:epub,\n epubChapterFilePath:%@\n",(long)self.index,self.chapterName,self.epubChapterFilePath];
    }
    return [NSString stringWithFormat:@"index:%ld,chapterName:%@,startIndex:%llu,endIndex:%llu,,isEnd:%@",(long)self.index,self.chapterName,self.chapterStartIndex,self.chapterEndIndex,self.isEndChapter?@"yes":@"no"];
}
@end
117 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/DRParseChapter.h: -------------------------------------------------------------------------------- 1 | // 2 | // DRParseChapter.h 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-9-23. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved.
7 | //

#import <Foundation/Foundation.h>
#import "DRParseChapterContent.h"

/// Information about one chapter found while parsing a book.
@interface DRParseChapter : NSObject
/// Sequence number of the chapter (the epub parser assigns 0, 1, 2, ... in spine order).
@property (assign,nonatomic) NSInteger index;
/// Chapter title; may be nil. Copied so a mutable string passed in cannot change later.
@property (copy,nonatomic) NSString *chapterName;
/// Kind of book this chapter belongs to (BookRootDicType_EPUB for epub chapters).
@property (assign,nonatomic) BookRootDicType bookFileType;
/// YES for the last chapter of the book.
@property (assign,nonatomic) BOOL isEndChapter;

#pragma mark - Properties specific to txt books
/// Start of the chapter — presumably an offset into the txt file; confirm against DRTextBookParse.
@property (assign,nonatomic) unsigned long long chapterStartIndex;
/// End of the chapter — presumably an offset into the txt file; confirm against DRTextBookParse.
@property (assign,nonatomic) unsigned long long chapterEndIndex;

#pragma mark - Properties specific to epub books
/// Path of the chapter's content file. The epub parser stores it relative to the
/// app document directory.
@property (copy,nonatomic) NSString *epubChapterFilePath;
/// Whether the chapter is listed in the epub's catalog; some epub chapter
/// content does not appear in the catalog.
@property (assign,nonatomic) BOOL isEpubCatalog;
@end
26 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/DRParseChapter.m: -------------------------------------------------------------------------------- 1 | // 2 | // DRParseChapter.m 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-9-23. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved. 7 | // 8 |

#import "DRParseChapter.h"

@implementation DRParseChapter

@end
14 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/DRTextBookParse.h: -------------------------------------------------------------------------------- 1 | // 2 | // DRTextBookParse.h 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-8-5. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved.
7 | //

#import <Foundation/Foundation.h>
#import "DRBookParse.h"
#import "DRParseChapter.h"
#import "DRParseChapter+LocalFile.h"

/// Separator character used with chapter names.
/// NOTE(review): its exact use lives in DRTextBookParse.m, which is not shown here.
#define kbookChapterNameSperateChar @"#"


/// Parses txt-format novels, splitting out the chapters and saving them
/// separately under the directory named after the novel inside Documents.
@interface DRTextBookParse : DRBookParse

/// Walks all chapter information of the whole book (declaration currently disabled).
//+(void)parseBookWithBookFilePath:(NSString*)filePath findTheChapterBlock:(void(^)(NSString *chapterFilePath))findTheChapter withComplete:(void (^)(BOOL success))success withFailure:(void (^)(NSError *error))failure;


/// Parses the txt book at `filePath`.
/// @param filePath              Path of the txt file.
/// @param startIndex            Chapter index to start from.
/// @param findFirstChapterBlock Receives the first chapter found.
/// @param findChapterBlock      Progress callback: bytes read so far, total length, and the chapter just found.
/// @param success               Receives `chaptersArray`, which holds DRParseChapter objects.
/// @param failure               Receives the error when parsing fails.
/// NOTE(review): threading and call order of the blocks are defined in the .m file; confirm there.
+(void)parseBookWithBookFilePath:(NSString *)filePath
           withStartChapterIndex:(int)startIndex
                loadFirstChapter:(void (^)(DRParseChapter *findChapter))findFirstChapterBlock
                   progressBlock:(void (^)(unsigned long long readLength,unsigned long long totalLength,DRParseChapter *findChapter))findChapterBlock
                    withComplete:(void (^)(NSArray *chaptersArray))success
                     withFailure:(void (^)(NSError *))failure;

@end
34 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/NSData+encoding.h: -------------------------------------------------------------------------------- 1 | // 2 | // NSData+encoding.h 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-8-6. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved. 7 | // 8 |

#import <Foundation/Foundation.h>

@interface NSData (encoding)
/// Determines the text encoding. The file at `filePath` is tried first (letting
/// Foundation detect the encoding); if that fails, the receiver's bytes are
/// tried as GBK, GB2312 and GB18030 in that order. Falls back to UTF-8.
-(NSStringEncoding)getCharEncodingWithFilePath:(NSString*)filePath;
@end
15 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/NSData+encoding.m: -------------------------------------------------------------------------------- 1 | // 2 | // NSData+encoding.m 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-8-6. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved.
7 | //

#import "NSData+encoding.h"

@implementation NSData (encoding)

/// Determines the text encoding.
///
/// 1. Lets Foundation sniff the encoding of the file at `filePath`.
/// 2. If that fails, tries to decode the receiver's bytes as GBK, then GB2312,
///    then GB18030, returning the first encoding that decodes.
/// 3. Otherwise returns NSUTF8StringEncoding.
-(NSStringEncoding)getCharEncodingWithFilePath:(NSString*)filePath{
    NSStringEncoding encoding = 0;
    NSError *error = nil;
    NSString *fileText = [[NSString alloc] initWithContentsOfFile:filePath usedEncoding:&encoding error:&error];
    // Cocoa convention: success is signalled by the return value. The error
    // pointer is only meaningful after a failure.
    if (fileText != nil) {
        return encoding;
    }

    // Fallback order is significant and matches the original implementation.
    const CFStringEncoding candidates[] = {
        kCFStringEncodingGBK_95,
        kCFStringEncodingGB_2312_80,
        kCFStringEncodingGB_18030_2000,
    };
    for (size_t i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
        NSStringEncoding candidate = CFStringConvertEncodingToNSStringEncoding(candidates[i]);
        if ([[NSString alloc] initWithData:self encoding:candidate]) {
            return candidate;
        }
    }

    return NSUTF8StringEncoding;
}
@end
37 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/NSString+replaceNumbers.h: -------------------------------------------------------------------------------- 1 | // 2 | // NSString+replaceNumbers.h 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-9-28. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved.
7 | //

#import <Foundation/Foundation.h>

@interface NSString(replaceNumbers)

/// Converts the first run of Arabic digits in the receiver (ASCII commas are
/// ignored) to Chinese numerals.
/// Returns @"" when the receiver has no digits, and the receiver itself when
/// the run is longer than 12 digits.
-(NSString*)replaceNumbersWithChineseChar;

/// Replaces every run of Arabic digits in the receiver with Chinese numerals.
/// ASCII commas and spaces are removed from the whole string first.
-(NSString*)replaceNumberStringWhenAllDiscoverToChineseChar;

@end
20 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/NSString+replaceNumbers.m: -------------------------------------------------------------------------------- 1 | // 2 | // NSString+replaceNumbers.m 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-9-28. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved. 7 | // 8 |

#import "NSString+replaceNumbers.h"

@implementation NSString(replaceNumbers)

/// Converts the first run of Arabic digits (commas stripped) to Chinese numerals.
/// The digits are consumed from the least significant end; `i` is the position
/// inside the current four-digit group and `unitIndex` selects the group unit
/// (万, then 亿).
-(NSString*)replaceNumbersWithChineseChar{
    NSArray *numbers = @[@"零",@"一",@"二",@"三",@"四",@"五",@"六",@"七",@"八",@"九"];
    NSArray *unitNumbers = @[@"",@"十",@"百",@"千",@"万",@"亿"];
    NSString *tmpNum = [self stringByReplacingOccurrencesOfString:@"," withString:@""];
    NSRange numRange = [tmpNum rangeOfString:@"[0-9]+" options:NSRegularExpressionSearch];
    if (numRange.location == NSNotFound) {
        return @"";
    }
    // Only 万 and 亿 are available as group units, so at most 12 digits are handled.
    if (numRange.length > 12) {
        return self;
    }
    unsigned long long number = [tmpNum substringWithRange:numRange].longLongValue;
    if (number < 10) {
        return numbers[number];
    }
    NSMutableString *result = [NSMutableString string];
    int unitIndex = 4;
    int zeroCount = 0;
    BOOL isInsertZero = YES;
    for (int i = 1; i <= unitNumbers.count; i++) {

        if (i > 4) {
            // Crossing into the next four-digit group: emit its unit (万/亿) only
            // when that group holds a non-zero digit. The previous test,
            // number%(10*unitIndex), checked divisibility by 40/50 and dropped
            // the unit for values such as 400000 or 5000000000.
            if (number%10000 != 0) {
                [result insertString:unitNumbers[unitIndex] atIndex:0];
            }
            unitIndex++;
            i=1;
        }

        if (number%10 == 0) {// consecutive zeros are shown as a single 零
            number = number/10;
            zeroCount++;
            if (i > 1 && isInsertZero) {
                [result insertString:[numbers firstObject] atIndex:0];
                isInsertZero = NO;
            }
            continue;
        }else{
            zeroCount = 0;
            isInsertZero = YES;
        }

        [result insertString:unitNumbers[i-1] atIndex:0];
        if (number%10 == 1 && i == 2 && number/10 == 0) {
            // A leading "1" in the tens place is written 十, not 一十.
        }else{
            [result insertString:numbers[number%10] atIndex:0];
        }

        if (number < 10) {
            break;
        }

        number = number/10;
    }
    // Trailing zeros leave a dangling 零 at the end; drop it.
    if (result.length > 1) {
        NSString *lastChar = [result substringFromIndex:result.length-1];
        if ([lastChar isEqualToString:@"零"]) {
            return [result substringToIndex:result.length-1];
        }
    }

    return result;
}

/// Replaces every run of Arabic digits in the receiver with Chinese numerals,
/// after removing all ASCII commas and spaces.
-(NSString*)replaceNumberStringWhenAllDiscoverToChineseChar{
    NSMutableString *text = [NSMutableString stringWithString:self];
    [text replaceOccurrencesOfString:@"," withString:@"" options:0 range:NSMakeRange(0, text.length)];
    [text replaceOccurrencesOfString:@" " withString:@"" options:0 range:NSMakeRange(0, text.length)];

    // Search resumes after each replacement. Runs longer than 12 digits come
    // back unchanged from -replaceNumbersWithChineseChar; restarting from the
    // beginning would match them again and never terminate.
    NSUInteger searchStart = 0;
    while (searchStart < text.length) {
        NSRange searchRange = NSMakeRange(searchStart, text.length - searchStart);
        NSRange numRange = [text rangeOfString:@"[0-9]+" options:NSRegularExpressionSearch range:searchRange];
        if (numRange.location == NSNotFound) {
            break;
        }
        NSString *replaceStr = [[text substringWithRange:numRange] replaceNumbersWithChineseChar];
        [text replaceCharactersInRange:numRange withString:replaceStr];
        searchStart = numRange.location + replaceStr.length;
    }
    return [text copy];
}
@end
95 | -------------------------------------------------------------------------------- /TextBookParse/bookParse/NSString+replaceNumbersTest.m: -------------------------------------------------------------------------------- 1 | // 2 | // NSString+replaceNumbersTest.m 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-9-28. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved.
7 | //

#import <XCTest/XCTest.h>
#import "NSString+replaceNumbers.h"

/// Unit tests for the NSString (replaceNumbers) category.
@interface NSString_replaceNumbersTest : XCTestCase

@end

@implementation NSString_replaceNumbersTest

- (void)setUp
{
    [super setUp];
    // Put setup code here. This method is called before the invocation of each test method in the class.
}

- (void)tearDown
{
    // Put teardown code here. This method is called after the invocation of each test method in the class.
    [super tearDown];
}

/// Single-number conversion, including comma-grouped input and zero handling.
- (void)testReplaceNumbersWithChineseChar
{
    XCTAssertEqualObjects([@"" replaceNumbersWithChineseChar], @"",@"");
    XCTAssertEqualObjects([@"0" replaceNumbersWithChineseChar], @"零",@"");

    XCTAssertEqualObjects([@"10" replaceNumbersWithChineseChar], @"十",@"");
    XCTAssertEqualObjects([@"20" replaceNumbersWithChineseChar], @"二十",@"");

    XCTAssertEqualObjects([@"22" replaceNumbersWithChineseChar], @"二十二",@"");
    XCTAssertEqualObjects([@"12" replaceNumbersWithChineseChar], @"十二",@"");
    XCTAssertEqualObjects([@"312" replaceNumbersWithChineseChar], @"三百一十二",@"");
    XCTAssertEqualObjects([@"302" replaceNumbersWithChineseChar], @"三百零二",@"");

    XCTAssertEqualObjects([@"101" replaceNumbersWithChineseChar], @"一百零一",@"");
    XCTAssertEqualObjects([@"10100" replaceNumbersWithChineseChar], @"一万零一百",@"");
    XCTAssertEqualObjects([@"101001" replaceNumbersWithChineseChar], @"十万一千零一",@"");

    XCTAssertEqualObjects([@"303030030" replaceNumbersWithChineseChar], @"三亿零三百零三万零三十",@"");
    XCTAssertEqualObjects([@"3,0303,0030" replaceNumbersWithChineseChar], @"三亿零三百零三万零三十",@"");

    XCTAssertEqualObjects([@"3,0000,0000" replaceNumbersWithChineseChar], @"三亿",@"");
    XCTAssertEqualObjects([@"300,0000" replaceNumbersWithChineseChar], @"三百万",@"");
    XCTAssertEqualObjects([@"300000" replaceNumbersWithChineseChar], @"三十万",@"");
    XCTAssertEqualObjects([@"3000" replaceNumbersWithChineseChar], @"三千",@"");

    XCTAssertEqualObjects([@"30300000000" replaceNumbersWithChineseChar], @"三百零三亿",@"");
    XCTAssertEqualObjects([@"303000000000" replaceNumbersWithChineseChar], @"三千零三十亿",@"");

    // Runs longer than 12 digits are returned unchanged. A second assertion used
    // to demand @"三万零三百亿" for this same 13-digit input, which contradicted
    // the line below and could never pass; it has been removed.
    XCTAssertEqualObjects([@"3030000000000" replaceNumbersWithChineseChar], @"3030000000000",@"");

    XCTAssertEqualObjects([@"100" replaceNumbersWithChineseChar], @"一百",@"");
}

/// Whole-string conversion: every digit run is replaced in place.
-(void)testReplaceNumberStringWhenAllDiscoverToChineseChar{
    XCTAssertEqualObjects([@"123" replaceNumberStringWhenAllDiscoverToChineseChar], @"一百二十三", @"");
    XCTAssertEqualObjects([@"35号给你356984块钱!" replaceNumberStringWhenAllDiscoverToChineseChar],@"三十五号给你三十五万六千九百八十四块钱!", @"");
}
@end
70 | -------------------------------------------------------------------------------- /TextBookParse/bookTest.txt: -------------------------------------------------------------------------------- 1 | 制作时间: 2012-11-18 22:42:04 2 | 3 | 4 | 5 | [正文 第一章 少年] 6 | 7 |   茅山! 8 |   是中国江苏省的一座道教名山,是道教上清派的发源地,被道家称为“上清宗坛”,有“第一福地,第八洞天”之美誉! 9 |   因山势曲折,形似“已”字,故名句曲山,道家称“句曲之金陵,是养真之福境,成神之灵墟”。(更新速度最快尽在读书阁) 10 |   西汉时陕西咸阳茅氏三兄弟茅盈、茅固、茅衷来句曲山修道行善,益泽世人,后人为纪念茅氏功德,遂改句曲山为三茅山,简称“茅山”。 11 |    而两米多高的泥塑,从供台上摔下后,也是四分五裂散了一地,不知道这麻衣老祖在千年前,是否会算到自己有此劫难? 12 |   头上是人身毛细血管最密集的地方,就是平时擦破点皮,也会血流不止的,叶天小小的身体倒在地上,不一会鲜血就将身边的地面染红掉了。 13 |   外面的暴雨下的愈发的急了,一道道闪电在天空中肆虐着,破旧不堪的道观在这暴风雨中摇摇欲坠,似乎随时都可能坍塌下来。 14 |   ***** 15 |   ps:新书终于发上来了,心中忐忑,望诸君收藏推荐多多支持,打眼拜谢!
16 | 17 | 18 | [正文 第二章 受伤] 19 | 20 |   道观外的电闪雷鸣,映照的观内的光线也是忽明忽暗。(更新速度最快尽在读书阁) 21 |  似乎多了什么东西。 22 |   ******** 23 |   ps:新书需要收藏推荐票支持,走过路过的新老朋友,把推荐票都给咱吧,先谢谢大家了。 24 | 25 | 26 | [正文 第三章 相术【求推荐】] 27 | 28 |   “这是什么东西?” 29 |   脑中感觉到一阵眩晕之后,叶天发现,他的脑海里好像出现了一个巴掌大小的乌龟壳,上面密密麻麻的镌刻着各种图案。(阅读本书最新章节请访问读书阁) 30 |   当叶天仔细看去的时候,龟壳却突然消失不见了,一行篆书出现在了眼前,不过这倒是难不倒叶天,他从五岁的时候,就跟着老道学习篆文了。 31 |   “李善元,陕西华阴人,1880年生,兄弟三人,姐妹四人,1896年中前清秀才,师从麻衣神相四十九代传人……” 32 |    33 |   叶天深深的吸了口气,装出了一副小神棍的模样,说道:“当然是我看出来的,师傅,您眉毛细长,淡而不断,正是桃园三结义之相,应该是兄弟三人,右眼角有两颗小痣,却是鸾凤成双,那就是姐妹四位,加起来一共七个吧?” 34 |   前面说的比较笼统,叶天说的准确一点,想看看老道士是什么反应,如果这次说对了的话,那就能证明自己绝对不是出现了幻觉了。 35 |   “哦,对了,师傅,我看您印堂有些发暗,说不定您今儿就有血光之灾啊……” 36 |    老道士闻言笑着摇了摇头,他这一生几乎见证了中国近代所有的大事件,早已是勘破世情,荣华富贵对于一个百岁老人而言,真的没有什么吸引力了。 37 |   忽然想起一件事来,老道士站了起来:“行了,这些以后再说,也该带你下山见识一下了,去,把房间箱子里的道袍拿来……” 38 |   ********** 39 |   ps:冲上新人榜了,不过吊在榜尾,朋友们有推荐票的支持下,新书期需要各种票票,麻烦大家了。 40 | 41 | 42 | [正文 第四章 高人] 43 | 44 |   在下山的小路上,一大一小两个身影,正往山下走去,不过让人感到滑稽的是,这两人穿着一身干净的道袍,却均是没有穿鞋,赤脚踩在泥泞的小路上。(阅读本书最新章节请访问读书阁) 45 |     老道看了苗老大一眼,出右手,屈食指(含一气化三清之义),抬至胸前,开口说道:“我和门下弟子行径路过这里,发现贵宅阴煞极重,这里地处道教圣地,不知道这位居士为何不找人化解呢?” 46 |    听到叶天的话后,苗老大有些怀疑的看向这小道士,要说老道身上的确有股子高人的味道,但这小毛孩子懂什么啊? 47 |   ********* 48 |   ps:求推荐票,新书需要大家的呵护,没有投票习惯的朋友们,也上号点击收藏推荐支援一下吧,谢谢大家! 49 | 50 | 51 | [正文 第五章 欲擒故纵] 52 | 53 |   connection: close 54 |    “喊名字?”孩子母亲愣了一下,不知道老神仙是个什么意思。 55 |   “哎,哎,弟妹,快,听真人的话,去喊啊……”听到老道的话后,苗老大连忙推了弟媳妇一把。 56 |   在农村本就有叫魂一说,也有些地方叫做“喊惊”或者是“喊魂”,女人没听说过,苗老大却是知晓的,所以对老道的话是深信不疑。 57 |   “好孩子,不哭喽,魂归来兮……” 58 |   听到女人的声音响起之后,老道左手抱着孩子,伸出右手,装模作样的在空中虚抓了一把,然后轻抚着孩子的胸背处。 59 |   “哎,不哭啦,不哭啦……” 60 |   似乎这孩子的魂魄真的被老道抓回来一般,原本哭嚎不止的婴儿,忽然停住了口,睁着一双满是泪水的大眼睛,好奇的看着老道。 61 |   这立竿见影的效果,让苗老大震惊之余,狂喜了起来,二弟已经去了,这苗家唯一的血脉可再不能出事了。 62 |    “对,对,我有次去南方送货,那里的水产养殖很发达,所以我也挖了这两个鱼塘,就是一个月前的事情……” 63 |     见到苗老大如此盛情款款的挽留,老道士自然是从善如流,带着叶天又回到了宅子里。 64 |   ******* 65 |   ps:咳咳,那个,推荐票还是有点少啊,大家多支持下,今儿能到三千吗?拜托诸位了! 
66 | -------------------------------------------------------------------------------- /TextBookParse/en.lproj/InfoPlist.strings: -------------------------------------------------------------------------------- 1 | /* Localized versions of Info.plist keys */ 2 | 3 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/DREpubBookParse.h: -------------------------------------------------------------------------------- 1 | // 2 | // DREpubBookParse.h 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-10-31. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved. 7 | // 8 |

#import <Foundation/Foundation.h>
#import "DRBookParse.h"

/// Parser for epub books.
@interface DREpubBookParse : DRBookParse

/// Parses the epub at `filePath`, or returns the cached result when the book
/// has been parsed before. Both blocks are invoked before this method returns.
/// @param filePath Path of the .epub file.
/// @param success  Receives `chaptersArray` (DRParseChapter objects), the book
///                 title, the cover file path and the author; the last three
///                 may be nil.
/// @param failure  Receives an NSError (code 1548, message under the @"msg"
///                 user-info key) when the path is empty or the book cannot be
///                 unzipped or read.
+(void)parseBookWithBookFilePath:(NSString *)filePath
                    withComplete:(void (^)(NSArray *chaptersArray,NSString *bookName,NSString *bookCoverFilePath,NSString *author))success
                     withFailure:(void (^)(NSError *))failure;
@end
17 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/DREpubBookParse.m: -------------------------------------------------------------------------------- 1 | // 2 | // DREpubBookParse.m 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-10-31. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved.
7 | //

#import "DREpubBookParse.h"
#import "SSZipArchive.h"
#import "KFEpubParser.h"
#import "DRParseChapter+LocalFile.h"

@interface DREpubBookParse()

@end

/// Error code used for every failure reported by this parser.
static const NSInteger kDREpubParseErrorCode = 1548;

/// Builds the error handed to failure blocks. The message stays under the
/// legacy @"msg" key and is also exposed as the localized description.
static NSError *DREpubParseError(NSString *message) {
    return [NSError errorWithDomain:@""
                               code:kDREpubParseErrorCode
                           userInfo:@{@"msg": message, NSLocalizedDescriptionKey: message}];
}

@implementation DREpubBookParse

/// Whether this book has been parsed before, i.e. its parse directory exists.
+(BOOL)hasBookParsedWithBookFilePath:(NSString*)filePath{
    if (!filePath) {
        return NO;
    }
    NSString *parseBookDic = [self getBookParseDicPathWithBookFilePath:filePath];
    return [[NSFileManager defaultManager] fileExistsAtPath:parseBookDic];
}


/// Directory holding the parsed form of the book: a sibling of the book file
/// named "<kbookTypeEPUB>_<book file name without extension>".
+(NSString*)getBookParseDicPathWithBookFilePath:(NSString*)filePath{
    if (!filePath) {
        return nil;
    }
    NSString *bookBaseName = [[filePath stringByDeletingPathExtension] lastPathComponent];
    NSString *directoryName = [NSString stringWithFormat:@"%@_%@",kbookTypeEPUB,bookBaseName];
    return [[filePath stringByDeletingLastPathComponent] stringByAppendingPathComponent:directoryName];
}


/// Whether the book is currently being parsed. Parsing here is synchronous, so
/// this always answers NO.
+(BOOL)hasBookParsedIsWorkingWithBookFilePath:(NSString *)filePath{
    return NO;
}

/// Parses the epub at `filePath`.
///
/// A previously stored chapter list is returned directly. Otherwise the epub is
/// unzipped into the parse directory, the root document is read, one
/// DRParseChapter is created per spine item (items with the ids "catalog",
/// "cover" and "ncx" are recorded separately and do not become chapters), and
/// the result is stored for next time.
+(void)parseBookWithBookFilePath:(NSString *)filePath
                    withComplete:(void (^)(NSArray *chaptersArray,NSString *bookName,NSString *bookCoverFilePath,NSString *author))success
                     withFailure:(void (^)(NSError *))failure{
    if (filePath.length == 0) {
        if (failure) {
            failure(DREpubParseError(@"需要解析的书籍不存在"));
        }
        return;
    }

    // Cached result from an earlier run?
    NSString *parsedBookDic = [self getBookParseDicPathWithBookFilePath:filePath];
    NSString *chapterPlistPath = [parsedBookDic stringByAppendingPathComponent:kBookChapterArrayFileName];
    NSDictionary *chapterDic = [DRParseChapter parseChapterDicFromPlistFilePath:chapterPlistPath];
    NSArray *chaptersArray = chapterDic[@"chapters"];
    NSDictionary *properties = chapterDic[@"properties"];
    if (chaptersArray.count > 0) {
        if (success) {
            success(chaptersArray,properties[kBookName],properties[kBookCoverPath],properties[kBookAuthor]);
        }
        return;
    }

    // Not cached: unzip into the parse directory and read the root document.
    if (![SSZipArchive unzipFileAtPath:filePath toDestination:parsedBookDic]) {
        if (failure) {
            failure(DREpubParseError(@"需要解析的书籍格式不支持"));
        }
        return;
    }
    KFEpubParser *parser = [[KFEpubParser alloc] init];
    NSURL *rootFile = [parser rootFileForBaseURL:[NSURL fileURLWithPath:parsedBookDic]];
    NSString *content = [NSString stringWithContentsOfURL:rootFile encoding:NSUTF8StringEncoding error:nil];
    DDXMLDocument *document = [[DDXMLDocument alloc] initWithXMLString:content options:kNilOptions error:nil];
    if (!document) {
        if (failure) {
            failure(DREpubParseError(@"需要解析的书籍格式不支持"));
        }
        return;
    }

    NSDictionary *manifestDic = [parser manifestFromDocument:document];
    if (manifestDic.count == 0) {
        if (failure) {
            failure(DREpubParseError(@"需要解析的书籍格式不支持"));
        }
        return;
    }
    NSDictionary *metaDataDic = [parser metaDataFromDocument:document];
    NSString *author = metaDataDic[@"creator"];
    NSString *bookName = metaDataDic[@"title"];
    NSString *coverPath = nil;
    NSString *catalogPath = nil;
    NSString *ncxPath = nil;
    NSInteger index = 0;
    NSMutableArray *parseChapterArray = [[NSMutableArray alloc] init];

    // Chapter paths are stored relative to the app document directory: strip
    // that prefix and then the leading "/".
    NSString *docPath = [rootFile.path stringByReplacingOccurrencesOfString:[self getAppDocumentPath].path withString:@""];
    if (docPath.length > 0) {
        docPath = [docPath stringByReplacingCharactersInRange:(NSRange){0,1} withString:@""];
    }
    NSString *contentDirectory = [docPath stringByDeletingLastPathComponent];

    NSArray *spineArray = [parser spineFromDocument:document];
    for (NSString *key in spineArray) {
        NSString *contentFile = manifestDic[key][@"href"];
        if (contentFile.length == 0) {
            // Spine item without a manifest entry: appending a nil path
            // component would raise, so the item is skipped.
            continue;
        }
        NSString *contentPath = [contentDirectory stringByAppendingPathComponent:contentFile];
        if ([key isEqualToString:@"catalog"]) {
            catalogPath = contentPath;
        }else if ([key isEqualToString:@"cover"]) {
            coverPath = contentPath;
        }else if ([key isEqualToString:@"ncx"]) {
            ncxPath = contentPath;
        }else{
            DRParseChapter *chapter = [[DRParseChapter alloc] init];
            chapter.index = index++;
            chapter.bookFileType = BookRootDicType_EPUB;
            chapter.epubChapterFilePath = contentPath;
            chapter.isEndChapter = NO;
            chapter.isEpubCatalog = NO;
            [parseChapterArray addObject:chapter];
        }
    }
    DRParseChapter *lastChapter = [parseChapterArray lastObject];
    lastChapter.isEndChapter = YES;

    // NOTE(review): the catalog is parsed but its result is not used yet, and
    // ncxPath is only recorded; kept as in the original pending a decision.
    NSArray *parseCatalogArray = nil;
    if (catalogPath) {
        parseCatalogArray = [parser catalogFromDocumentForCatalogFilePath:[[self getAppDocumentPath].path stringByAppendingPathComponent:catalogPath]];
    }

//    NSArray *parseChapterNameArray = nil;
//    if (ncxUrl) {
//        DDXMLDocument *ncxDoc = [[DDXMLDocument alloc] initWithXMLString:[[NSString alloc] initWithContentsOfURL:ncxUrl encoding:NSUTF8StringEncoding error:nil] options:kNilOptions error:nil];
//        parseChapterNameArray = [parser ncxFromDocument:ncxDoc];
//    }
    [DRParseChapter writeDRParseChaptersArray:parseChapterArray withCoverFilePath:coverPath withBookName:bookName withAuthor:author ToPlistFileWithPlistFilePath:chapterPlistPath];
    if (success) {
        success(parseChapterArray,bookName,coverPath,author);
    }
}

@end
148 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/DREpubBookParseTest.m:
-------------------------------------------------------------------------------- 1 | // 2 | // DREpubBookParseTest.m 3 | // TextBookParse 4 | // 5 | // Created by xxsy-ima001 on 14-11-4. 6 | // Copyright (c) 2014年 ___xiaoxiangwenxue___. All rights reserved. 7 | // 8 |

#import <UIKit/UIKit.h>
#import <XCTest/XCTest.h>
#import "DREpubBookParse.h"
#import "HTMLReader.h"

/// Tests for DREpubBookParse. They rely on the bundled resource "魔神紫星.epub".
@interface DREpubBookParseTest : XCTestCase

@end

@implementation DREpubBookParseTest

- (void)setUp {
    [super setUp];
    // Put setup code here. This method is called before the invocation of each test method in the class.
}

- (void)tearDown {
    // Put teardown code here. This method is called after the invocation of each test method in the class.
    [super tearDown];
}

/// Copies the bundled epub into Documents, parses it and reads one chapter's body text.
- (void)testExample {
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *path = [NSString stringWithFormat:@"%@/%@",paths[0],@"魔神紫星.epub"];

    NSData *epubData = [NSData dataWithContentsOfURL:[[NSBundle mainBundle] URLForResource:@"魔神紫星" withExtension:@"epub"]];
    [epubData writeToFile:path atomically:YES];

    [DREpubBookParse parseBookWithBookFilePath:path withComplete:^(NSArray *chaptersArray,NSString *bookName,NSString *bookCoverFilePath,NSString *author) {
        XCTAssertTrue(chaptersArray.count > 0, @"the parser returned no chapters");
        if (chaptersArray.count == 0) {
            return;
        }
        // The original indexed chaptersArray[10] unconditionally, which raises
        // for books with fewer than 11 chapters.
        NSUInteger sampleIndex = MIN((NSUInteger)10, chaptersArray.count - 1);
        DRParseChapter *chapter = chaptersArray[sampleIndex];
        NSString *chapterPath = [[DRBookParse getAppDocumentPath].path stringByAppendingPathComponent:chapter.epubChapterFilePath];
        NSString *html = [NSString stringWithContentsOfFile:chapterPath encoding:NSUTF8StringEncoding error:nil];
        if (!html) {
            // As before, an unreadable chapter file is tolerated here.
            return;
        }
        HTMLDocument *doc = [HTMLDocument documentWithString:html];
        NSArray *nodes = [doc nodesMatchingSelector:@"body"];
        HTMLNode *body = [nodes firstObject];
        NSString *string = [body textContent];
        NSLog(@"finished, body text length: %lu", (unsigned long)string.length);

    } withFailure:^(NSError *error) {
        XCTFail(@"parsing failed: %@", error);
    }];

}

- (void)testPerformanceExample {
    // This is an example of a performance test case.
    [self measureBlock:^{
        NSString *path = NSHomeDirectory();
        [DREpubBookParse parseBookWithBookFilePath:[NSString stringWithFormat:@"%@/%@",path,@"魔神紫星.epub"] withComplete:^(NSArray *chaptersArray,NSString *bookName,NSString *bookCoverFilePath,NSString *author) {

        } withFailure:^(NSError *error) {

        }];
        // Put the code you want to measure the time of here.
    }];
}

/// The bundled epub can be copied into the Documents directory.
-(void)testSaveFile{
    NSArray *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *path = [NSString stringWithFormat:@"%@/%@",paths[0],@"魔神紫星.epub"];
    NSData *data = [NSData dataWithContentsOfURL:[[NSBundle mainBundle] URLForResource:@"魔神紫星" withExtension:@"epub"]];
    [data writeToFile:path atomically:YES];

    XCTAssert([[NSFileManager defaultManager] fileExistsAtPath:path],@"文件不存在");
}


/// Extracts the body text of the bundled "002.xhtml" and collapses blank lines.
-(void)testHtmlParse{
    NSString *data = [NSString stringWithContentsOfFile:[[NSBundle mainBundle] pathForResource:@"002" ofType:@"xhtml"] encoding:NSUTF8StringEncoding error:nil];
    XCTAssertNotNil(data, @"fixture 002.xhtml is missing from the bundle or is not UTF-8");
    if (!data) {
        return;
    }
    HTMLDocument *doc = [HTMLDocument documentWithString:data];
    HTMLNode *body = [doc firstNodeMatchingSelector:@"body"];
    if (!body) {
        return;
    }

    NSString *string = [body textContent];

    NSArray *imgNodes= [doc nodesMatchingSelector:@"img[src]"];
    string = [string stringByReplacingOccurrencesOfString:@"[\n]+[\\s]*[\n]+" withString:@"\n" options:NSRegularExpressionSearch range:(NSRange){0,string.length}];
    NSLog(@"%@",string);
    NSString *result = [string stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
    // The original ended with an empty if/else on `result`; nothing is asserted
    // about the content yet, so only the sizes are logged.
    NSLog(@"trimmed length: %lu, images: %lu", (unsigned long)result.length, (unsigned long)imgNodes.count);
}
@end
99 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/KFEpubConstants.h:
-------------------------------------------------------------------------------- 1 | // KFEpubConstants.h 2 | // KFEpubKit 3 | // 4 | // Copyright (c) 2013 Rico Becker | KF INTERACTIVE 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files (the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | //

#import <Foundation/Foundation.h>


/// Error domain string of KFEpubKit (defined in KFEpubConstants.m).
extern NSString *const KFEpubKitErrorDomain;


/// Kind of book container.
typedef NS_ENUM(NSUInteger, KFEpubKitBookType)
{
    KFEpubKitBookTypeUnknown,
    KFEpubKitBookTypeEpub2,
    KFEpubKitBookTypeEpub3,
    KFEpubKitBookTypeiBook
};


/// Content encryption of a book. The spelling "Enryption" in the enumerator
/// names is part of the public API and is therefore kept.
typedef NS_ENUM(NSUInteger, KFEpubKitBookEncryption)
{
    KFEpubKitBookEnryptionNone,
    KFEpubKitBookEnryptionFairplay
};


/// Empty class that gives the constants above an implementation file.
@interface KFEpubConstants : NSObject

@end
50 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/KFEpubConstants.m: -------------------------------------------------------------------------------- 1 | // KFEpubConstants.m 2 | // KFEpubKit 3 | // 4 | // Copyright (c) 2013 Rico Becker | KF INTERACTIVE 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files (the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE.
//

#import "KFEpubConstants.h"


/// Definition of the error-domain string declared `extern` in KFEpubConstants.h.
NSString * const KFEpubKitErrorDomain = @"KFEpubKitErrorDomain";


/// Intentionally empty: the class declares no methods.
@implementation KFEpubConstants
@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/KFEpubParser.h:
--------------------------------------------------------------------------------
// KFEpubParser.h
// KFEpubKit
//
// Copyright (c) 2013 Rico Becker | KF INTERACTIVE
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// The import target was stripped during extraction; Foundation supplies
// NSObject, NSURL, NSArray and NSDictionary used below.
#import <Foundation/Foundation.h>
#import "KFEpubConstants.h"
#import "DDXML.h"

@class KFEpubParser;


/// Parsing entry points for an EPUB.
/// NOTE(review): the implementation is not visible from this header; confirm
/// what each `baseURL` / `document` argument is expected to point at against
/// KFEpubParser.m.
@interface KFEpubParser : NSObject

- (KFEpubKitBookType)bookTypeForBaseURL:(NSURL *)baseURL;

- (KFEpubKitBookEncryption)contentEncryptionForBaseURL:(NSURL *)baseURL;

- (NSURL *)rootFileForBaseURL:(NSURL *)baseURL;

- (NSString *)coverPathComponentFromDocument:(DDXMLDocument *)document;

- (NSDictionary *)metaDataFromDocument:(DDXMLDocument *)document;

- (NSArray *)spineFromDocument:(DDXMLDocument *)document;

- (NSDictionary *)manifestFromDocument:(DDXMLDocument *)document;

- (NSArray *)guideFromDocument:(DDXMLDocument *)document;

/// Returns the table-of-contents list; it contains fewer chapters than the
/// list obtained from the spine.
- (NSArray *)ncxFromDocument:(DDXMLDocument *)document;

/// Returns the table-of-contents list; it contains fewer chapters than the
/// list obtained from the spine.
- (NSArray *)catalogFromDocumentForCatalogFilePath:(NSString*)catalogFilePath;

/// Returns the parsed HTML content: `image` corresponds to an image path and
/// `content` to plain text.
+(NSArray*)contentFromHTMLDocumentForHtmlFilePath:(NSString*)filePath;
@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/KFEpubParserTest.m:
--------------------------------------------------------------------------------
//
// KFEpubParserTest.m
// TextBookParse
//
// Created by xxsy-ima001 on 14-11-5.
// Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

// Both import targets were stripped during extraction. XCTest is required by
// XCTestCase/XCTAssert; UIKit is what the Xcode iOS test template imports
// alongside it. NOTE(review): confirm against the original file.
#import <UIKit/UIKit.h>
#import <XCTest/XCTest.h>
#import "KFEpubParser.h"
#import "HTMLReader.h"

@interface KFEpubParserTest : XCTestCase

@end

@implementation KFEpubParserTest

- (void)setUp {
    [super setUp];
    // Put setup code here. This method is called before the invocation of each test method in the class.
}

- (void)tearDown {
    // Put teardown code here. This method is called after the invocation of each test method in the class.
    [super tearDown];
}

- (void)testExample {
    // This is an example of a functional test case.
    XCTAssert(YES, @"Pass");
}

- (void)testPerformanceExample {
    // This is an example of a performance test case.
    [self measureBlock:^{
        // Put the code you want to measure the time of here.
    }];
}

/// Parses the bundled XHTML fixture and checks that walking every element by
/// hand finds the same <p> and <img src> elements as the CSS selectors do.
- (void)testHtmlParse {
    NSString *path = [[NSBundle mainBundle] pathForResource:@"Section0002_0001_0012_0001" ofType:@"xhtml"];
    NSError *readError = nil;
    NSString *data = nil;
    if (path) {
        data = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&readError];
    }
    if (!data) {
        // Kept best-effort, as before: without the fixture there is nothing to
        // check. The reason is logged instead of being silently discarded.
        NSLog(@"testHtmlParse: fixture not readable (path=%@, error=%@)", path, readError);
        return;
    }

    HTMLDocument *doc = [HTMLDocument documentWithString:data];
    HTMLElement *body = [doc firstNodeMatchingSelector:@"body"];
    NSArray *imgNodes = [body nodesMatchingSelector:@"img[src]"];
    NSArray *pNodes = [body nodesMatchingSelector:@"p"];
    NSArray *allNodes = [body nodesMatchingSelector:@"*"];

    // Manual classification of every element under <body>.
    NSMutableArray *ps = [NSMutableArray array];
    NSMutableArray *imgs = [NSMutableArray array];
    for (HTMLElement *node in allNodes) {
        if ([node.tagName isEqualToString:@"img"] && node.attributes[@"src"]) {
            [imgs addObject:node];
        }
        if ([node.tagName isEqualToString:@"p"]) {
            [ps addObject:node];
        }
    }

    XCTAssertEqual(ps.count, pNodes.count,
                   @"manual <p> traversal and the \"p\" selector should agree");
    XCTAssertEqual(imgs.count, imgNodes.count,
                   @"manual <img src> traversal and the \"img[src]\" selector should agree");
}
@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/Additions/DDXMLElementAdditions.h:
--------------------------------------------------------------------------------
#import <Foundation/Foundation.h>
#import "DDXML.h"

// These methods are not part of the standard NSXML API.
// But any developer working extensively with XML will likely appreciate them.

@interface DDXMLElement (DDAdditions)

/// Creates an element named `name` and sets its default namespace to `ns`.
+ (DDXMLElement *)elementWithName:(NSString *)name xmlns:(NSString *)ns;

/// First child element named `name`, or nil when there is none.
- (DDXMLElement *)elementForName:(NSString *)name;
/// First child element with local name `name` in namespace `xmlns`, or nil.
- (DDXMLElement *)elementForName:(NSString *)name xmlns:(NSString *)xmlns;

/// String value of the namespace registered for the empty prefix.
- (NSString *)xmlns;
/// Adds `ns` as the namespace for the empty prefix.
- (void)setXmlns:(NSString *)ns;

- (NSString *)prettyXMLString;
- (NSString *)compactXMLString;

- (void)addAttributeWithName:(NSString *)name stringValue:(NSString *)string;

/// Attribute names mapped to their string values.
- (NSDictionary *)attributesAsDictionary;

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/Additions/DDXMLElementAdditions.m:
--------------------------------------------------------------------------------
#import "DDXMLElementAdditions.h"

@implementation DDXMLElement (DDAdditions)

/**
 * Quick method to create an element
**/
+ (DDXMLElement *)elementWithName:(NSString *)name xmlns:(NSString *)ns
{
    DDXMLElement *element = [DDXMLElement elementWithName:name];
    [element setXmlns:ns];
    return element;
}

/**
 * This method returns the first child element for the given name.
 * If no child element exists for the given name, returns nil.
**/
- (DDXMLElement *)elementForName:(NSString *)name
{
    // Note: If you port this code to work with Apple's NSXML, beware of the following:
    //
    // There is a bug in the NSXMLElement elementsForName: method.
    // Consider the following XML fragment:
    //
    // <query xmlns="jabber:iq:private">
    //   <x xmlns="some:other:namespace"></x>
    // </query>
    //
    // Calling [query elementsForName:@"x"] results in an empty array!
    //
    // However, it will work properly if you use the following:
    // [query elementsForLocalName:@"x" URI:@"some:other:namespace"]
    //
    // The trouble with this is that we may not always know the xmlns in advance,
    // so in this particular case there is no way to access the element without looping through the children.
    //
    // This bug was submitted to apple on June 1st, 2007 and was classified as "serious".
    //
    // --!!-- This bug does NOT exist in DDXML --!!--

    // -firstObject is nil for an empty (or nil) array, which is exactly the
    // "no such child" result this method documents.
    return [[self elementsForName:name] firstObject];
}

/**
 * This method returns the first child element for the given name and given xmlns.
 * If no child elements exist for the given name and given xmlns, returns nil.
**/
- (DDXMLElement *)elementForName:(NSString *)name xmlns:(NSString *)xmlns
{
    return [[self elementsForLocalName:name URI:xmlns] firstObject];
}

/**
 * Returns the common xmlns "attribute", which is only accessible via the namespace methods.
 * The xmlns value is often used in jabber elements.
**/
- (NSString *)xmlns
{
    return [[self namespaceForPrefix:@""] stringValue];
}

- (void)setXmlns:(NSString *)ns
{
    // If you use setURI: then the xmlns won't be displayed in the XMLString.
    // Adding the namespace this way works properly.
    //
    // This applies to both Apple's NSXML and DDXML.

    [self addNamespace:[DDXMLNode namespaceWithName:@"" stringValue:ns]];
}

/**
 * Shortcut to get a pretty (formatted) string representation of the element.
**/
- (NSString *)prettyXMLString
{
    return [self XMLStringWithOptions:(DDXMLNodePrettyPrint | DDXMLNodeCompactEmptyElement)];
}

/**
 * Shortcut to get a compact string representation of the element.
**/
- (NSString *)compactXMLString
{
    return [self XMLStringWithOptions:DDXMLNodeCompactEmptyElement];
}

/**
 * Shortcut to avoid having to manually create a DDXMLNode everytime.
**/
- (void)addAttributeWithName:(NSString *)name stringValue:(NSString *)string
{
    [self addAttribute:[DDXMLNode attributeWithName:name stringValue:string]];
}

/**
 * Returns all the attributes as a dictionary (attribute name -> string value).
 *
 * An attribute whose name or string value is nil is skipped: inserting nil
 * into a dictionary raises an exception, and one odd attribute should not
 * make the whole element unreadable.
**/
- (NSDictionary *)attributesAsDictionary
{
    NSArray *attributes = [self attributes];
    NSMutableDictionary *result = [NSMutableDictionary dictionaryWithCapacity:[attributes count]];

    for (DDXMLNode *node in attributes)
    {
        NSString *attrName = [node name];
        NSString *attrValue = [node stringValue];

        if (attrName != nil && attrValue != nil)
        {
            [result setObject:attrValue forKey:attrName];
        }
    }
    return result;
}

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/Categories/NSString+DDXML.h:
--------------------------------------------------------------------------------
// Both import targets were stripped during extraction. Foundation supplies
// NSString; the xmlChar type used below is declared by libxml2.
#import <Foundation/Foundation.h>
#import <libxml/tree.h>


@interface NSString (DDXML)

/**
 * xmlChar - A basic replacement for char, a byte in a UTF-8 encoded string.
**/
- (const xmlChar *)xmlChar;

- (NSString *)stringByTrimming;

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/Categories/NSString+DDXML.m:
--------------------------------------------------------------------------------
#import "NSString+DDXML.h"

#if ! __has_feature(objc_arc)
#warning This file must be compiled with ARC. Use -fobjc-arc flag (or convert project to ARC).
#endif

@implementation NSString (DDXML)

/**
 * The receiver's UTF-8 bytes, typed as libxml's xmlChar.
 * The pointer is exactly what -UTF8String returns, so it follows the same
 * lifetime rules as that buffer.
**/
- (const xmlChar *)xmlChar
{
    const char *utf8Bytes = [self UTF8String];
    return (const xmlChar *)utf8Bytes;
}

#ifdef GNUSTEP
/**
 * GNUstep build: delegates to -stringByTrimmingSpaces.
**/
- (NSString *)stringByTrimming
{
    return [self stringByTrimmingSpaces];
}
#else
/**
 * Returns a copy of the receiver with leading and trailing whitespace removed.
 * The trimming is done by CFStringTrimWhitespace on a mutable copy, so the
 * receiver itself is not modified.
**/
- (NSString *)stringByTrimming
{
    NSMutableString *scratch = [self mutableCopy];
    CFStringTrimWhitespace((__bridge CFMutableStringRef)scratch);

    // Hand back an immutable snapshot rather than the scratch buffer.
    return [scratch copy];
}
#endif

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/DDXML.h:
--------------------------------------------------------------------------------
/**
 * Welcome to KissXML.
 *
 * The project page has documentation if you have questions.
 * https://github.com/robbiehanson/KissXML
 *
 * If you're new to the project you may wish to read the "Getting Started" wiki.
 * https://github.com/robbiehanson/KissXML/wiki/GettingStarted
 *
 * KissXML provides a drop-in replacement for Apple's NSXML class cluster.
 * The goal is to get the exact same behavior as the NSXML classes.
 *
 * For API Reference, see Apple's excellent documentation,
 * either via Xcode's Mac OS X documentation, or via the web:
 *
 * https://github.com/robbiehanson/KissXML/wiki/Reference
**/

#import "DDXMLNode.h"
#import "DDXMLElement.h"
#import "DDXMLDocument.h"



#if TARGET_OS_IPHONE && 0 // Disabled by default

// Since KissXML is a drop in replacement for NSXML,
// it may be desirable (when writing cross-platform code to be used on both Mac OS X and iOS)
// to use the NSXML prefixes instead of the DDXML prefix.
//
// This way, on Mac OS X it uses NSXML, and on iOS it uses KissXML.
32 | 33 | #ifndef NSXMLNode 34 | #define NSXMLNode DDXMLNode 35 | #endif 36 | #ifndef NSXMLElement 37 | #define NSXMLElement DDXMLElement 38 | #endif 39 | #ifndef NSXMLDocument 40 | #define NSXMLDocument DDXMLDocument 41 | #endif 42 | 43 | #ifndef NSXMLInvalidKind 44 | #define NSXMLInvalidKind DDXMLInvalidKind 45 | #endif 46 | #ifndef NSXMLDocumentKind 47 | #define NSXMLDocumentKind DDXMLDocumentKind 48 | #endif 49 | #ifndef NSXMLElementKind 50 | #define NSXMLElementKind DDXMLElementKind 51 | #endif 52 | #ifndef NSXMLAttributeKind 53 | #define NSXMLAttributeKind DDXMLAttributeKind 54 | #endif 55 | #ifndef NSXMLNamespaceKind 56 | #define NSXMLNamespaceKind DDXMLNamespaceKind 57 | #endif 58 | #ifndef NSXMLProcessingInstructionKind 59 | #define NSXMLProcessingInstructionKind DDXMLProcessingInstructionKind 60 | #endif 61 | #ifndef NSXMLCommentKind 62 | #define NSXMLCommentKind DDXMLCommentKind 63 | #endif 64 | #ifndef NSXMLTextKind 65 | #define NSXMLTextKind DDXMLTextKind 66 | #endif 67 | #ifndef NSXMLDTDKind 68 | #define NSXMLDTDKind DDXMLDTDKind 69 | #endif 70 | #ifndef NSXMLEntityDeclarationKind 71 | #define NSXMLEntityDeclarationKind DDXMLEntityDeclarationKind 72 | #endif 73 | #ifndef NSXMLAttributeDeclarationKind 74 | #define NSXMLAttributeDeclarationKind DDXMLAttributeDeclarationKind 75 | #endif 76 | #ifndef NSXMLElementDeclarationKind 77 | #define NSXMLElementDeclarationKind DDXMLElementDeclarationKind 78 | #endif 79 | #ifndef NSXMLNotationDeclarationKind 80 | #define NSXMLNotationDeclarationKind DDXMLNotationDeclarationKind 81 | #endif 82 | 83 | #ifndef NSXMLNodeOptionsNone 84 | #define NSXMLNodeOptionsNone DDXMLNodeOptionsNone 85 | #endif 86 | #ifndef NSXMLNodeExpandEmptyElement 87 | #define NSXMLNodeExpandEmptyElement DDXMLNodeExpandEmptyElement 88 | #endif 89 | #ifndef NSXMLNodeCompactEmptyElement 90 | #define NSXMLNodeCompactEmptyElement DDXMLNodeCompactEmptyElement 91 | #endif 92 | #ifndef NSXMLNodePrettyPrint 93 | #define NSXMLNodePrettyPrint 
DDXMLNodePrettyPrint 94 | #endif 95 | 96 | #endif // #if TARGET_OS_IPHONE 97 | 98 | 99 | 100 | // KissXML has rather straight-forward memory management: 101 | // https://github.com/robbiehanson/KissXML/wiki/MemoryManagementThreadSafety 102 | // 103 | // There are 3 important concepts to keep in mind when working with KissXML: 104 | // 105 | // 106 | // 1.) KissXML provides a light-weight wrapper around libxml. 107 | // 108 | // The parsing, creation, storage, etc of the xml tree is all done via libxml. 109 | // This is a fast low-level C library that's been around for ages, and comes pre-installed on Mac OS X and iOS. 110 | // KissXML provides an easy-to-use Objective-C library atop libxml. 111 | // So a DDXMLNode, DDXMLElement, or DDXMLDocument are simply objective-c objects 112 | // with pointers to the underlying libxml C structure. 113 | // The only time you need to be aware of any of this is when it comes to equality. 114 | // In order to maximize speed and provide read-access thread-safety, 115 | // the library may create multiple DDXML wrapper objects that point to the same underlying xml node. 116 | // So don't assume you can test for equality with "==". 117 | // Instead use the isEqual method (as you should generally do with objects anyway). 118 | // 119 | // 120 | // 2.) XML is implicitly a tree hierarchy, and the XML APIs are designed to allow traversal up & down the tree. 121 | // 122 | // The tree hierarchy and API contract have an implicit impact concerning memory management. 123 | // 124 | // <starbucks> 125 | // <latte/> 126 | // </starbucks> 127 | // 128 | // Imagine you have a DDXMLNode corresponding to the starbucks node, 129 | // and you have a DDXMLNode corresponding to the latte node. 130 | // Now imagine you release the starbucks node, but you retain a reference to the latte node. 131 | // What happens? 132 | // Well the latte node is a part of the xml tree hierarchy. 133 | // So if the latte node is still around, the xml tree hierarchy must stick around as well.
134 | // So even though the DDXMLNode corresponding to the starbucks node may get deallocated, 135 | // the underlying xml tree structure won't be freed until the latte node gets deallocated. 136 | // 137 | // In general, this means that KissXML remains thread-safe when reading and processing a tree. 138 | // If you traverse a tree and fork off asynchronous tasks to process subnodes, 139 | // the tree will remain properly in place until all your asynchronous tasks have completed. 140 | // In other words, it just works. 141 | // 142 | // However, if you parse a huge document into memory, and retain a single node from the giant xml tree... 143 | // Well you should see the problem this creates. 144 | // Instead, in this situation, copy or detach the node if you want to keep it around. 145 | // Or just extract the info you need from it. 146 | // 147 | // 148 | // 3.) KissXML is read-access thread-safe, but write-access thread-unsafe (designed for speed). 149 | // 150 | // <starbucks> 151 | // <latte/> 152 | // </starbucks> 153 | // 154 | // Imagine you have a DDXMLNode corresponding to the starbucks node, 155 | // and you have a DDXMLNode corresponding to the latte node. 156 | // What happens if you invoke [starbucks removeChildAtIndex:0]? 157 | // Well the underlying xml tree will remove the latte node, and release the associated memory. 158 | // And what if you still have a reference to the DDXMLNode that corresponds to the latte node? 159 | // Well the short answer is that you shouldn't use it. At all. 160 | // This is pretty obvious when you think about it from the context of this simple example. 161 | // But in the real world, you might have multiple threads running in parallel, 162 | // and you might accidentally modify a node while another thread is processing it. 163 | // 164 | // To completely fix this problem, and provide write-access thread-safety, would require extensive overhead. 165 | // This overhead is completely unwanted in the majority of cases.
166 | // Most XML usage patterns are heavily read-only. 167 | // And in the case of xml creation or modification, it is generally done on the same thread. 168 | // Thus the KissXML library is write-access thread-unsafe, but provides speedier performance. 169 | // 170 | // However, when such a bug does creep up, it produces horrible side-effects. 171 | // Essentially the pointer to the underlying xml structure becomes a dangling pointer, 172 | // which means that accessing the dangling pointer might give you the correct results, or completely random results. 173 | // And attempting to make modifications to nonexistent xml nodes via the dangling pointer might do nothing, 174 | // or completely corrupt your heap and cause unexplainable crashes in random parts of your library. 175 | // Heap corruption is one of the worst problems to track down. 176 | // So to help out, the library provides a debugging macro to track down these problems. 177 | // That is, if you violate the write-access thread-unsafe rule, 178 | // this macro will tell you when you're trying to access a now-dangling pointer. 179 | // 180 | // How does it work? 181 | // Well every time a DDXML wrapper object is created atop a libxml structure, 182 | // it marks the linkage in a table. 183 | // And every time a libxml structure is freed, it destroys all corresponding linkages in the table. 184 | // So every time a DDXML wrapper object is about to dereference its pointer, 185 | // it first ensures the linkage still exists in the table. 186 | // 187 | // Set to 1 to enable 188 | // Set to 0 to disable (this is the default) 189 | // 190 | // The debugging macro adds a significant amount of overhead, and should NOT be enabled on production builds. 191 | 192 | #if DEBUG 193 | #define DDXML_DEBUG_MEMORY_ISSUES 0 194 | #else 195 | #define DDXML_DEBUG_MEMORY_ISSUES 0 // Don't change me!
#endif

--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/DDXMLDocument.h:
--------------------------------------------------------------------------------
// The import target was stripped during extraction; Foundation supplies
// NSString, NSData, NSError and NSUInteger used below.
#import <Foundation/Foundation.h>
#import "DDXMLElement.h"
#import "DDXMLNode.h"

/**
 * Welcome to KissXML.
 *
 * The project page has documentation if you have questions.
 * https://github.com/robbiehanson/KissXML
 *
 * If you're new to the project you may wish to read the "Getting Started" wiki.
 * https://github.com/robbiehanson/KissXML/wiki/GettingStarted
 *
 * KissXML provides a drop-in replacement for Apple's NSXML class cluster.
 * The goal is to get the exact same behavior as the NSXML classes.
 *
 * For API Reference, see Apple's excellent documentation,
 * either via Xcode's Mac OS X documentation, or via the web:
 *
 * https://github.com/robbiehanson/KissXML/wiki/Reference
**/

// Content kinds, named after the NSXML equivalents. Only the commented-out
// documentContentKind accessors below refer to this type.
enum {
    DDXMLDocumentXMLKind = 0,
    DDXMLDocumentXHTMLKind,
    DDXMLDocumentHTMLKind,
    DDXMLDocumentTextKind
};
typedef NSUInteger DDXMLDocumentContentKind;

/**
 * A parsed XML document.
 *
 * The commented-out declarations are parts of the NSXMLDocument API that this
 * class does not implement; they are kept as a record of what is missing.
**/
@interface DDXMLDocument : DDXMLNode
{
}

/**
 * Parses `string`, encoded as UTF-8, by forwarding to -initWithData:options:error:.
**/
- (id)initWithXMLString:(NSString *)string options:(NSUInteger)mask error:(NSError **)error;
//- (id)initWithContentsOfURL:(NSURL *)url options:(NSUInteger)mask error:(NSError **)error;

/**
 * Parses `data` as XML.
 *
 * Returns nil when `data` is nil or empty, or when it cannot be parsed; in
 * both cases `*error` (if provided) is set to an error in "DDXMLErrorDomain".
**/
- (id)initWithData:(NSData *)data options:(NSUInteger)mask error:(NSError **)error;
//- (id)initWithRootElement:(DDXMLElement *)element;

//+ (Class)replacementClassForClass:(Class)cls;

//- (void)setCharacterEncoding:(NSString *)encoding; //primitive
//- (NSString *)characterEncoding; //primitive

//- (void)setVersion:(NSString *)version;
//- (NSString *)version;

//- (void)setStandalone:(BOOL)standalone;
//- (BOOL)isStandalone;

//- (void)setDocumentContentKind:(DDXMLDocumentContentKind)kind;
//- (DDXMLDocumentContentKind)documentContentKind;

//- (void)setMIMEType:(NSString *)MIMEType;
//- (NSString *)MIMEType;

//- (void)setDTD:(DDXMLDTD *)documentTypeDeclaration;
//- (DDXMLDTD *)DTD;

//- (void)setRootElement:(DDXMLNode *)root;

/// The document's root element, or nil when the document has none.
- (DDXMLElement *)rootElement;

//- (void)insertChild:(DDXMLNode *)child atIndex:(NSUInteger)index;

//- (void)insertChildren:(NSArray *)children atIndex:(NSUInteger)index;

//- (void)removeChildAtIndex:(NSUInteger)index;

//- (void)setChildren:(NSArray *)children;

//- (void)addChild:(DDXMLNode *)child;

//- (void)replaceChildAtIndex:(NSUInteger)index withNode:(DDXMLNode *)node;

/// The document's XML string (-XMLString), encoded as UTF-8.
- (NSData *)XMLData;
/// The document's XML string (-XMLStringWithOptions:), encoded as UTF-8.
- (NSData *)XMLDataWithOptions:(NSUInteger)options;

//- (id)objectByApplyingXSLT:(NSData *)xslt arguments:(NSDictionary *)arguments error:(NSError **)error;
//- (id)objectByApplyingXSLTString:(NSString *)xslt arguments:(NSDictionary *)arguments error:(NSError **)error;
//- (id)objectByApplyingXSLTAtURL:(NSURL *)xsltURL arguments:(NSDictionary *)argument error:(NSError **)error;

//- (BOOL)validateAndReturnError:(NSError **)error;

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/DDXMLDocument.m:
--------------------------------------------------------------------------------
#import "DDXMLPrivate.h"
#import "NSString+DDXML.h"

#if ! __has_feature(objc_arc)
#warning This file must be compiled with ARC. Use -fobjc-arc flag (or convert project to ARC).
#endif

/**
 * Welcome to KissXML.
 *
 * The project page has documentation if you have questions.
 * https://github.com/robbiehanson/KissXML
 *
 * If you're new to the project you may wish to read the "Getting Started" wiki.
15 | * https://github.com/robbiehanson/KissXML/wiki/GettingStarted 16 | * 17 | * KissXML provides a drop-in replacement for Apple's NSXML class cluster. 18 | * The goal is to get the exact same behavior as the NSXML classes. 19 | * 20 | * For API Reference, see Apple's excellent documentation, 21 | * either via Xcode's Mac OS X documentation, or via the web: 22 | * 23 | * https://github.com/robbiehanson/KissXML/wiki/Reference 24 | **/ 25 | 26 | @implementation DDXMLDocument 27 | 28 | /** 29 | * Returns a DDXML wrapper object for the given primitive node. 30 | * The given node MUST be non-NULL and of the proper type. 31 | **/ 32 | + (id)nodeWithDocPrimitive:(xmlDocPtr)doc owner:(DDXMLNode *)owner 33 | { 34 | return [[DDXMLDocument alloc] initWithDocPrimitive:doc owner:owner]; 35 | } 36 | 37 | - (id)initWithDocPrimitive:(xmlDocPtr)doc owner:(DDXMLNode *)inOwner 38 | { 39 | self = [super initWithPrimitive:(xmlKindPtr)doc owner:inOwner]; 40 | return self; 41 | } 42 | 43 | + (id)nodeWithPrimitive:(xmlKindPtr)kindPtr owner:(DDXMLNode *)owner 44 | { 45 | // Promote initializers which use proper parameter types to enable compiler to catch more mistakes 46 | NSAssert(NO, @"Use nodeWithDocPrimitive:owner:"); 47 | 48 | return nil; 49 | } 50 | 51 | - (id)initWithPrimitive:(xmlKindPtr)kindPtr owner:(DDXMLNode *)inOwner 52 | { 53 | // Promote initializers which use proper parameter types to enable compiler to catch more mistakes. 54 | NSAssert(NO, @"Use initWithDocPrimitive:owner:"); 55 | 56 | return nil; 57 | } 58 | 59 | /** 60 | * Initializes and returns a DDXMLDocument object created from an NSData object. 61 | * 62 | * Returns an initialized DDXMLDocument object, or nil if initialization fails 63 | * because of parsing errors or other reasons. 
64 | **/ 65 | - (id)initWithXMLString:(NSString *)string options:(NSUInteger)mask error:(NSError **)error 66 | { 67 | return [self initWithData:[string dataUsingEncoding:NSUTF8StringEncoding] 68 | options:mask 69 | error:error]; 70 | } 71 | 72 | /** 73 | * Initializes and returns a DDXMLDocument object created from an NSData object. 74 | * 75 | * Returns an initialized DDXMLDocument object, or nil if initialization fails 76 | * because of parsing errors or other reasons. 77 | **/ 78 | - (id)initWithData:(NSData *)data options:(NSUInteger)mask error:(NSError **)error 79 | { 80 | if (data == nil || [data length] == 0) 81 | { 82 | if (error) *error = [NSError errorWithDomain:@"DDXMLErrorDomain" code:0 userInfo:nil]; 83 | 84 | return nil; 85 | } 86 | 87 | // Even though xmlKeepBlanksDefault(0) is called in DDXMLNode's initialize method, 88 | // it has been documented that this call seems to get reset on the iPhone: 89 | // http://code.google.com/p/kissxml/issues/detail?id=8 90 | // 91 | // Therefore, we call it again here just to be safe. 92 | xmlKeepBlanksDefault(0); 93 | 94 | xmlDocPtr doc = xmlParseMemory([data bytes], [data length]); 95 | if (doc == NULL) 96 | { 97 | if (error) *error = [NSError errorWithDomain:@"DDXMLErrorDomain" code:1 userInfo:nil]; 98 | 99 | return nil; 100 | } 101 | 102 | return [self initWithDocPrimitive:doc owner:nil]; 103 | } 104 | 105 | /** 106 | * Returns the root element of the receiver. 107 | **/ 108 | - (DDXMLElement *)rootElement 109 | { 110 | #if DDXML_DEBUG_MEMORY_ISSUES 111 | DDXMLNotZombieAssert(); 112 | #endif 113 | 114 | xmlDocPtr doc = (xmlDocPtr)genericPtr; 115 | 116 | // doc->children is a list containing possibly comments, DTDs, etc... 
117 | 118 | xmlNodePtr rootNode = xmlDocGetRootElement(doc); 119 | 120 | if (rootNode != NULL) 121 | return [DDXMLElement nodeWithElementPrimitive:rootNode owner:self]; 122 | else 123 | return nil; 124 | } 125 | 126 | - (NSData *)XMLData 127 | { 128 | // Zombie test occurs in XMLString 129 | 130 | return [[self XMLString] dataUsingEncoding:NSUTF8StringEncoding]; 131 | } 132 | 133 | - (NSData *)XMLDataWithOptions:(NSUInteger)options 134 | { 135 | // Zombie test occurs in XMLString 136 | 137 | return [[self XMLStringWithOptions:options] dataUsingEncoding:NSUTF8StringEncoding]; 138 | } 139 | 140 | @end 141 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/KissXML/DDXMLElement.h: -------------------------------------------------------------------------------- 1 | #import 2 | #import "DDXMLNode.h" 3 | 4 | /** 5 | * Welcome to KissXML. 6 | * 7 | * The project page has documentation if you have questions. 8 | * https://github.com/robbiehanson/KissXML 9 | * 10 | * If you're new to the project you may wish to read the "Getting Started" wiki. 11 | * https://github.com/robbiehanson/KissXML/wiki/GettingStarted 12 | * 13 | * KissXML provides a drop-in replacement for Apple's NSXML class cluster. 14 | * The goal is to get the exact same behavior as the NSXML classes. 
15 | * 16 | * For API Reference, see Apple's excellent documentation, 17 | * either via Xcode's Mac OS X documentation, or via the web: 18 | * 19 | * https://github.com/robbiehanson/KissXML/wiki/Reference 20 | **/ 21 | 22 | @interface DDXMLElement : DDXMLNode 23 | { 24 | } 25 | 26 | - (id)initWithName:(NSString *)name; 27 | - (id)initWithName:(NSString *)name URI:(NSString *)URI; 28 | - (id)initWithName:(NSString *)name stringValue:(NSString *)string; 29 | - (id)initWithXMLString:(NSString *)string error:(NSError **)error; 30 | 31 | #pragma mark --- Elements by name --- 32 | 33 | - (NSArray *)elementsForName:(NSString *)name; 34 | - (NSArray *)elementsForLocalName:(NSString *)localName URI:(NSString *)URI; 35 | 36 | #pragma mark --- Attributes --- 37 | 38 | - (void)addAttribute:(DDXMLNode *)attribute; 39 | - (void)removeAttributeForName:(NSString *)name; 40 | - (void)setAttributes:(NSArray *)attributes; 41 | //- (void)setAttributesAsDictionary:(NSDictionary *)attributes; 42 | - (NSArray *)attributes; 43 | - (DDXMLNode *)attributeForName:(NSString *)name; 44 | //- (DDXMLNode *)attributeForLocalName:(NSString *)localName URI:(NSString *)URI; 45 | 46 | #pragma mark --- Namespaces --- 47 | 48 | - (void)addNamespace:(DDXMLNode *)aNamespace; 49 | - (void)removeNamespaceForPrefix:(NSString *)name; 50 | - (void)setNamespaces:(NSArray *)namespaces; 51 | - (NSArray *)namespaces; 52 | - (DDXMLNode *)namespaceForPrefix:(NSString *)prefix; 53 | - (DDXMLNode *)resolveNamespaceForName:(NSString *)name; 54 | - (NSString *)resolvePrefixForNamespaceURI:(NSString *)namespaceURI; 55 | 56 | #pragma mark --- Children --- 57 | 58 | - (void)insertChild:(DDXMLNode *)child atIndex:(NSUInteger)index; 59 | //- (void)insertChildren:(NSArray *)children atIndex:(NSUInteger)index; 60 | - (void)removeChildAtIndex:(NSUInteger)index; 61 | - (void)setChildren:(NSArray *)children; 62 | - (void)addChild:(DDXMLNode *)child; 63 | //- (void)replaceChildAtIndex:(NSUInteger)index withNode:(DDXMLNode *)node; 
//- (void)normalizeAdjacentTextNodesPreservingCDATA:(BOOL)preserve;

@end

--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/DDXMLNode.h:
--------------------------------------------------------------------------------
#import <Foundation/Foundation.h>
#import <libxml/tree.h>   // provides the XML_*_NODE constants used by DDXMLNodeKind below

@class DDXMLDocument;

/**
 * Welcome to KissXML.
 *
 * The project page has documentation if you have questions.
 * https://github.com/robbiehanson/KissXML
 *
 * If you're new to the project you may wish to read the "Getting Started" wiki.
 * https://github.com/robbiehanson/KissXML/wiki/GettingStarted
 *
 * KissXML provides a drop-in replacement for Apple's NSXML class cluster.
 * The goal is to get the exact same behavior as the NSXML classes.
 *
 * For API Reference, see Apple's excellent documentation,
 * either via Xcode's Mac OS X documentation, or via the web:
 *
 * https://github.com/robbiehanson/KissXML/wiki/Reference
**/

/**
 * Node kinds. Every value except DDXMLInvalidKind is defined directly in terms of
 * the corresponding libxml node-type constant.
**/
enum {
	DDXMLInvalidKind                = 0,
	DDXMLDocumentKind               = XML_DOCUMENT_NODE,
	DDXMLElementKind                = XML_ELEMENT_NODE,
	DDXMLAttributeKind              = XML_ATTRIBUTE_NODE,
	DDXMLNamespaceKind              = XML_NAMESPACE_DECL,
	DDXMLProcessingInstructionKind  = XML_PI_NODE,
	DDXMLCommentKind                = XML_COMMENT_NODE,
	DDXMLTextKind                   = XML_TEXT_NODE,
	DDXMLDTDKind                    = XML_DTD_NODE,
	DDXMLEntityDeclarationKind      = XML_ENTITY_DECL,
	DDXMLAttributeDeclarationKind   = XML_ATTRIBUTE_DECL,
	DDXMLElementDeclarationKind     = XML_ELEMENT_DECL,
	DDXMLNotationDeclarationKind    = XML_NOTATION_NODE
};
typedef NSUInteger DDXMLNodeKind;

/**
 * Bit flags accepted by -XMLStringWithOptions:.
**/
enum {
	DDXMLNodeOptionsNone            = 0,
	DDXMLNodeExpandEmptyElement     = 1 << 1,
	DDXMLNodeCompactEmptyElement    = 1 << 2,
	DDXMLNodePrettyPrint            = 1 << 17,
};


//extern struct _xmlKind;


// NOTE(review): the protocol list after NSObject was lost when this listing was extracted;
// <NSCopying> is restored on the assumption that DDXMLNode.m implements -copyWithZone:
// (as upstream KissXML does) - confirm against DDXMLNode.m.
@interface DDXMLNode : NSObject <NSCopying>
{
	// Every DDXML object is simply a wrapper around an underlying libxml node
	struct _xmlKind *genericPtr;
	
	// Every libxml node resides somewhere within an xml tree hierarchy.
	// We cannot free the tree hierarchy until all referencing nodes have been released.
	// So all nodes retain a reference to the node that created them,
	// and when the last reference is released the tree gets freed.
	DDXMLNode *owner;
}

//- (id)initWithKind:(DDXMLNodeKind)kind;

//- (id)initWithKind:(DDXMLNodeKind)kind options:(NSUInteger)options;

//+ (id)document;

//+ (id)documentWithRootElement:(DDXMLElement *)element;

#pragma mark --- Factory methods ---

+ (id)elementWithName:(NSString *)name;

+ (id)elementWithName:(NSString *)name URI:(NSString *)URI;

+ (id)elementWithName:(NSString *)name stringValue:(NSString *)string;

+ (id)elementWithName:(NSString *)name children:(NSArray *)children attributes:(NSArray *)attributes;

+ (id)attributeWithName:(NSString *)name stringValue:(NSString *)stringValue;

+ (id)attributeWithName:(NSString *)name URI:(NSString *)URI stringValue:(NSString *)stringValue;

+ (id)namespaceWithName:(NSString *)name stringValue:(NSString *)stringValue;

+ (id)processingInstructionWithName:(NSString *)name stringValue:(NSString *)stringValue;

+ (id)commentWithStringValue:(NSString *)stringValue;

+ (id)textWithStringValue:(NSString *)stringValue;

//+ (id)DTDNodeWithXMLString:(NSString *)string;

#pragma mark --- Properties ---

- (DDXMLNodeKind)kind;

- (void)setName:(NSString *)name;
- (NSString *)name;

//- (void)setObjectValue:(id)value;
//- (id)objectValue;

- (void)setStringValue:(NSString *)string;
//- (void)setStringValue:(NSString *)string resolvingEntities:(BOOL)resolve;
- (NSString *)stringValue;

#pragma mark --- Tree Navigation ---

- (NSUInteger)index;

- (NSUInteger)level;

- (DDXMLDocument *)rootDocument;

- (DDXMLNode *)parent;
- (NSUInteger)childCount;
- (NSArray *)children;
- (DDXMLNode *)childAtIndex:(NSUInteger)index;

- (DDXMLNode *)previousSibling;
- (DDXMLNode *)nextSibling;

- (DDXMLNode *)previousNode;
- (DDXMLNode *)nextNode;

- (void)detach;

- (NSString *)XPath;

#pragma mark --- QNames ---

- (NSString *)localName;
- (NSString *)prefix;

- (void)setURI:(NSString *)URI;
- (NSString *)URI;

+ (NSString *)localNameForName:(NSString *)name;
+ (NSString *)prefixForName:(NSString *)name;
//+ (DDXMLNode *)predefinedNamespaceForPrefix:(NSString *)name;

#pragma mark --- Output ---

- (NSString *)description;
- (NSString *)XMLString;
- (NSString *)XMLStringWithOptions:(NSUInteger)options;
//- (NSString *)canonicalXMLStringPreservingComments:(BOOL)comments;

#pragma mark --- XPath/XQuery ---

- (NSArray *)nodesForXPath:(NSString *)xpath error:(NSError **)error;
//- (NSArray *)objectsForXQuery:(NSString *)xquery constants:(NSDictionary *)constants error:(NSError **)error;
//- (NSArray *)objectsForXQuery:(NSString *)xquery error:(NSError **)error;

@end

--------------------------------------------------------------------------------
/TextBookParse/epubParse/KissXML/DDXMLNode.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/david122886/TextBookParse/a9bef580cd290c5af610ca67d3eb739cb229ea47/TextBookParse/epubParse/KissXML/DDXMLNode.m

--------------------------------------------------------------------------------
/TextBookParse/epubParse/SSZipArchive/SSZipArchive.h:
--------------------------------------------------------------------------------
//
//  SSZipArchive.h
//  SSZipArchive
//
//  Created by Sam Soffes on 7/21/10.
//  Copyright (c) Sam Soffes 2010-2014. All rights reserved.
//

#ifndef _SSZIPARCHIVE_H
#define _SSZIPARCHIVE_H

#import <Foundation/Foundation.h>
#include "unzip.h"   // unz_global_info / unz_file_info used by the delegate callbacks

@protocol SSZipArchiveDelegate;

/**
 * Class-level helpers for extracting and creating zip archives, plus an
 * instance API (-initWithPath: / -open / -write… / -close) for writing an
 * archive entry by entry.
 */
@interface SSZipArchive : NSObject

// Unzip
+ (BOOL)unzipFileAtPath:(NSString *)path toDestination:(NSString *)destination;
+ (BOOL)unzipFileAtPath:(NSString *)path toDestination:(NSString *)destination overwrite:(BOOL)overwrite password:(NSString *)password error:(NSError **)error;

+ (BOOL)unzipFileAtPath:(NSString *)path toDestination:(NSString *)destination delegate:(id<SSZipArchiveDelegate>)delegate;
+ (BOOL)unzipFileAtPath:(NSString *)path toDestination:(NSString *)destination overwrite:(BOOL)overwrite password:(NSString *)password error:(NSError **)error delegate:(id<SSZipArchiveDelegate>)delegate;

// Zip
+ (BOOL)createZipFileAtPath:(NSString *)path withFilesAtPaths:(NSArray *)filenames;
+ (BOOL)createZipFileAtPath:(NSString *)path withContentsOfDirectory:(NSString *)directoryPath;

- (id)initWithPath:(NSString *)path;
- (BOOL)open;
- (BOOL)writeFile:(NSString *)path;
- (BOOL)writeData:(NSData *)data filename:(NSString *)filename;
- (BOOL)close;

@end


/**
 * Optional progress callbacks for the delegate-taking unzip methods above.
 */
@protocol SSZipArchiveDelegate <NSObject>

@optional

- (void)zipArchiveWillUnzipArchiveAtPath:(NSString *)path zipInfo:(unz_global_info)zipInfo;
- (void)zipArchiveDidUnzipArchiveAtPath:(NSString *)path zipInfo:(unz_global_info)zipInfo unzippedPath:(NSString *)unzippedPath;

- (void)zipArchiveWillUnzipFileAtIndex:(NSInteger)fileIndex totalFiles:(NSInteger)totalFiles archivePath:(NSString *)archivePath fileInfo:(unz_file_info)fileInfo;
- (void)zipArchiveDidUnzipFileAtIndex:(NSInteger)fileIndex totalFiles:(NSInteger)totalFiles archivePath:(NSString *)archivePath fileInfo:(unz_file_info)fileInfo;

- (void)zipArchiveProgressEvent:(NSInteger)loaded total:(NSInteger)total;
@end

#endif /* _SSZIPARCHIVE_H */

--------------------------------------------------------------------------------
/TextBookParse/epubParse/SSZipArchive/minizip/crypt.h:
--------------------------------------------------------------------------------
/* crypt.h -- base code for crypt/uncrypt ZIPfile


   Version 1.01e, February 12th, 2005

   Copyright (C) 1998-2005 Gilles Vollant

   This code is a modified version of crypting code in Infozip distribution

   The encryption/decryption parts of this source code (as opposed to the
   non-echoing password parts) were originally written in Europe.  The
   whole source package can be freely distributed, including from the USA.
   (Prior to January 2000, re-export from the US was a violation of US law.)

   This encryption code is a direct transcription of the algorithm from
   Roger Schlafly, described by Phil Katz in the file appnote.txt.  This
   file (appnote.txt) is distributed with the PKZIP program (even in the
   version without encryption capabilities).

   If you don't need crypting in your application, just define symbols
   NOCRYPT and NOUNCRYPT.

   This code support the "Traditional PKWARE Encryption".

   The new AES encryption added on Zip format by Winzip (see the page
   http://www.winzip.com/aes_info.htm ) and PKWare PKZip 5.x Strong
   Encryption is not supported.
*/

/* One CRC-32 table step: fold byte b into running value c using pcrc_32_tab,
 * which must be in scope at the point of expansion. */
#define CRC32(c, b) ((*(pcrc_32_tab+(((int)(c) ^ (b)) & 0xff))) ^ ((c) >> 8))

/***********************************************************************
 * Return the next byte in the pseudo-random sequence
 *
 * Only pkeys[2] is read; pcrc_32_tab is not used in the body.
 */
static int decrypt_byte(unsigned long* pkeys, const unsigned long* pcrc_32_tab)
{
    unsigned temp;  /* POTENTIAL BUG:  temp*(temp^1) may overflow in an
                     * unpredictable manner on 16-bit systems; not a problem
                     * with any known compiler so far, though */

    temp = ((unsigned)(*(pkeys+2)) & 0xffff) | 2;
    return (int)(((temp * (temp ^ 1)) >> 8) & 0xff);
}

/***********************************************************************
 * Update the encryption keys with the next byte of plain text
 *
 * Mutates all three entries of pkeys and returns c unchanged, which lets
 * the zdecode/zencode macros below use the call as an expression.
 */
static int update_keys(unsigned long* pkeys,const unsigned long* pcrc_32_tab,int c)
{
    (*(pkeys+0)) = CRC32((*(pkeys+0)), c);
    (*(pkeys+1)) += (*(pkeys+0)) & 0xff;
    (*(pkeys+1)) = (*(pkeys+1)) * 134775813L + 1;
    {
      register int keyshift = (int)((*(pkeys+1)) >> 24);
      (*(pkeys+2)) = CRC32((*(pkeys+2)), keyshift);
    }
    return c;
}


/***********************************************************************
 * Initialize the encryption keys and the random header according to
 * the given password.
 *
 * passwd must be a NUL-terminated string; each of its bytes is fed
 * through update_keys after the three keys are set to fixed seeds.
 */
static void init_keys(const char* passwd,unsigned long* pkeys,const unsigned long* pcrc_32_tab)
{
    *(pkeys+0) = 305419896L;
    *(pkeys+1) = 591751049L;
    *(pkeys+2) = 878082192L;
    while (*passwd != '\0') {
        update_keys(pkeys,pcrc_32_tab,(int)*passwd);
        passwd++;
    }
}

/* Decrypt c in place (c must be an lvalue) and advance the keys with the plaintext byte. */
#define zdecode(pkeys,pcrc_32_tab,c) \
    (update_keys(pkeys,pcrc_32_tab,c ^= decrypt_byte(pkeys,pcrc_32_tab)))

/* Encrypt c: t receives the keystream byte, the keys advance with the
 * plaintext byte, and the expression's value is the ciphertext byte. */
#define zencode(pkeys,pcrc_32_tab,c,t) \
    (t=decrypt_byte(pkeys,pcrc_32_tab), update_keys(pkeys,pcrc_32_tab,c), t^(c))

#ifdef INCLUDECRYPTINGCODE_IFCRYPTALLOWED

#define RAND_HEAD_LEN  12
   /* "last resort" source for second part of crypt seed pattern */
#  ifndef ZCR_SEED2
#    define ZCR_SEED2 3141592654UL     /* use PI as default pattern */
#  endif

/***********************************************************************
 * Write the RAND_HEAD_LEN-byte encryption header into buf.
 *
 * Returns the number of bytes written (RAND_HEAD_LEN), or 0 when bufSize
 * is smaller than RAND_HEAD_LEN. On return pkeys holds the key state
 * after the header has been encrypted.
 *
 * This header includes nothing itself: the including file must already
 * have rand()/srand() and time() declared (<stdlib.h>, <time.h>).
 */
static int crypthead(const char* passwd,      /* password string */
                     unsigned char* buf,      /* where to write header */
                     int bufSize,
                     unsigned long* pkeys,
                     const unsigned long* pcrc_32_tab,
                     unsigned long crcForCrypting)
{
    int n;                       /* index in random header */
    int t;                       /* temporary */
    int c;                       /* random byte */
    unsigned char header[RAND_HEAD_LEN-2]; /* random header */
    static unsigned calls = 0;   /* ensure different random header each time */

    if (bufSize<RAND_HEAD_LEN)
      return 0;

    /* First generate RAND_HEAD_LEN-2 random bytes. We encrypt the
     * output of rand() to get less predictability, since rand() is
     * often poorly implemented.
     */
    if (++calls == 1)
    {
        srand((unsigned)(time(NULL) ^ ZCR_SEED2));
    }
    init_keys(passwd, pkeys, pcrc_32_tab);
    for (n = 0; n < RAND_HEAD_LEN-2; n++)
    {
        c = (rand() >> 7) & 0xff;
        header[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, c, t);
    }
    /* Encrypt random header (last two bytes is high word of crc) */
    init_keys(passwd, pkeys, pcrc_32_tab);
    for (n = 0; n < RAND_HEAD_LEN-2; n++)
    {
        buf[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, header[n], t);
    }
    buf[n++] = (unsigned char)zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 16) & 0xff, t);
    buf[n++] = (unsigned char)zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 24) & 0xff, t);
    return n;
}

#endif
-------------------------------------------------------------------------------- /TextBookParse/epubParse/SSZipArchive/minizip/ioapi.c: -------------------------------------------------------------------------------- 1 | /* ioapi.h -- IO base function header for compress/uncompress .zip 2 | part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html ) 3 | 4 | Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html ) 5 | 6 | Modifications for Zip64 support 7 | Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) 8 | 9 | For more info read MiniZip_info.txt 10 | 11 | */ 12 | 13 | #if (defined(_WIN32)) 14 | #define _CRT_SECURE_NO_WARNINGS 15 | #endif 16 | 17 | #include "ioapi.h" 18 | 19 | voidpf call_zopen64 (const zlib_filefunc64_32_def* pfilefunc,const void*filename,int mode) 20 | { 21 | if (pfilefunc->zfile_func64.zopen64_file != NULL) 22 | return (*(pfilefunc->zfile_func64.zopen64_file)) (pfilefunc->zfile_func64.opaque,filename,mode); 23 | else 24 | { 25 | return (*(pfilefunc->zopen32_file))(pfilefunc->zfile_func64.opaque,(const char*)filename,mode); 26 | } 27 | } 28 | 29 | long call_zseek64 (const zlib_filefunc64_32_def* pfilefunc,voidpf filestream, ZPOS64_T offset, int origin) 30 | { 31 | if (pfilefunc->zfile_func64.zseek64_file != NULL) 32 | return (*(pfilefunc->zfile_func64.zseek64_file)) (pfilefunc->zfile_func64.opaque,filestream,offset,origin); 33 | else 34 | { 35 | uLong offsetTruncated = (uLong)offset; 36 | if (offsetTruncated != offset) 37 | return -1; 38 | else 39 | return (*(pfilefunc->zseek32_file))(pfilefunc->zfile_func64.opaque,filestream,offsetTruncated,origin); 40 | } 41 | } 42 | 43 | ZPOS64_T call_ztell64 (const zlib_filefunc64_32_def* pfilefunc,voidpf filestream) 44 | { 45 | if (pfilefunc->zfile_func64.zseek64_file != NULL) 46 | return (*(pfilefunc->zfile_func64.ztell64_file)) (pfilefunc->zfile_func64.opaque,filestream); 47 | else 48 | { 49 | uLong tell_uLong = 
(*(pfilefunc->ztell32_file))(pfilefunc->zfile_func64.opaque,filestream); 50 | if ((tell_uLong) == ((uLong)-1)) 51 | return (ZPOS64_T)-1; 52 | else 53 | return tell_uLong; 54 | } 55 | } 56 | 57 | void fill_zlib_filefunc64_32_def_from_filefunc32(zlib_filefunc64_32_def* p_filefunc64_32,const zlib_filefunc_def* p_filefunc32) 58 | { 59 | p_filefunc64_32->zfile_func64.zopen64_file = NULL; 60 | p_filefunc64_32->zopen32_file = p_filefunc32->zopen_file; 61 | p_filefunc64_32->zfile_func64.zerror_file = p_filefunc32->zerror_file; 62 | p_filefunc64_32->zfile_func64.zread_file = p_filefunc32->zread_file; 63 | p_filefunc64_32->zfile_func64.zwrite_file = p_filefunc32->zwrite_file; 64 | p_filefunc64_32->zfile_func64.ztell64_file = NULL; 65 | p_filefunc64_32->zfile_func64.zseek64_file = NULL; 66 | p_filefunc64_32->zfile_func64.zclose_file = p_filefunc32->zclose_file; 67 | 68 | #ifndef __clang_analyzer__ 69 | p_filefunc64_32->zfile_func64.zerror_file = p_filefunc32->zerror_file; 70 | #endif 71 | 72 | p_filefunc64_32->zfile_func64.opaque = p_filefunc32->opaque; 73 | p_filefunc64_32->zseek32_file = p_filefunc32->zseek_file; 74 | p_filefunc64_32->ztell32_file = p_filefunc32->ztell_file; 75 | } 76 | 77 | 78 | 79 | static voidpf ZCALLBACK fopen_file_func OF((voidpf opaque, const char* filename, int mode)); 80 | static uLong ZCALLBACK fread_file_func OF((voidpf opaque, voidpf stream, void* buf, uLong size)); 81 | static uLong ZCALLBACK fwrite_file_func OF((voidpf opaque, voidpf stream, const void* buf,uLong size)); 82 | static ZPOS64_T ZCALLBACK ftell64_file_func OF((voidpf opaque, voidpf stream)); 83 | static long ZCALLBACK fseek64_file_func OF((voidpf opaque, voidpf stream, ZPOS64_T offset, int origin)); 84 | static int ZCALLBACK fclose_file_func OF((voidpf opaque, voidpf stream)); 85 | static int ZCALLBACK ferror_file_func OF((voidpf opaque, voidpf stream)); 86 | 87 | static voidpf ZCALLBACK fopen_file_func (voidpf opaque, const char* filename, int mode) 88 | { 89 | FILE* file = NULL; 
90 | const char* mode_fopen = NULL; 91 | if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) 92 | mode_fopen = "rb"; 93 | else 94 | if (mode & ZLIB_FILEFUNC_MODE_EXISTING) 95 | mode_fopen = "r+b"; 96 | else 97 | if (mode & ZLIB_FILEFUNC_MODE_CREATE) 98 | mode_fopen = "wb"; 99 | 100 | if ((filename!=NULL) && (mode_fopen != NULL)) 101 | file = fopen(filename, mode_fopen); 102 | return file; 103 | } 104 | 105 | static voidpf ZCALLBACK fopen64_file_func (voidpf opaque, const void* filename, int mode) 106 | { 107 | FILE* file = NULL; 108 | const char* mode_fopen = NULL; 109 | if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) 110 | mode_fopen = "rb"; 111 | else 112 | if (mode & ZLIB_FILEFUNC_MODE_EXISTING) 113 | mode_fopen = "r+b"; 114 | else 115 | if (mode & ZLIB_FILEFUNC_MODE_CREATE) 116 | mode_fopen = "wb"; 117 | 118 | if ((filename!=NULL) && (mode_fopen != NULL)) 119 | file = fopen64((const char*)filename, mode_fopen); 120 | return file; 121 | } 122 | 123 | 124 | static uLong ZCALLBACK fread_file_func (voidpf opaque, voidpf stream, void* buf, uLong size) 125 | { 126 | uLong ret; 127 | ret = (uLong)fread(buf, 1, (size_t)size, (FILE *)stream); 128 | return ret; 129 | } 130 | 131 | static uLong ZCALLBACK fwrite_file_func (voidpf opaque, voidpf stream, const void* buf, uLong size) 132 | { 133 | uLong ret; 134 | ret = (uLong)fwrite(buf, 1, (size_t)size, (FILE *)stream); 135 | return ret; 136 | } 137 | 138 | static long ZCALLBACK ftell_file_func (voidpf opaque, voidpf stream) 139 | { 140 | long ret; 141 | ret = ftell((FILE *)stream); 142 | return ret; 143 | } 144 | 145 | 146 | static ZPOS64_T ZCALLBACK ftell64_file_func (voidpf opaque, voidpf stream) 147 | { 148 | ZPOS64_T ret; 149 | ret = ftello64((FILE *)stream); 150 | return ret; 151 | } 152 | 153 | static long ZCALLBACK fseek_file_func (voidpf opaque, voidpf stream, uLong offset, int origin) 154 | { 155 | int fseek_origin=0; 156 | long ret; 157 | switch (origin) 158 | { 159 
| case ZLIB_FILEFUNC_SEEK_CUR : 160 | fseek_origin = SEEK_CUR; 161 | break; 162 | case ZLIB_FILEFUNC_SEEK_END : 163 | fseek_origin = SEEK_END; 164 | break; 165 | case ZLIB_FILEFUNC_SEEK_SET : 166 | fseek_origin = SEEK_SET; 167 | break; 168 | default: return -1; 169 | } 170 | ret = 0; 171 | if (fseek((FILE *)stream, offset, fseek_origin) != 0) 172 | ret = -1; 173 | return ret; 174 | } 175 | 176 | static long ZCALLBACK fseek64_file_func (voidpf opaque, voidpf stream, ZPOS64_T offset, int origin) 177 | { 178 | int fseek_origin=0; 179 | long ret; 180 | switch (origin) 181 | { 182 | case ZLIB_FILEFUNC_SEEK_CUR : 183 | fseek_origin = SEEK_CUR; 184 | break; 185 | case ZLIB_FILEFUNC_SEEK_END : 186 | fseek_origin = SEEK_END; 187 | break; 188 | case ZLIB_FILEFUNC_SEEK_SET : 189 | fseek_origin = SEEK_SET; 190 | break; 191 | default: return -1; 192 | } 193 | ret = 0; 194 | 195 | if(fseeko64((FILE *)stream, offset, fseek_origin) != 0) 196 | ret = -1; 197 | 198 | return ret; 199 | } 200 | 201 | 202 | static int ZCALLBACK fclose_file_func (voidpf opaque, voidpf stream) 203 | { 204 | int ret; 205 | ret = fclose((FILE *)stream); 206 | return ret; 207 | } 208 | 209 | static int ZCALLBACK ferror_file_func (voidpf opaque, voidpf stream) 210 | { 211 | int ret; 212 | ret = ferror((FILE *)stream); 213 | return ret; 214 | } 215 | 216 | void fill_fopen_filefunc (pzlib_filefunc_def) 217 | zlib_filefunc_def* pzlib_filefunc_def; 218 | { 219 | pzlib_filefunc_def->zopen_file = fopen_file_func; 220 | pzlib_filefunc_def->zread_file = fread_file_func; 221 | pzlib_filefunc_def->zwrite_file = fwrite_file_func; 222 | pzlib_filefunc_def->ztell_file = ftell_file_func; 223 | pzlib_filefunc_def->zseek_file = fseek_file_func; 224 | pzlib_filefunc_def->zclose_file = fclose_file_func; 225 | pzlib_filefunc_def->zerror_file = ferror_file_func; 226 | pzlib_filefunc_def->opaque = NULL; 227 | } 228 | 229 | void fill_fopen64_filefunc (zlib_filefunc64_def* pzlib_filefunc_def) 230 | { 231 | 
pzlib_filefunc_def->zopen64_file = fopen64_file_func; 232 | pzlib_filefunc_def->zread_file = fread_file_func; 233 | pzlib_filefunc_def->zwrite_file = fwrite_file_func; 234 | pzlib_filefunc_def->ztell64_file = ftell64_file_func; 235 | pzlib_filefunc_def->zseek64_file = fseek64_file_func; 236 | pzlib_filefunc_def->zclose_file = fclose_file_func; 237 | pzlib_filefunc_def->zerror_file = ferror_file_func; 238 | pzlib_filefunc_def->opaque = NULL; 239 | } 240 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/SSZipArchive/minizip/ioapi.h: -------------------------------------------------------------------------------- 1 | /* ioapi.h -- IO base function header for compress/uncompress .zip 2 | part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html ) 3 | 4 | Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html ) 5 | 6 | Modifications for Zip64 support 7 | Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) 8 | 9 | For more info read MiniZip_info.txt 10 | 11 | Changes 12 | 13 | Oct-2009 - Defined ZPOS64_T to fpos_t on windows and u_int64_t on linux. (might need to find a better why for this) 14 | Oct-2009 - Change to fseeko64, ftello64 and fopen64 so large files would work on linux. 15 | More if/def section may be needed to support other platforms 16 | Oct-2009 - Defined fxxxx64 calls to normal fopen/ftell/fseek so they would compile on windows. 17 | (but you should use iowin32.c for windows instead) 18 | 19 | */ 20 | 21 | #ifndef _ZLIBIOAPI64_H 22 | #define _ZLIBIOAPI64_H 23 | 24 | #if (!defined(_WIN32)) && (!defined(WIN32)) 25 | 26 | // Linux needs this to support file operation on files larger then 4+GB 27 | // But might need better if/def to select just the platforms that needs them. 
28 | 29 | #ifndef __USE_FILE_OFFSET64 30 | #define __USE_FILE_OFFSET64 31 | #endif 32 | #ifndef __USE_LARGEFILE64 33 | #define __USE_LARGEFILE64 34 | #endif 35 | #ifndef _LARGEFILE64_SOURCE 36 | #define _LARGEFILE64_SOURCE 37 | #endif 38 | #ifndef _FILE_OFFSET_BIT 39 | #define _FILE_OFFSET_BIT 64 40 | #endif 41 | #endif 42 | 43 | #include 44 | #include 45 | #include "zlib.h" 46 | 47 | #define USE_FILE32API 48 | #if defined(USE_FILE32API) 49 | #define fopen64 fopen 50 | #define ftello64 ftell 51 | #define fseeko64 fseek 52 | #else 53 | #ifdef _MSC_VER 54 | #define fopen64 fopen 55 | #if (_MSC_VER >= 1400) && (!(defined(NO_MSCVER_FILE64_FUNC))) 56 | #define ftello64 _ftelli64 57 | #define fseeko64 _fseeki64 58 | #else // old MSC 59 | #define ftello64 ftell 60 | #define fseeko64 fseek 61 | #endif 62 | #endif 63 | #endif 64 | 65 | /* 66 | #ifndef ZPOS64_T 67 | #ifdef _WIN32 68 | #define ZPOS64_T fpos_t 69 | #else 70 | #include 71 | #define ZPOS64_T uint64_t 72 | #endif 73 | #endif 74 | */ 75 | 76 | #ifdef HAVE_MINIZIP64_CONF_H 77 | #include "mz64conf.h" 78 | #endif 79 | 80 | /* a type choosen by DEFINE */ 81 | #ifdef HAVE_64BIT_INT_CUSTOM 82 | typedef 64BIT_INT_CUSTOM_TYPE ZPOS64_T; 83 | #else 84 | #ifdef HAS_STDINT_H 85 | #include "stdint.h" 86 | typedef uint64_t ZPOS64_T; 87 | #else 88 | 89 | 90 | #if defined(_MSC_VER) || defined(__BORLANDC__) 91 | typedef unsigned __int64 ZPOS64_T; 92 | #else 93 | typedef unsigned long long int ZPOS64_T; 94 | #endif 95 | #endif 96 | #endif 97 | 98 | 99 | 100 | #ifdef __cplusplus 101 | extern "C" { 102 | #endif 103 | 104 | 105 | #define ZLIB_FILEFUNC_SEEK_CUR (1) 106 | #define ZLIB_FILEFUNC_SEEK_END (2) 107 | #define ZLIB_FILEFUNC_SEEK_SET (0) 108 | 109 | #define ZLIB_FILEFUNC_MODE_READ (1) 110 | #define ZLIB_FILEFUNC_MODE_WRITE (2) 111 | #define ZLIB_FILEFUNC_MODE_READWRITEFILTER (3) 112 | 113 | #define ZLIB_FILEFUNC_MODE_EXISTING (4) 114 | #define ZLIB_FILEFUNC_MODE_CREATE (8) 115 | 116 | 117 | #ifndef ZCALLBACK 118 | #if 
(defined(WIN32) || defined(_WIN32) || defined (WINDOWS) || defined (_WINDOWS)) && defined(CALLBACK) && defined (USEWINDOWS_CALLBACK) 119 | #define ZCALLBACK CALLBACK 120 | #else 121 | #define ZCALLBACK 122 | #endif 123 | #endif 124 | 125 | 126 | 127 | 128 | typedef voidpf (ZCALLBACK *open_file_func) OF((voidpf opaque, const char* filename, int mode)); 129 | typedef uLong (ZCALLBACK *read_file_func) OF((voidpf opaque, voidpf stream, void* buf, uLong size)); 130 | typedef uLong (ZCALLBACK *write_file_func) OF((voidpf opaque, voidpf stream, const void* buf, uLong size)); 131 | typedef int (ZCALLBACK *close_file_func) OF((voidpf opaque, voidpf stream)); 132 | typedef int (ZCALLBACK *testerror_file_func) OF((voidpf opaque, voidpf stream)); 133 | 134 | typedef long (ZCALLBACK *tell_file_func) OF((voidpf opaque, voidpf stream)); 135 | typedef long (ZCALLBACK *seek_file_func) OF((voidpf opaque, voidpf stream, uLong offset, int origin)); 136 | 137 | 138 | /* here is the "old" 32 bits structure structure */ 139 | typedef struct zlib_filefunc_def_s 140 | { 141 | open_file_func zopen_file; 142 | read_file_func zread_file; 143 | write_file_func zwrite_file; 144 | tell_file_func ztell_file; 145 | seek_file_func zseek_file; 146 | close_file_func zclose_file; 147 | testerror_file_func zerror_file; 148 | voidpf opaque; 149 | } zlib_filefunc_def; 150 | 151 | typedef ZPOS64_T (ZCALLBACK *tell64_file_func) OF((voidpf opaque, voidpf stream)); 152 | typedef long (ZCALLBACK *seek64_file_func) OF((voidpf opaque, voidpf stream, ZPOS64_T offset, int origin)); 153 | typedef voidpf (ZCALLBACK *open64_file_func) OF((voidpf opaque, const void* filename, int mode)); 154 | 155 | typedef struct zlib_filefunc64_def_s 156 | { 157 | open64_file_func zopen64_file; 158 | read_file_func zread_file; 159 | write_file_func zwrite_file; 160 | tell64_file_func ztell64_file; 161 | seek64_file_func zseek64_file; 162 | close_file_func zclose_file; 163 | testerror_file_func zerror_file; 164 | voidpf opaque; 165 
| } zlib_filefunc64_def; 166 | 167 | void fill_fopen64_filefunc OF((zlib_filefunc64_def* pzlib_filefunc_def)); 168 | void fill_fopen_filefunc OF((zlib_filefunc_def* pzlib_filefunc_def)); 169 | 170 | /* now internal definition, only for zip.c and unzip.h */ 171 | typedef struct zlib_filefunc64_32_def_s 172 | { 173 | zlib_filefunc64_def zfile_func64; 174 | open_file_func zopen32_file; 175 | tell_file_func ztell32_file; 176 | seek_file_func zseek32_file; 177 | } zlib_filefunc64_32_def; 178 | 179 | 180 | #define ZREAD64(filefunc,filestream,buf,size) ((*((filefunc).zfile_func64.zread_file)) ((filefunc).zfile_func64.opaque,filestream,buf,size)) 181 | #define ZWRITE64(filefunc,filestream,buf,size) ((*((filefunc).zfile_func64.zwrite_file)) ((filefunc).zfile_func64.opaque,filestream,buf,size)) 182 | //#define ZTELL64(filefunc,filestream) ((*((filefunc).ztell64_file)) ((filefunc).opaque,filestream)) 183 | //#define ZSEEK64(filefunc,filestream,pos,mode) ((*((filefunc).zseek64_file)) ((filefunc).opaque,filestream,pos,mode)) 184 | #define ZCLOSE64(filefunc,filestream) ((*((filefunc).zfile_func64.zclose_file)) ((filefunc).zfile_func64.opaque,filestream)) 185 | #define ZERROR64(filefunc,filestream) ((*((filefunc).zfile_func64.zerror_file)) ((filefunc).zfile_func64.opaque,filestream)) 186 | 187 | voidpf call_zopen64 OF((const zlib_filefunc64_32_def* pfilefunc,const void*filename,int mode)); 188 | long call_zseek64 OF((const zlib_filefunc64_32_def* pfilefunc,voidpf filestream, ZPOS64_T offset, int origin)); 189 | ZPOS64_T call_ztell64 OF((const zlib_filefunc64_32_def* pfilefunc,voidpf filestream)); 190 | 191 | void fill_zlib_filefunc64_32_def_from_filefunc32(zlib_filefunc64_32_def* p_filefunc64_32,const zlib_filefunc_def* p_filefunc32); 192 | 193 | #define ZOPEN64(filefunc,filename,mode) (call_zopen64((&(filefunc)),(filename),(mode))) 194 | #define ZTELL64(filefunc,filestream) (call_ztell64((&(filefunc)),(filestream))) 195 | #define ZSEEK64(filefunc,filestream,pos,mode) 
(call_zseek64((&(filefunc)),(filestream),(pos),(mode))) 196 | 197 | #ifdef __cplusplus 198 | } 199 | #endif 200 | 201 | #endif 202 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/SSZipArchive/minizip/mztools.h: -------------------------------------------------------------------------------- 1 | /* 2 | Additional tools for Minizip 3 | Code: Xavier Roche '2004 4 | License: Same as ZLIB (www.gzip.org) 5 | */ 6 | 7 | #ifndef _zip_tools_H 8 | #define _zip_tools_H 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | #ifndef _ZLIB_H 15 | #include "zlib.h" 16 | #endif 17 | 18 | #include "unzip.h" 19 | 20 | /* Repair a ZIP file (missing central directory) 21 | file: file to recover 22 | fileOut: output file after recovery 23 | fileOutTmp: temporary file name used for recovery 24 | */ 25 | extern int ZEXPORT unzRepair(const char* file, 26 | const char* fileOut, 27 | const char* fileOutTmp, 28 | uLong* nRecovered, 29 | uLong* bytesRecovered); 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLComment.h: -------------------------------------------------------------------------------- 1 | // HTMLComment.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import "HTMLNode.h" 6 | 7 | /** 8 | * An HTMLCommentNode represents a comment. 9 | * 10 | * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#comments 11 | */ 12 | @interface HTMLComment : HTMLNode 13 | 14 | /** 15 | * This is the designated initializer. 16 | */ 17 | - (id)initWithData:(NSString *)data; 18 | 19 | /** 20 | * The comment itself. 
21 | */ 22 | @property (copy, nonatomic) NSString *data; 23 | 24 | @end 25 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLComment.m: -------------------------------------------------------------------------------- 1 | // HTMLComment.m 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import "HTMLComment.h" 6 | 7 | @implementation HTMLComment 8 | 9 | - (id)initWithData:(NSString *)data 10 | { 11 | self = [super init]; 12 | if (!self) return nil; 13 | 14 | _data = [data copy]; 15 | 16 | return self; 17 | } 18 | 19 | - (NSString *)textContent 20 | { 21 | return self.data; 22 | } 23 | 24 | - (void)setTextContent:(NSString *)textContent 25 | { 26 | self.data = textContent; 27 | } 28 | 29 | #pragma mark NSCopying 30 | 31 | - (id)copyWithZone:(NSZone *)zone 32 | { 33 | HTMLComment *copy = [super copyWithZone:zone]; 34 | copy->_data = self.data; 35 | return copy; 36 | } 37 | 38 | @end 39 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLDocument.h: -------------------------------------------------------------------------------- 1 | // HTMLDocument.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import 6 | #import "HTMLDocumentType.h" 7 | #import "HTMLElement.h" 8 | #import "HTMLNode.h" 9 | #import "HTMLQuirksMode.h" 10 | 11 | /** 12 | * An HTMLDocument is the root of a tree of nodes representing parsed HTML. 13 | * 14 | * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#writing 15 | */ 16 | @interface HTMLDocument : HTMLNode 17 | 18 | /** 19 | * Parses an HTML string into a document. 20 | */ 21 | + (instancetype)documentWithString:(NSString *)string; 22 | 23 | /** 24 | * The document type node. 
@implementation HTMLDocument

/**
 * Parses a string of HTML and returns the resulting document.
 *
 * A parser is created without a context element and its document is returned.
 */
+ (instancetype)documentWithString:(NSString *)string
{
    HTMLParser *parser = [[HTMLParser alloc] initWithString:string context:nil];
    return parser.document;
}

/**
 * Returns the first child that is an HTMLDocumentType, or nil if there is none.
 */
- (HTMLDocumentType *)documentType
{
    return FirstNodeOfType(self.children, [HTMLDocumentType class]);
}

/**
 * Replaces, inserts, or removes the document type node.
 *
 * - If a document type already exists, the new one takes its place.
 * - Otherwise the new one is inserted immediately before the root element, or appended if there is no root element.
 * - Passing nil removes the existing document type, if any.
 */
- (void)setDocumentType:(HTMLDocumentType *)documentType
{
    HTMLDocumentType *oldDocumentType = self.documentType;
    
    // Nothing to do when the value is unchanged (this also covers nil -> nil). This guard matters:
    // replacing a child with itself would go through HTMLNode's replace accessor, which detaches the
    // "old" node after the swap and would thereby drop the node from the child set.
    if (oldDocumentType == documentType) return;
    
    NSMutableOrderedSet *children = [self mutableChildren];
    if (oldDocumentType && documentType) {
        NSUInteger i = [children indexOfObject:oldDocumentType];
        [children replaceObjectAtIndex:i withObject:documentType];
    } else if (documentType) {
        HTMLElement *rootElement = self.rootElement;
        if (rootElement) {
            [children insertObject:documentType atIndex:[children indexOfObject:rootElement]];
        } else {
            [children addObject:documentType];
        }
    } else if (oldDocumentType) {
        [children removeObject:oldDocumentType];
    }
}

/**
 * Returns the first child that is an HTMLElement, or nil if there is none.
 */
- (HTMLElement *)rootElement
{
    return FirstNodeOfType(self.children, [HTMLElement class]);
}

/**
 * Replaces, appends, or removes the root element.
 *
 * - If a root element already exists, the new one takes its place.
 * - Otherwise the new one is appended as the last child.
 * - Passing nil removes the existing root element, if any.
 */
- (void)setRootElement:(HTMLElement *)rootElement
{
    HTMLElement *oldRootElement = self.rootElement;
    
    // Same reasoning as in -setDocumentType:: re-assigning the current root must be a no-op rather
    // than a replace-with-self that ends up removing the node.
    if (oldRootElement == rootElement) return;
    
    NSMutableOrderedSet *children = [self mutableChildren];
    if (oldRootElement && rootElement) {
        [children replaceObjectAtIndex:[children indexOfObject:oldRootElement] withObject:rootElement];
    } else if (rootElement) {
        [children addObject:rootElement];
    } else if (oldRootElement) {
        [children removeObject:oldRootElement];
    }
}

/**
 * Returns the first object in the collection that is an instance of (a subclass of) the given class, or nil if no object matches.
 *
 * The collection only needs to support fast enumeration.
 */
static id FirstNodeOfType(id <NSFastEnumeration> collection, Class type)
{
    for (id node in collection) {
        if ([node isKindOfClass:type]) {
            return node;
        }
    }
    return nil;
}

@end
@implementation HTMLDocumentType

/**
 * Designated initializer. Each argument is copied.
 *
 * A nil public or system identifier is stored as the empty string; a nil name is stored as nil.
 */
- (id)initWithName:(NSString *)name publicIdentifier:(NSString *)publicIdentifier systemIdentifier:(NSString *)systemIdentifier
{
    if ((self = [super init])) {
        NSString *publicCopy = [publicIdentifier copy];
        NSString *systemCopy = [systemIdentifier copy];
        
        _name = [name copy];
        _publicIdentifier = publicCopy ? publicCopy : @"";
        _systemIdentifier = systemCopy ? systemCopy : @"";
    }
    return self;
}

/**
 * Creates a document type with no name and empty identifiers.
 */
- (id)init
{
    NSString *nothing = nil;
    return [self initWithName:nothing publicIdentifier:nothing systemIdentifier:nothing];
}

#pragma mark NSCopying

/**
 * Returns a new document type with the receiver's name and identifiers.
 */
- (id)copyWithZone:(NSZone *)zone
{
    HTMLDocumentType *duplicate = [super copyWithZone:zone];
    duplicate->_name = [self name];
    duplicate->_publicIdentifier = [self publicIdentifier];
    duplicate->_systemIdentifier = [self systemIdentifier];
    return duplicate;
}

@end
-------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLElement.h: -------------------------------------------------------------------------------- 1 | // HTMLElement.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import "HTMLNode.h" 6 | 7 | /** 8 | * An HTMLElement represents a subtree of content in an HTML document. 9 | * 10 | * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#elements 11 | */ 12 | @interface HTMLElement : HTMLNode 13 | 14 | /** 15 | * This is the designated initializer. 16 | * 17 | * @param tagName What kind of element to make. 18 | * @param attributes A dictionary of attributes to start the element off. May be nil. 19 | */ 20 | - (id)initWithTagName:(NSString *)tagName attributes:(NSDictionary *)attributes; 21 | 22 | /** 23 | * The element's kind. 24 | */ 25 | @property (readonly, copy, nonatomic) NSString *tagName; 26 | 27 | /** 28 | * The element's attributes. 29 | * 30 | * The attributes' sort order is stable when serialized. (This is required by the spec, but is not guaranteed by NSDictionary.) 31 | * 32 | * @see -objectForKeyedSubscript: 33 | * @see -setObject:forKeyedSubscript: 34 | * @see -removeAttributeWithName: 35 | */ 36 | @property (readonly, copy, nonatomic) NSDictionary *attributes; 37 | 38 | /** 39 | * Returns the value of the named attribute, or nil if no such value exists. 40 | */ 41 | - (id)objectForKeyedSubscript:(id)attributeNameOrString; 42 | 43 | /** 44 | * Sets a named attribute's value, adding it to the element if needed. 45 | */ 46 | - (void)setObject:(NSString *)attributeValue forKeyedSubscript:(NSString *)attributeName; 47 | 48 | /** 49 | * Removes the named attribute from the element. 50 | */ 51 | - (void)removeAttributeWithName:(NSString *)attributeName; 52 | 53 | /** 54 | * Whether or not a name appears in the element's class attribute. 
@implementation HTMLElement
{
    // Attribute storage. An ordered dictionary is used so that attributes keep their insertion order.
    HTMLOrderedDictionary *_attributes;
}

/**
 * Designated initializer.
 *
 * @param tagName    The element's tag name; copied.
 * @param attributes Initial attributes, added to a fresh ordered dictionary. May be nil.
 */
- (id)initWithTagName:(NSString *)tagName attributes:(NSDictionary *)attributes
{
    self = [super init];
    if (!self) return nil;
    
    _tagName = [tagName copy];
    _attributes = [HTMLOrderedDictionary new];
    [_attributes addEntriesFromDictionary:attributes];
    
    return self;
}

/**
 * Creates an element with no tag name and no attributes.
 */
- (id)init
{
    return [self initWithTagName:nil attributes:nil];
}

/**
 * Returns a copy of the element's attributes, so callers cannot mutate the element's own storage.
 */
- (NSDictionary *)attributes
{
    return [_attributes copy];
}

/**
 * Returns the value of the named attribute, or nil if the element has no such attribute.
 */
- (id)objectForKeyedSubscript:(id)attributeName
{
    return _attributes[attributeName];
}

/**
 * Sets the named attribute, adding it if it is not already present.
 */
- (void)setObject:(NSString *)attributeValue forKeyedSubscript:(NSString *)attributeName
{
    _attributes[attributeName] = attributeValue;
}

/**
 * Removes the named attribute.
 */
- (void)removeAttributeWithName:(NSString *)attributeName
{
    [_attributes removeObjectForKey:attributeName];
}

/**
 * Returns YES if className is one of the whitespace-separated names in the class attribute.
 *
 * Returns NO when the element has no class attribute.
 */
- (BOOL)hasClass:(NSString *)className
{
    NSArray *classes = [self[@"class"] componentsSeparatedByCharactersInSet:HTMLSelectorWhitespaceCharacterSet()];
    return [classes containsObject:className];
}

/**
 * Removes className from the class attribute if it is present; otherwise appends it.
 *
 * The attribute is rewritten with single spaces between names. A nil or empty className is ignored.
 */
- (void)toggleClass:(NSString *)className
{
    // An empty name cannot be a class, and -addObject: would raise on nil.
    if (className.length == 0) return;
    
    NSString *classValue = self[@"class"] ?: @"";
    NSMutableArray *classes = [[classValue componentsSeparatedByCharactersInSet:HTMLSelectorWhitespaceCharacterSet()] mutableCopy];
    
    // Splitting an empty string, or a string with leading, trailing, or repeated whitespace, produces
    // empty components. Dropping them keeps the rewritten attribute from growing stray spaces
    // (e.g. toggling "foo" on an element with no class attribute used to yield " foo").
    [classes removeObject:@""];
    
    NSUInteger i = [classes indexOfObject:className];
    if (i == NSNotFound) {
        [classes addObject:className];
    } else {
        [classes removeObjectAtIndex:i];
    }
    self[@"class"] = [classes componentsJoinedByString:@" "];
}

#pragma mark NSCopying

/**
 * Returns a new element with the receiver's tag name and a copy of its attributes.
 */
- (id)copyWithZone:(NSZone *)zone
{
    HTMLElement *copy = [super copyWithZone:zone];
    copy->_tagName = self.tagName;
    copy->_attributes = [_attributes copy];
    return copy;
}

@end
/**
 * The three namespaces below are the only ones this HTML parser gives any special consideration.
 *
 * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/namespaces.html#namespaces
 */
typedef NS_ENUM(NSInteger, HTMLNamespace)
{
    /**
     * The default namespace is HTML. As the first enumerator it has the value 0.
     */
    HTMLNamespaceHTML,
    
    /**
     * Most elements within <math> tags are in the MathML namespace.
     */
    HTMLNamespaceMathML,
    
    /**
     * Most elements within <svg> tags are in the SVG namespace.
     */
    HTMLNamespaceSVG,
};
// HTMLNode implements -copyWithZone: and its subclasses call [super copyWithZone:], so the class must
// declare NSCopying conformance for those calls to be visible to the compiler.
@interface HTMLNode : NSObject <NSCopying>

/**
 * The document in which this node appears, or nil if the node is not in a tree with a document at its root.
 */
@property (readonly, strong, nonatomic) HTMLDocument *document;

/**
 * The node's parent, or nil if the node is a root node.
 */
@property (strong, nonatomic) HTMLNode *parentNode;

/**
 * The node's parent if it is an instance of HTMLElement, otherwise nil. Setter is equivalent to calling -setParentNode:.
 */
@property (strong, nonatomic) HTMLElement *parentElement;

/**
 * The node's children. Each is an instance of HTMLNode. Key-Value Coding compliant for accessing and mutation.
 */
@property (readonly, copy, nonatomic) NSOrderedSet *children;

/**
 * Convenience method that returns a mutable proxy for children. The proxy returned by -mutableChildren is much faster than the one obtained by calling -mutableOrderedSetValueForKey: yourself.
 */
- (NSMutableOrderedSet *)mutableChildren;

/**
 * The number of nodes that have the node as their parent.
 *
 * This method is faster than calling `aNode.children.count`.
 */
- (NSUInteger)numberOfChildren;

/**
 * Returns a child of the node. Throws an NSRangeException if index is out of bounds.
 *
 * This method is faster than calling `[aNode.children objectAtIndex:]`.
 */
- (HTMLNode *)childAtIndex:(NSUInteger)index;

/**
 * Returns the location of a child, or NSNotFound if the node is not the child's parent.
 *
 * This method is faster than calling `[aNode.children indexOfObject:]`.
 */
- (NSUInteger)indexOfChild:(HTMLNode *)child;

/**
 * The node's children which are instances of HTMLElement.
 */
@property (readonly, copy, nonatomic) NSArray *childElementNodes;

/**
 * Emits in tree order the nodes in the subtree rooted at the node.
 *
 * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#tree-order
 */
- (NSEnumerator *)treeEnumerator;

/**
 * Emits in tree order the nodes in the subtree rooted at the node, except children are enumerated back to front.
 */
- (NSEnumerator *)reversedTreeEnumerator;

/**
 * The combined text content of the node and its descendants. The setter replaces the node's text, removing all descendants.
 *
 * For more information, see http://dom.spec.whatwg.org/#dom-node-textcontent
 */
@property (copy, nonatomic) NSString *textContent;

/**
 * Convenience method for either adding a string to an existing text node or creating a new text node.
 *
 * If the child immediately before childNodeIndex is a text node, the string is appended to it; otherwise a new text node is inserted at childNodeIndex.
 *
 * @param string         The text to insert.
 * @param childNodeIndex The desired location of the text. If a new text node is created, this is where it will be inserted.
 */
- (void)insertString:(NSString *)string atChildNodeIndex:(NSUInteger)childNodeIndex;

@end
/**
 * A mutable ordered set facade over a node's children.
 *
 * Reads go straight to the wrapped set; mutations are forwarded to the owning node so that it can keep each child's parentNode in step.
 */
@interface HTMLChildrenRelationshipProxy : NSMutableOrderedSet

/**
 * @param node     The node whose children are being proxied; receives every mutation.
 * @param children The node's backing child set; used for reads only.
 */
- (id)initWithNode:(HTMLNode *)node children:(NSMutableOrderedSet *)children;

@property (readonly, strong, nonatomic) HTMLNode *node;

@property (readonly, strong, nonatomic) NSMutableOrderedSet *children;

@end

@implementation HTMLNode
{
    // Backing store for the children relationship. Children are retained here.
    // NOTE(review): HTMLNode.h declares parentNode as `strong`, so a parent and its children appear to
    // retain each other — confirm whether trees are expected to be torn down explicitly.
    NSMutableOrderedSet *_children;
}

/**
 * Creates a node with no parent and an empty child set.
 */
- (id)init
{
    self = [super init];
    if (!self) return nil;
    
    _children = [NSMutableOrderedSet new];
    
    return self;
}

/**
 * Walks up the parent chain and returns the first ancestor that is an HTMLDocument, or nil if there is none.
 *
 * The search starts at the parent, so the receiver itself is never returned.
 */
- (HTMLDocument *)document
{
    HTMLNode *currentNode = self.parentNode;
    while (currentNode && ![currentNode isKindOfClass:[HTMLDocument class]]) {
        currentNode = currentNode.parentNode;
    }
    return (HTMLDocument *)currentNode;
}

/**
 * Public setter: moves the node under a new parent, appending it to that parent's children.
 */
- (void)setParentNode:(HTMLNode *)parentNode
{
    [self setParentNode:parentNode updateChildren:YES];
}

/**
 * Core re-parenting routine.
 *
 * Removes the receiver from its current parent's child set, records the new parent, and, if updateChildren is YES, appends the receiver to the new parent's child set. The children accessors below pass NO because they have already updated the child set themselves.
 */
- (void)setParentNode:(HTMLNode *)parentNode updateChildren:(BOOL)updateChildren
{
    // updateParentNode:NO on both calls avoids bouncing back into this method.
    [_parentNode removeChild:self updateParentNode:NO];
    _parentNode = parentNode;
    if (updateChildren) {
        [parentNode addChild:self updateParentNode:NO];
    }
}

/**
 * Returns the parent if it is an HTMLElement, otherwise nil.
 */
- (HTMLElement *)parentElement
{
    HTMLNode *parent = self.parentNode;
    return [parent isKindOfClass:[HTMLElement class]] ? (HTMLElement *)parent : nil;
}

- (void)setParentElement:(HTMLElement *)parentElement
{
    self.parentNode = parentElement;
}

/**
 * Returns a copy of the child set. Use the numberOfChildren / childAtIndex: / indexOfChild: methods to avoid the copy.
 */
- (NSOrderedSet *)children
{
    return [_children copy];
}

// In order to quickly mutate the children set, we need to pull some shenanigans. From the Key-Value Coding Programming Guide:
//
// > When the default implementation of valueForKey: is invoked on a receiver, the following search pattern is used:
// >
// > 1. Searches the class of the receiver for an accessor method whose name matches the pattern get<Key>, <key>, or is<Key>, in that order. If such a method is found it is invoked.…
// > 2. Otherwise (no simple accessor method is found), searches the class of the receiver for methods whose names match the patterns countOf<Key> and objectIn<Key>AtIndex: … and <key>AtIndexes:….
// > If the countOf<Key> method and at least one of the other two possible methods are found, a collection proxy object that responds to all NSArray [sic] methods is returned. Each NSArray [sic] message sent to the collection proxy object will result in some combination of countOf<Key>, objectIn<Key>AtIndex:, and <key>AtIndexes: messages being sent to the original receiver of valueForKey:.
//
// From this, we can see that implementing -children stops us at step 1, and our implementation involves copying the set so it is slow. To work around this, -mutableChildren hands out an HTMLChildrenRelationshipProxy (implemented at the bottom of this file) that reads directly from _children and forwards every mutation to the accessors below, so no copy is made.
//
// NOTE(review): an earlier wording of this comment described becoming KVC-compliant for a key named "HTMLMutableChildren"; no accessors for that key exist in this file. The mutation accessors below are the ones for the key "children".
//
// Note that -mutableOrderedSetValueForKey: will still work for the key "children", it'll just be slow.

- (NSMutableOrderedSet *)mutableChildren
{
    return [[HTMLChildrenRelationshipProxy alloc] initWithNode:self children:_children];
}

- (NSUInteger)numberOfChildren
{
    return _children.count;
}

- (HTMLNode *)childAtIndex:(NSUInteger)index
{
    return _children[index];
}

- (NSUInteger)indexOfChild:(HTMLNode *)child
{
    return [_children indexOfObject:child];
}

#pragma mark Mutation accessors for "children"

// Each accessor updates _children first, then fixes up the affected nodes' parentNode with
// updateChildren:NO (the child set has already been changed here).

- (void)insertObject:(HTMLNode *)node inChildrenAtIndex:(NSUInteger)index
{
    [_children insertObject:node atIndex:index];
    // This also removes the node from its previous parent's child set.
    // NOTE(review): if the previous parent is the receiver itself, that removal targets _children —
    // confirm whether inserting an existing child is expected to be supported.
    [node setParentNode:self updateChildren:NO];
}

- (void)insertChildren:(NSArray *)array atIndexes:(NSIndexSet *)indexes
{
    [_children insertObjects:array atIndexes:indexes];
    for (HTMLNode *node in array) {
        [node setParentNode:self updateChildren:NO];
    }
}

- (void)removeObjectFromChildrenAtIndex:(NSUInteger)index
{
    // Hold on to the node before removing it so its parent can be cleared afterwards.
    HTMLNode *node = _children[index];
    [_children removeObjectAtIndex:index];
    [node setParentNode:nil updateChildren:NO];
}

- (void)removeChildrenAtIndexes:(NSIndexSet *)indexes
{
    NSArray *nodes = [_children objectsAtIndexes:indexes];
    [_children removeObjectsAtIndexes:indexes];
    for (HTMLNode *node in nodes) {
        [node setParentNode:nil updateChildren:NO];
    }
}

- (void)replaceObjectInChildrenAtIndex:(NSUInteger)index withObject:(HTMLNode *)node
{
    HTMLNode *old = _children[index];
    [_children replaceObjectAtIndex:index withObject:node];
    // NOTE(review): clearing old's parent also asks the receiver to remove `old` from _children. When
    // `node` is the very object that was already at `index`, that appears to remove it from the set
    // while the next line still points its parentNode here — confirm whether callers can hit this.
    [old setParentNode:nil updateChildren:NO];
    [node setParentNode:self updateChildren:NO];
}

/**
 * Appends a node to the child set, optionally pointing the node's parentNode at the receiver.
 */
- (void)addChild:(HTMLNode *)node updateParentNode:(BOOL)updateParentNode
{
    [_children addObject:node];
    if (updateParentNode) {
        [node setParentNode:self updateChildren:NO];
    }
}

/**
 * Removes a node from the child set, optionally clearing the node's parentNode.
 */
- (void)removeChild:(HTMLNode *)node updateParentNode:(BOOL)updateParentNode
{
    [_children removeObject:node];
    if (updateParentNode) {
        [node setParentNode:nil updateChildren:NO];
    }
}

/**
 * Appends the string to the text node just before `index` if there is one; otherwise inserts a new text node at `index` and appends the string to it.
 */
- (void)insertString:(NSString *)string atChildNodeIndex:(NSUInteger)index
{
    // Only the child immediately preceding the insertion point is considered for merging.
    id candidate = index > 0 ? _children[index - 1] : nil;
    HTMLTextNode *textNode;
    if ([candidate isKindOfClass:[HTMLTextNode class]]) {
        textNode = candidate;
    } else {
        textNode = [HTMLTextNode new];
        [[self mutableChildren] insertObject:textNode atIndex:index];
    }
    [textNode appendString:string];
}

/**
 * Returns, in child order, the children that are instances of HTMLElement.
 */
- (NSArray *)childElementNodes
{
    NSMutableArray *childElements = [NSMutableArray arrayWithCapacity:self.numberOfChildren];
    for (id node in _children) {
        if ([node isKindOfClass:[HTMLElement class]]) {
            [childElements addObject:node];
        }
    }
    return childElements;
}

- (NSEnumerator *)treeEnumerator
{
    return [[HTMLTreeEnumerator alloc] initWithNode:self reversed:NO];
}

- (NSEnumerator *)reversedTreeEnumerator
{
    return [[HTMLTreeEnumerator alloc] initWithNode:self reversed:YES];
}

/**
 * Concatenates the data of every HTMLTextNode yielded by the tree enumerator, in enumeration order.
 */
- (NSString *)textContent
{
    NSMutableArray *parts = [NSMutableArray new];
    // The enumerator yields nodes of any kind; the loop variable is typed for convenience and
    // checked before use.
    for (HTMLTextNode *node in self.treeEnumerator) {
        if ([node isKindOfClass:[HTMLTextNode class]]) {
            [parts addObject:node.data];
        }
    }
    return [parts componentsJoinedByString:@""];
}

/**
 * Removes all children, then adds a single text node holding textContent if it is non-empty.
 */
- (void)setTextContent:(NSString *)textContent
{
    [[self mutableChildren] removeAllObjects];
    if (textContent.length > 0) {
        HTMLTextNode *textNode = [[HTMLTextNode alloc] initWithData:textContent];
        [[self mutableChildren] addObject:textNode];
    }
}

#pragma mark NSCopying

/**
 * Returns a freshly initialized instance of the receiver's class. Parent and children are not carried over; subclasses copy their own state on top of this.
 */
- (id)copyWithZone:(NSZone *)zone
{
    return [[self.class allocWithZone:zone] init];
}

@end

/**
 * The proxy returned by -mutableOrderedSetValueForKey: is quite useless, crashing in -removeObject: and -indexOfObject:. Here's an alternate.
 *
 * Reads (count, objectAtIndex:, indexOfObject:) are answered from the node's backing set; each mutation is forwarded to the matching accessor on the node.
 */
@implementation HTMLChildrenRelationshipProxy : NSMutableOrderedSet

- (id)initWithNode:(HTMLNode *)node children:(NSMutableOrderedSet *)children
{
    self = [super init];
    if (!self) return nil;
    
    _node = node;
    _children = children;
    
    return self;
}

- (NSUInteger)count
{
    return _children.count;
}

- (id)objectAtIndex:(NSUInteger)index
{
    return [_children objectAtIndex:index];
}

- (NSUInteger)indexOfObject:(id)object
{
    return [_children indexOfObject:object];
}

- (void)insertObject:(id)object atIndex:(NSUInteger)index
{
    [_node insertObject:object inChildrenAtIndex:index];
}

- (void)insertObjects:(NSArray *)objects atIndexes:(NSIndexSet *)indexes
{
    [_node insertChildren:objects atIndexes:indexes];
}

- (void)replaceObjectAtIndex:(NSUInteger)index withObject:(id)object
{
    [_node replaceObjectInChildrenAtIndex:index withObject:object];
}

- (void)removeObjectAtIndex:(NSUInteger)index
{
    [_node removeObjectFromChildrenAtIndex:index];
}

- (void)removeObjectsAtIndexes:(NSIndexSet *)indexes
{
    [_node removeChildrenAtIndexes:indexes];
}

@end
@implementation HTMLOrderedDictionary
{
    // Key -> value storage. Created in -initWithCapacity: and released in -dealloc.
    CFMutableDictionaryRef _map;
    
    // The keys in insertion order. This is what gives the dictionary its ordering.
    NSMutableArray *_keys;
}

/**
 * Creates an empty ordered dictionary. Every other initializer in this class funnels through here, since this is where the backing storage is created.
 */
- (id)initWithCapacity:(NSUInteger)numItems
{
    self = [super init];
    if (!self) return nil;
    
    _map = CFDictionaryCreateMutable(nil, numItems, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
    _keys = [NSMutableArray arrayWithCapacity:numItems];
    
    return self;
}

// Diagnostic needs ignoring on iOS 5.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmismatched-parameter-types"
/**
 * Creates an ordered dictionary from parallel C arrays; entries are added in array order.
 *
 * Raises NSInvalidArgumentException if any object or key is nil.
 */
- (id)initWithObjects:(const id [])objects forKeys:(const id [])keys count:(NSUInteger)count
#pragma clang diagnostic pop
{
    self = [self initWithCapacity:count];
    if (!self) return nil;
    
    for (NSUInteger i = 0; i < count; i++) {
        id object = objects[i];
        id key = keys[i];
        
        if (!object) [NSException raise:NSInvalidArgumentException format:@"%@ object at %@ cannot be nil", NSStringFromSelector(_cmd), @(i)];
        if (!key) [NSException raise:NSInvalidArgumentException format:@"%@ key at %@ cannot be nil", NSStringFromSelector(_cmd), @(i)];
        
        self[keys[i]] = objects[i];
    }
    
    return self;
}

// iOS 8 adds the NS_DESIGNATED_INITIALIZER attribute. Someday we should support that, but for now let's conveniently ignore it.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wobjc-designated-initializers"

- (id)init
{
    return [self initWithCapacity:0];
}

/**
 * Decodes the "map" and "keys" entries written by -encodeWithCoder: and re-inserts the pairs in key order.
 */
- (id)initWithCoder:(NSCoder *)coder
{
    NSDictionary *map = [coder decodeObjectForKey:@"map"];
    NSArray *keys = [coder decodeObjectForKey:@"keys"];
    HTMLOrderedDictionary *dictionary = [self initWithCapacity:keys.count];
    for (id key in keys) {
        dictionary[key] = map[key];
    }
    return dictionary;
}

#pragma clang diagnostic pop

- (void)dealloc
{
    // CFRelease crashes when handed NULL, and _map is only created in -initWithCapacity:. Guard so that
    // an instance torn down before that point (failed or alternate init path) deallocates cleanly.
    if (_map) {
        CFRelease(_map);
    }
}

// Archive as this class rather than as whatever NSMutableDictionary would otherwise substitute.
- (Class)classForKeyedArchiver
{
    return [self class];
}

- (void)encodeWithCoder:(NSCoder *)coder
{
    [coder encodeObject:(__bridge NSDictionary *)_map forKey:@"map"];
    [coder encodeObject:_keys forKey:@"keys"];
}

/**
 * Returns a new ordered dictionary with the same entries in the same order. The copy is itself mutable.
 */
- (id)copyWithZone:(NSZone *)zone
{
    HTMLOrderedDictionary *copy = [[[self class] allocWithZone:zone] initWithCapacity:self.count];
    [copy addEntriesFromDictionary:self];
    return copy;
}

- (id)mutableCopyWithZone:(NSZone *)zone
{
    return [self copyWithZone:zone];
}

- (NSUInteger)count
{
    return _keys.count;
}

/**
 * Returns the object paired with key, or nil if there is none. A nil key yields nil.
 */
- (id)objectForKey:(id)key
{
    // A nil key is never present, and NULL is not a valid CFType to hand to the kCFType key callbacks.
    if (!key) return nil;
    
    return (__bridge id)CFDictionaryGetValue(_map, (__bridge const void *)key);
}

/**
 * Returns the position of key in insertion order, or NSNotFound if the key is not present.
 */
- (NSUInteger)indexOfKey:(id)key
{
    // Check the map first so that a miss does not cost a linear scan of the key array.
    if ([self objectForKey:key]) {
        return [_keys indexOfObject:key];
    } else {
        return NSNotFound;
    }
}

- (id)firstKey
{
    return _keys.firstObject;
}

- (id)lastKey
{
    return _keys.lastObject;
}

/**
 * Pairs object with key. A new key is appended to the end of the order; an existing key keeps its position.
 *
 * Raises NSInvalidArgumentException if object or key is nil.
 */
- (void)setObject:(id)object forKey:(id)key
{
    if (!object) [NSException raise:NSInvalidArgumentException format:@"%@ object cannot be nil", NSStringFromSelector(_cmd)];
    if (!key) [NSException raise:NSInvalidArgumentException format:@"%@ key cannot be nil", NSStringFromSelector(_cmd)];
    
    [self insertObject:object forKey:key atIndex:self.count];
}

/**
 * Removes key and its object, if present. Raises NSInvalidArgumentException if key is nil.
 */
- (void)removeObjectForKey:(id)key
{
    if (!key) [NSException raise:NSInvalidArgumentException format:@"%@ key cannot be nil", NSStringFromSelector(_cmd)];
    
    if ([self objectForKey:key]) {
        CFDictionaryRemoveValue(_map, (__bridge const void *)key);
        [_keys removeObject:key];
    }
}

/**
 * Pairs object with key, placing a key that is not yet present at index.
 *
 * If the key is already present only its object is replaced; the key stays where it is and index is used solely for the bounds check.
 *
 * Raises NSInvalidArgumentException if object or key is nil, and NSRangeException if index is greater than count.
 */
- (void)insertObject:(id)object forKey:(id)key atIndex:(NSUInteger)index
{
    if (!object) [NSException raise:NSInvalidArgumentException format:@"%@ object cannot be nil", NSStringFromSelector(_cmd)];
    if (!key) [NSException raise:NSInvalidArgumentException format:@"%@ key cannot be nil", NSStringFromSelector(_cmd)];
    if (index > self.count) [NSException raise:NSRangeException format:@"%@ index %@ beyond count %@ of array", NSStringFromSelector(_cmd), @(index), @(self.count)];
    
    if (![self objectForKey:key]) {
        // Copy new keys, as NSMutableDictionary does, so later mutation of the caller's key cannot affect us.
        key = [key copy];
        [_keys insertObject:key atIndex:index];
    }
    CFDictionarySetValue(_map, (__bridge const void *)key, (__bridge const void *)object);
}

// Keys are enumerated in insertion order.
- (NSEnumerator *)keyEnumerator
{
    return _keys.objectEnumerator;
}

/**
 * Returns the key (not the object) at index. Raises if index is out of bounds.
 */
- (id)objectAtIndexedSubscript:(NSUInteger)index
{
    return _keys[index];
}

// Fast enumeration walks the keys in insertion order.
- (NSUInteger)countByEnumeratingWithState:(NSFastEnumerationState *)state objects:(__unsafe_unretained id [])buffer count:(NSUInteger)len
{
    return [_keys countByEnumeratingWithState:state objects:buffer count:len];
}

@end
35 | */ 36 | @property (readonly, strong, nonatomic) HTMLDocument *document; 37 | 38 | @end 39 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLPreprocessedInputStream.h: -------------------------------------------------------------------------------- 1 | // HTMLPreprocessedInputStream.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import 6 | 7 | /** 8 | * An HTMLPreprocessedInputStream handles carriage returns, disallowed characters, and surrogate pairs. 9 | * 10 | * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream 11 | */ 12 | @interface HTMLPreprocessedInputStream : NSObject 13 | 14 | /** 15 | * This is the designated initializer. 16 | */ 17 | - (id)initWithString:(NSString *)string; 18 | 19 | /** 20 | * The string backing an input stream. 21 | */ 22 | @property (readonly, copy, nonatomic) NSString *string; 23 | 24 | /** 25 | * Consumes matching input characters. 26 | * 27 | * @param string The string to match. The whole string must match for any characters to be consumed. 28 | * @param caseSensitive YES if matching should consider ASCII case, otherwise NO. 29 | * 30 | * @return YES if input characters were consumed, otherwise NO. 31 | */ 32 | - (BOOL)consumeString:(NSString *)string matchingCase:(BOOL)caseSensitive; 33 | 34 | /** 35 | * Continually consumes characters until a certain character is encountered. 36 | * 37 | * @param test A block that is called with each character consumed. When the block returns YES, consumption stops and that character is left in the stream to be consumed again. 38 | * 39 | * @return A string of the characters consumed before the block returned YES or the stream ended, or nil if no characters were consumed. 40 | */ 41 | - (NSString *)consumeCharactersUpToFirstPassingTest:(BOOL(^)(UTF32Char character))test; 42 | 43 | /** 44 | * Consumes characters matching hexadecimal digits.
45 | * 46 | * @param number On return, the number represented by the matched digits. Pass NULL to skip over the digits. 47 | * 48 | * @return YES if any input characters were consumed, otherwise NO. 49 | */ 50 | - (BOOL)consumeHexInt:(out unsigned int *)number; 51 | 52 | /** 53 | * Consumes characters matching decimal digits. 54 | * 55 | * @param number On return, the number represented by the matched digits; values larger than UINT_MAX are clamped to UINT_MAX. Pass NULL to skip over the digits. 56 | * 57 | * @return YES if any input characters were consumed, otherwise NO. 58 | */ 59 | - (BOOL)consumeUnsignedInt:(out unsigned int *)number; 60 | 61 | /** 62 | * Returns, but does not consume, the next input character. If a stream is fully consumed, returns EOF. NOTE(review): documented as emitting no parse errors, but the implementation calls errorBlock for isolated surrogates and disallowed characters even when only peeking; confirm which is intended. 63 | */ 64 | @property (readonly, assign, nonatomic) UTF32Char nextInputCharacter; 65 | 66 | /** 67 | * Returns a string of characters from the stream's current position. The characters are not preprocessed for carriage returns, and no parse errors are emitted. 68 | * 69 | * @param length The maximum length of the returned string. 70 | * 71 | * @return A string, or nil if the stream has no characters remaining. 72 | */ 73 | - (NSString *)nextUnprocessedCharactersWithMaximumLength:(NSUInteger)length; 74 | 75 | /** 76 | * Returns a scanner for the stream's unprocessed characters whose scan location is set to the stream's current location. 77 | */ 78 | - (NSScanner *)unprocessedScanner; 79 | 80 | /** 81 | * Returns the next input character and moves scanLocation ahead, emitting parse errors as appropriate. If a stream is fully consumed, returns EOF. 82 | */ 83 | - (UTF32Char)consumeNextInputCharacter; 84 | 85 | /** 86 | * Set the next input character to the current input character. This method is idempotent. 87 | */ 88 | - (void)reconsumeCurrentInputCharacter; 89 | 90 | /** 91 | * Rewinds the stream by the given number of UTF-16 code units.
92 | */ 93 | - (void)unconsumeInputCharacters:(NSUInteger)numberOfCharactersToUnconsume; 94 | 95 | /** 96 | * A block called whenever a parse error occurs. The block has no return value and takes as parameters: 97 | * 98 | * @param error A description of the error. 99 | */ 100 | @property (copy, nonatomic) void (^errorBlock)(NSString *error); 101 | 102 | @end 103 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLPreprocessedInputStream.m: -------------------------------------------------------------------------------- 1 | // HTMLPreprocessedInputStream.m 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import "HTMLPreprocessedInputStream.h" 6 | #import "HTMLString.h" 7 | 8 | @implementation HTMLPreprocessedInputStream 9 | { 10 | NSUInteger _scanLocation; 11 | CFStringInlineBuffer _buffer; 12 | BOOL _reconsume; 13 | UTF32Char _currentInputCharacter; 14 | } 15 | 16 | - (id)initWithString:(NSString *)string 17 | { 18 | self = [super init]; 19 | if (!self) return nil; 20 | _string = [string copy]; 21 | CFStringInitInlineBuffer((__bridge CFStringRef)_string, &_buffer, CFRangeMake(0, _string.length)); 22 | return self; 23 | } 24 | 25 | - (BOOL)consumeString:(NSString *)string matchingCase:(BOOL)caseSensitive 26 | { 27 | NSScanner *scanner = [self unprocessedScanner]; 28 | scanner.caseSensitive = caseSensitive; 29 | BOOL ok = [scanner scanString:string intoString:nil]; 30 | if (ok) { 31 | _scanLocation = scanner.scanLocation; 32 | } 33 | return ok; 34 | } 35 | 36 | - (NSString *)consumeCharactersUpToFirstPassingTest:(BOOL(^)(UTF32Char character))test 37 | { 38 | NSMutableString *consumed = [NSMutableString new]; 39 | for (;;) { 40 | UTF32Char c = [self consumeNextInputCharacter]; 41 | if (c == (UTF32Char)EOF) break; 42 | if (test(c)) { 43 | [self reconsumeCurrentInputCharacter]; 44 | break; 45 | } 46 | AppendLongCharacter(consumed, c); 47 | } 48 | if (consumed.length > 0) { 
49 | return consumed; 50 | } else { 51 | return nil; 52 | } 53 | } 54 | 55 | - (BOOL)consumeHexInt:(out unsigned int *)number 56 | { 57 | // NSScanner's -scanHexInt: allows for a leading "0x" or "0X", while the HTML spec does not. 58 | NSCharacterSet *hexSet = [NSCharacterSet characterSetWithCharactersInString:@"0123456789ABCDEFabcdef"]; 59 | BOOL justHexDigits = [[self unprocessedScanner] scanCharactersFromSet:hexSet intoString:nil]; 60 | if (!justHexDigits) return NO; 61 | NSScanner *scanner = [self unprocessedScanner]; 62 | BOOL ok = [scanner scanHexInt:number]; 63 | if (ok) { 64 | _scanLocation = scanner.scanLocation; 65 | } 66 | return ok; 67 | } 68 | 69 | - (BOOL)consumeUnsignedInt:(out unsigned int *)outNumber 70 | { 71 | NSScanner *scanner = [self unprocessedScanner]; 72 | long long number; 73 | BOOL ok = [scanner scanLongLong:&number]; 74 | if (!ok || number < 0) return NO; 75 | _scanLocation = scanner.scanLocation; 76 | if (outNumber) { 77 | if (number > (long long)UINT_MAX) { 78 | *outNumber = UINT_MAX; 79 | } else { 80 | *outNumber = (unsigned int)number; 81 | } 82 | } 83 | return ok; 84 | } 85 | 86 | - (NSScanner *)unprocessedScanner 87 | { 88 | NSScanner *scanner = [NSScanner scannerWithString:_string]; 89 | scanner.charactersToBeSkipped = nil; 90 | scanner.scanLocation = _scanLocation; 91 | return scanner; 92 | } 93 | 94 | - (UTF32Char)nextInputCharacter 95 | { 96 | return [self nextInputCharacterAndConsume:NO]; 97 | } 98 | 99 | - (UTF32Char)consumeNextInputCharacter 100 | { 101 | return [self nextInputCharacterAndConsume:YES]; 102 | } 103 | 104 | - (UTF32Char)nextInputCharacterAndConsume:(BOOL)consume 105 | { 106 | if (_reconsume) { 107 | if (consume) { 108 | _reconsume = NO; 109 | } 110 | return _currentInputCharacter; 111 | } 112 | NSUInteger advance = 0; 113 | UTF32Char c = CFStringGetCharacterFromInlineBuffer(&_buffer, _scanLocation + advance); 114 | if (c == 0 && _scanLocation + advance >= _string.length) { 115 | c = EOF; 116 | } else { 117 | 
advance++; 118 | } 119 | if (CFStringIsSurrogateHighCharacter(c)) { 120 | unichar low = CFStringGetCharacterFromInlineBuffer(&_buffer, _scanLocation + advance); 121 | if (CFStringIsSurrogateLowCharacter(low)) { 122 | advance++; 123 | unichar high = c; 124 | c = CFStringGetLongCharacterForSurrogatePair(high, low); 125 | } else { 126 | if (self.errorBlock) { 127 | self.errorBlock(@"Isolated lead surrogate"); 128 | } 129 | // SPEC The spec does not say to emit a replacement character here. We'll do so anyway because of html5lib-tests. 130 | c = 0xFFFD; 131 | } 132 | } else if (CFStringIsSurrogateLowCharacter(c)) { 133 | if (self.errorBlock) { 134 | self.errorBlock(@"Isloated trail surrogate"); 135 | } 136 | // SPEC The spec does not say to emit a replacement character here. We'll do so anyway because of html5lib-tests. 137 | c = 0xFFFD; 138 | } else if (c == '\r') { 139 | c = '\n'; 140 | if (CFStringGetCharacterFromInlineBuffer(&_buffer, _scanLocation + advance) == '\n') { 141 | advance++; 142 | } 143 | } 144 | if (is_undefined_or_disallowed(c)) { 145 | if (self.errorBlock) { 146 | self.errorBlock(@"Noncharacter or disallowed control character"); 147 | } 148 | } 149 | if (consume) { 150 | _scanLocation += advance; 151 | _currentInputCharacter = c; 152 | } 153 | return c; 154 | } 155 | 156 | - (NSString *)nextUnprocessedCharactersWithMaximumLength:(NSUInteger)length 157 | { 158 | NSRange range = NSMakeRange(_scanLocation, length); 159 | if (NSMaxRange(range) > _string.length) { 160 | range.length = _string.length - range.location; 161 | } 162 | if (range.length > 0) { 163 | return [_string substringWithRange:range]; 164 | } else { 165 | return nil; 166 | } 167 | } 168 | 169 | - (void)reconsumeCurrentInputCharacter 170 | { 171 | _reconsume = YES; 172 | } 173 | 174 | - (void)unconsumeInputCharacters:(NSUInteger)numberOfCharactersToUnconsume 175 | { 176 | // TODO skip over ignored carriage returns 177 | // TODO skip over surrogate second halves 178 | // TODO bounds 
checking 179 | // TODO consider reconsume 180 | _scanLocation -= numberOfCharactersToUnconsume; 181 | } 182 | 183 | @end 184 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLQuirksMode.h: -------------------------------------------------------------------------------- 1 | // HTMLQuirksMode.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | /** 6 | * HTMLDocumentQuirksMode can change parts of the parsing algorithm. 7 | * 8 | * For more information, see http://dom.spec.whatwg.org/#concept-document-quirks 9 | */ 10 | typedef NS_ENUM(NSInteger, HTMLQuirksMode) 11 | { 12 | /** 13 | * The default quirks mode. 14 | */ 15 | HTMLQuirksModeNoQuirks, 16 | 17 | /** 18 | * A quirks mode for old versions of HTML. 19 | */ 20 | HTMLQuirksModeQuirks, 21 | 22 | /** 23 | * A quirks mode for (XHTML 1.0 or HTML 4.01) (Frameset or Transitional). 24 | */ 25 | HTMLQuirksModeLimitedQuirks, 26 | }; 27 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLReader.h: -------------------------------------------------------------------------------- 1 | // HTMLReader.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import "HTMLDocument.h" 6 | #import "HTMLSelector.h" 7 | #import "HTMLSerialization.h" 8 | #import "NSString+HTMLEntities.h" 9 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLSelector.h: -------------------------------------------------------------------------------- 1 | // HTMLSelector.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import 6 | #import "HTMLElement.h" 7 | 8 | /** 9 | * An HTMLSelector concisely describes a set of nodes. 
10 | * 11 | * It implements (CSS) Selectors Level 3 http://www.w3.org/TR/css3-selectors/ per the WHATWG HTML spec with the following exceptions: 12 | * 13 | * @li The target pseudo-class (:target) is not supported. 14 | * @li The :lang() and :dir() pseudo-classes are not supported. 15 | * @li Pseudo-elements (including ::first-line, ::first-letter, ::before, ::after) are not supported. 16 | * @li The :not() pseudo-class supports any selector. (The spec only supports a simple selector.) 17 | */ 18 | @interface HTMLSelector : NSObject 19 | 20 | /** 21 | * Creates and initializes a new selector. 22 | */ 23 | + (instancetype)selectorForString:(NSString *)selectorString; 24 | 25 | /** 26 | * This is the designated initializer. 27 | */ 28 | - (id)initWithString:(NSString *)selectorString; 29 | 30 | /** 31 | * A string representation of the selector. 32 | */ 33 | @property (readonly, copy, nonatomic) NSString *string; 34 | 35 | /** 36 | * Whether or not an element is matched by the selector. 37 | */ 38 | - (BOOL)matchesElement:(HTMLElement *)element; 39 | 40 | /** 41 | * The error encountered when parsing the selector string, or nil if there was no error. Errors are in the HTMLSelectorErrorDomain. 42 | */ 43 | @property (readonly, strong, nonatomic) NSError *error; 44 | 45 | @end 46 | 47 | /** 48 | * Returns a character set containing all CSS whitespace characters. This is not necessarily identical to `+[NSCharacterSet whitespaceCharacterSet]` or `+[NSCharacterSet whitespaceAndNewlineCharacterSet]`. 49 | */ 50 | extern NSCharacterSet * HTMLSelectorWhitespaceCharacterSet(void); 51 | 52 | /** 53 | * Error domain for all selector parse errors. Errors in this domain describe in localizedFailureReason where in the input the error occurred. 54 | */ 55 | extern NSString * const HTMLSelectorErrorDomain; 56 | 57 | /** 58 | * The corresponding value is an NSString of the input that caused the error.
59 | */ 60 | extern NSString * const HTMLSelectorInputStringErrorKey; 61 | 62 | /** 63 | * The corresponding value is an NSNumber of the 0-based index into the input string at which the parse error occurred. 64 | */ 65 | extern NSString * const HTMLSelectorLocationErrorKey; 66 | 67 | /** 68 | * HTMLSelector expands the HTMLNode class to search for matches. 69 | */ 70 | @interface HTMLNode (HTMLSelector) 71 | 72 | /** 73 | * Returns the nodes matched by selectorString, or nil if the string could not be parsed. 74 | */ 75 | - (NSArray *)nodesMatchingSelector:(NSString *)selectorString; 76 | 77 | /** 78 | * Returns the first node matched by selectorString, or nil if there is no such node or the string could not be parsed. 79 | */ 80 | - (HTMLElement *)firstNodeMatchingSelector:(NSString *)selectorString; 81 | 82 | /** 83 | * Returns the nodes matched by selector. 84 | */ 85 | - (NSArray *)nodesMatchingParsedSelector:(HTMLSelector *)selector; 86 | 87 | /** 88 | * Returns the first node matched by selector, or nil if there is no such node. 89 | */ 90 | - (HTMLElement *)firstNodeMatchingParsedSelector:(HTMLSelector *)selector; 91 | 92 | @end 93 | 94 | /** 95 | * HTMLNthExpression represents the expression in an :nth-child (or similar) pseudo-class. 96 | */ 97 | typedef struct { 98 | 99 | /** 100 | * The coefficient. 101 | */ 102 | NSInteger n; 103 | 104 | /** 105 | * The constant. 106 | */ 107 | NSInteger c; 108 | } HTMLNthExpression; 109 | 110 | /** 111 | * Returns an initialized HTMLNthExpression. 112 | * 113 | * @param n The coefficient. 114 | * @param c The constant. 115 | */ 116 | extern HTMLNthExpression HTMLNthExpressionMake(NSInteger n, NSInteger c); 117 | 118 | /** 119 | * Returns YES if the two expressions are equal, or NO otherwise. 
120 | */ 121 | extern BOOL HTMLNthExpressionEqualToNthExpression(HTMLNthExpression a, HTMLNthExpression b); 122 | 123 | /** 124 | * Translates a string resembling one of the forms `nx + c`, `odd`, or `even` into an HTMLNthExpression `{n, c}`. 125 | */ 126 | extern HTMLNthExpression HTMLNthExpressionFromString(NSString *string); 127 | 128 | /** 129 | * An HTMLNthExpression equivalent to the expression "odd". 130 | */ 131 | extern const HTMLNthExpression HTMLNthExpressionOdd; 132 | 133 | /** 134 | * An HTMLNthExpression equivalent to the expression "even". 135 | */ 136 | extern const HTMLNthExpression HTMLNthExpressionEven; 137 | 138 | /** 139 | * An invalid HTMLNthExpression. 140 | */ 141 | extern const HTMLNthExpression HTMLNthExpressionInvalid; 142 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLSerialization.h: -------------------------------------------------------------------------------- 1 | // HTMLSerialization.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import "HTMLNode.h" 6 | 7 | @interface HTMLNode (Serialization) 8 | 9 | /** 10 | * Describes the entire subtree rooted at the node. 11 | */ 12 | - (NSString *)recursiveDescription; 13 | 14 | /** 15 | * Returns the serialized HTML fragment of this node's children. 16 | * 17 | * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#serializing-html-fragments 18 | */ 19 | - (NSString *)innerHTML; 20 | 21 | /** 22 | * Returns the serialized HTML fragment of this node. 23 | */ 24 | - (NSString *)serializedFragment; 25 | 26 | @end 27 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLSerialization.m: -------------------------------------------------------------------------------- 1 | // HTMLSerialization.m 2 | // 3 | // Public domain. 
https://github.com/nolanw/HTMLReader

#import "HTMLSerialization.h"
#import "HTMLComment.h"
#import "HTMLDocumentType.h"
#import "HTMLElement.h"
#import "HTMLString.h"
#import "HTMLTextNode.h"

@implementation HTMLNode (Serialization)

- (NSString *)recursiveDescription
{
    NSMutableString *string = [NSMutableString new];
    RecursiveDescriptionHelper(self, string, 0);
    return string;
}

// Appends the description of `self` and, one indent level deeper, of each of its children.
// Every nested level starts on a new line with "|" followed by four spaces per level.
static void RecursiveDescriptionHelper(HTMLNode *self, NSMutableString *string, NSInteger indentLevel)
{
    if (indentLevel > 0) {
        [string appendString:[@"\n|" stringByPaddingToLength:indentLevel * 4 + 2
                                                  withString:@" "
                                             startingAtIndex:0]];
    }
    [string appendString:self.description];
    for (HTMLNode *node in self.children) {
        RecursiveDescriptionHelper(node, string, indentLevel + 1);
    }
}

- (NSString *)innerHTML
{
    // -valueForKey: on an array collects each child's serializedFragment.
    NSArray *fragments = [self.children.array valueForKey:@"serializedFragment"];
    return [fragments componentsJoinedByString:@""];
}

- (NSString *)serializedFragment
{
    // Each concrete node class supplies its own serialization in the categories below.
    [self doesNotRecognizeSelector:_cmd];
    return nil;
}

@end

@implementation HTMLComment (Serialization)

- (NSString *)description
{
    // Long comments are cut to 37 characters plus an ellipsis to keep descriptions short.
    NSString *truncatedData = self.data;
    if (truncatedData.length > 37) {
        truncatedData = [[truncatedData substringToIndex:37] stringByAppendingString:@"…"];
    }
    return [NSString stringWithFormat:@"<%@: %p <!-- %@ --> >", self.class, self, truncatedData];
}

- (NSString *)serializedFragment
{
    return [NSString stringWithFormat:@"<!--%@-->", self.data];
}

@end

@implementation HTMLDocumentType (Serialization)

- (NSString *)description
{
    NSMutableString *description = [NSMutableString new];
    [description appendFormat:@"<%@: %p <!DOCTYPE", self.class, self];

    NSString *name = self.name;
    if (name.length > 0) {
        [description appendFormat:@" %@", name];
    }

    NSString *publicIdentifier = self.publicIdentifier;
    NSString *systemIdentifier = self.systemIdentifier;
    if (publicIdentifier.length > 0 || systemIdentifier.length > 0) {
        [description appendFormat:@" \"%@\" \"%@\"", publicIdentifier, systemIdentifier];
    }

    [description appendString:@"> >"];
    return description;
}

- (NSString *)serializedFragment
{
    return [NSString stringWithFormat:@"<!DOCTYPE %@>", self.name];
}

@end

@implementation HTMLElement (Serialization)

- (NSString *)description
{
    NSMutableString *description = [NSMutableString new];
    [description appendFormat:@"<%@: %p <", self.class, self];

    if (self.namespace == HTMLNamespaceMathML) {
        [description appendString:@"math "];
    } else if (self.namespace == HTMLNamespaceSVG) {
        [description appendString:@"svg "];
    }

    [description appendString:self.tagName];

    [self.attributes enumerateKeysAndObjectsUsingBlock:^(id name, id value, BOOL *stop) {
        [description appendFormat:@" %@=\"%@\"", name, value];
    }];

    [description appendFormat:@"> %@ child", @(self.numberOfChildren)];
    if (self.numberOfChildren != 1) {
        [description appendString:@"ren"];
    }

    [description appendString:@">"];
    return description;
}

/**
 * Serializes the element as a start tag with escaped attribute values, its children, and an
 * end tag. Void elements are emitted as a start tag only.
 */
- (NSString *)serializedFragment
{
    NSMutableString *fragment = [NSMutableString new];
    [fragment appendFormat:@"<%@", self.tagName];

    [self.attributes enumerateKeysAndObjectsUsingBlock:^(NSString *name, NSString *value, BOOL *stop) {
        if ([name isEqualToString:@"xmlns:xmlns"]) {
            name = @"xmlns";
        }
        NSMutableString *escapedValue = [value mutableCopy];
        void (^replace)(id, id) = ^(NSString *search, NSString *replace) {
            NSRange range = NSMakeRange(0, escapedValue.length);
            [escapedValue replaceOccurrencesOfString:search withString:replace options:0 range:range];
        };
        // Attribute mode escaping: "&" must be replaced first so that the entities
        // produced by the later replacements are not escaped a second time.
        replace(@"&", @"&amp;");
        replace(@"\u00A0", @"&nbsp;");
        replace(@"\"", @"&quot;");
        [fragment appendFormat:@" %@=\"%@\"", name, escapedValue];
    }];

    [fragment appendString:@">"];

    // Void elements have neither content nor an end tag.
    if (StringIsEqualToAnyOf(self.tagName, @"area", @"base", @"basefont", @"bgsound", @"br", @"col", @"embed", @"frame", @"hr", @"img", @"input", @"keygen", @"link", @"menuitem", @"meta", @"param", @"source", @"track", @"wbr")) {
        return fragment;
    }

    // A parser drops one leading newline in these elements, so an extra one is emitted
    // to keep a text child that really starts with a newline intact on re-parse.
    if (StringIsEqualToAnyOf(self.tagName, @"pre", @"textarea", @"listing")) {
        if ([self.children.firstObject isKindOfClass:[HTMLTextNode class]]) {
            HTMLTextNode *textNode = self.children.firstObject;
            if ([textNode.data hasPrefix:@"\n"]) {
                [fragment appendString:@"\n"];
            }
        }
    }

    [fragment appendString:self.innerHTML];
    [fragment appendFormat:@"</%@>", self.tagName];
    return fragment;
}

@end

@implementation HTMLTextNode (Serialization)

- (NSString *)description
{
    // Long text is cut to 37 characters plus an ellipsis to keep descriptions short.
    NSString *truncatedData = self.data;
    if (truncatedData.length > 37) {
        truncatedData = [[truncatedData substringToIndex:37] stringByAppendingString:@"…"];
    }
    return [NSString stringWithFormat:@"<%@: %p '%@'>", self.class, self, truncatedData];
}

/**
 * Returns the text verbatim inside raw-text style parents, and with "&", U+00A0, "<" and ">"
 * replaced by character references everywhere else.
 */
- (NSString *)serializedFragment
{
    NSString *parentTagName = self.parentElement.tagName;
    if (StringIsEqualToAnyOf(parentTagName, @"style", @"script", @"xmp", @"iframe", @"noembed", @"noframes", @"plaintext", @"noscript")) {
        return self.data;
    } else {
        // "&" is escaped first so the entities added afterwards are left alone.
        NSString *escaped = [self.data stringByReplacingOccurrencesOfString:@"&" withString:@"&amp;"];
        escaped = [escaped stringByReplacingOccurrencesOfString:@"\u00A0" withString:@"&nbsp;"];
        escaped = [escaped stringByReplacingOccurrencesOfString:@"<" withString:@"&lt;"];
        escaped = [escaped stringByReplacingOccurrencesOfString:@">" withString:@"&gt;"];
        return escaped;
    }
}

@end

--------------------------------------------------------------------------------
/TextBookParse/epubParse/htmlParse/HTMLString.h:
--------------------------------------------------------------------------------
// HTMLString.h
//
// Public domain. https://github.com/nolanw/HTMLReader

#import <Foundation/Foundation.h>

// These are internal methods, so they should stay out of categories on Foundation classes.

/**
 * Append a single Unicode code point to an NSMutableString. This takes care of code points that require the use of surrogate pairs.
 *
 * @param self The NSMutableString that will get a character.
 * @param character The character to append.
 */
extern void AppendLongCharacter(NSMutableString *self, UTF32Char character);

/**
 * Execute a block on every Unicode code point in a string. This takes care of code points that require the use of surrogate pairs.
 *
 * @param self The string whose code points are enumerated.
 * @param block The block to execute, which has no return value and takes a code point as its sole parameter.
 */
extern void EnumerateLongCharacters(NSString *self, void (^block)(UTF32Char character));

/**
 * Returns a string consisting solely of the character.
 */
extern NSString * StringWithLongCharacter(UTF32Char character);

/**
 * Whether or not the character is a whitespace character.
 *
 * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/common-microsyntaxes.html#space-character
 */
extern BOOL is_whitespace(UTF32Char c);

/**
 * Whether or not the character is undefined or disallowed in an HTML document (YES means it is not allowed).
39 | * 40 | * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream 41 | */ 42 | extern BOOL is_undefined_or_disallowed(UTF32Char c); 43 | 44 | /** 45 | * @return YES if the first parameter is equal to any subsequent parameter, otherwise NO. 46 | */ 47 | #define StringIsEqualToAnyOf(search, ...) ({ \ 48 | NSString *s = (search); \ 49 | __unsafe_unretained NSString *potentials[] = { __VA_ARGS__ }; \ 50 | BOOL found = NO; \ 51 | size_t count = sizeof(potentials) / sizeof(potentials[0]); \ 52 | for (NSUInteger i = 0; i < count; i++) { \ 53 | if ([s isEqualToString:potentials[i]]) { \ 54 | found = YES; \ 55 | break; \ 56 | } \ 57 | } \ 58 | found; \ 59 | }) 60 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLString.m: -------------------------------------------------------------------------------- 1 | // HTMLString.m 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import "HTMLString.h" 6 | 7 | void AppendLongCharacter(NSMutableString *self, UTF32Char character) 8 | { 9 | unichar surrogates[2]; 10 | Boolean two = CFStringGetSurrogatePairForLongCharacter(character, surrogates); 11 | CFStringAppendCharacters((__bridge CFMutableStringRef)self, surrogates, two ? 
2 : 1); 12 | } 13 | 14 | void EnumerateLongCharacters(NSString *self, void (^block)(UTF32Char character)) 15 | { 16 | CFStringInlineBuffer buffer; 17 | CFIndex length = self.length; 18 | if (length == 0) return; 19 | CFStringInitInlineBuffer((__bridge CFStringRef)self, &buffer, CFRangeMake(0, length)); 20 | unichar highSurrogate = 0; 21 | for (CFIndex i = 0; i < length; i++) { 22 | unichar character = CFStringGetCharacterFromInlineBuffer(&buffer, i); 23 | if (highSurrogate) { 24 | if (CFStringIsSurrogateLowCharacter(character)) { 25 | block(CFStringGetLongCharacterForSurrogatePair(highSurrogate, character)); 26 | } else { 27 | block(highSurrogate); 28 | block(character); 29 | } 30 | highSurrogate = 0; 31 | } else if (CFStringIsSurrogateHighCharacter(character) && i < length - 1) { 32 | highSurrogate = character; 33 | } else { 34 | block(character); 35 | } 36 | } 37 | } 38 | 39 | NSString * StringWithLongCharacter(UTF32Char character) 40 | { 41 | unichar surrogates[2]; 42 | if (CFStringGetSurrogatePairForLongCharacter(character, surrogates)) { 43 | return [NSString stringWithFormat:@"%C%C", surrogates[0], surrogates[1]]; 44 | } else { 45 | return [NSString stringWithFormat:@"%C", surrogates[0]]; 46 | } 47 | } 48 | 49 | BOOL is_whitespace(UTF32Char c) 50 | { 51 | return c == '\t' || c == '\n' || c == '\f' || c == ' '; 52 | } 53 | 54 | BOOL is_undefined_or_disallowed(UTF32Char c) 55 | { 56 | return ((c >= 0x0001 && c <= 0x0008) || 57 | (c >= 0x000E && c <= 0x001F) || 58 | (c >= 0x007F && c <= 0x009F) || 59 | (c >= 0xFDD0 && c <= 0xFDEF) || 60 | c == 0x000B || 61 | c == 0xFFFE || 62 | c == 0xFFFF || 63 | c == 0x1FFFE || 64 | c == 0x1FFFF || 65 | c == 0x2FFFE || 66 | c == 0x2FFFF || 67 | c == 0x3FFFE || 68 | c == 0x3FFFF || 69 | c == 0x4FFFE || 70 | c == 0x4FFFF || 71 | c == 0x5FFFE || 72 | c == 0x5FFFF || 73 | c == 0x6FFFE || 74 | c == 0x6FFFF || 75 | c == 0x7FFFE || 76 | c == 0x7FFFF || 77 | c == 0x8FFFE || 78 | c == 0x8FFFF || 79 | c == 0x9FFFE || 80 | c == 0x9FFFF 
|| 81 | c == 0xAFFFE || 82 | c == 0xAFFFF || 83 | c == 0xBFFFE || 84 | c == 0xBFFFF || 85 | c == 0xCFFFE || 86 | c == 0xCFFFF || 87 | c == 0xDFFFE || 88 | c == 0xDFFFF || 89 | c == 0xEFFFE || 90 | c == 0xEFFFF || 91 | c == 0xFFFFE || 92 | c == 0xFFFFF || 93 | c == 0x10FFFE || 94 | c == 0x10FFFF); 95 | } 96 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLSupport.h: -------------------------------------------------------------------------------- 1 | // HTMLSupport.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import 6 | 7 | // NS_ENUM was defined circa iOS 6 and OS X 10.8, so we can't count on its presence. 8 | #ifndef NS_ENUM 9 | # define NS_ENUM(_type, _name) _type _name; enum 10 | #endif 11 | 12 | // -[NSArray firstObject] was only publicly exposed in iOS 7 and OS X 10.9, but it was implemented much earlier. 13 | #if (__IPHONE_OS_VERSION_MIN_REQUIRED >= 40000 && __IPHONE_OS_VERSION_MIN_REQUIRED < 70000) || \ 14 | (__MAC_OS_X_VERSION_MIN_REQUIRED >= 1060 && __MAC_OS_X_VERSION_MIN_REQUIRED < 1090) 15 | @interface NSArray (HTMLFirstObjectSupport) 16 | 17 | - (id)firstObject; 18 | 19 | @end 20 | #endif 21 | 22 | // NSArray and NSDictionary have subscripting support via ARCLite, but the compiler wasn't always happily exposing that fact. 
23 | #if __IPHONE_OS_VERSION_MAX_ALLOWED >= 40300 && __IPHONE_OS_VERSION_MAX_ALLOWED < 60000 24 | @interface NSArray (HTMLSubscriptingSupport) 25 | 26 | - (id)objectAtIndexedSubscript:(NSUInteger)index; 27 | - (void)setObject:(id)object atIndexedSubscript:(NSUInteger)index; 28 | 29 | @end 30 | 31 | @interface NSDictionary (HTMLSubscriptingSupport) 32 | 33 | - (id)objectForKeyedSubscript:(id)key; 34 | - (void)setObject:(id)object forKeyedSubscript:(id )key; 35 | 36 | @end 37 | 38 | @interface NSOrderedSet (HTMLSubscriptingSupport) 39 | 40 | - (id)objectAtIndexedSubscript:(NSUInteger)index; 41 | - (void)setObject:(id)object atIndexedSubscript:(NSUInteger)index; 42 | 43 | @end 44 | #endif 45 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLTextNode.h: -------------------------------------------------------------------------------- 1 | // HTMLTextNode.h 2 | // 3 | // Public domain. https://github.com/nolanw/HTMLReader 4 | 5 | #import "HTMLNode.h" 6 | 7 | /** 8 | * An HTMLTextNode represents text. 9 | * 10 | * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character 11 | */ 12 | @interface HTMLTextNode : HTMLNode 13 | 14 | /** 15 | * This is the designated initializer. 16 | */ 17 | - (id)initWithData:(NSString *)data; 18 | 19 | /** 20 | * The text. 21 | */ 22 | @property (readonly, copy, nonatomic) NSString *data; 23 | 24 | /** 25 | * Adds a string to the end of the node's text. 26 | */ 27 | - (void)appendString:(NSString *)string; 28 | 29 | @end 30 | -------------------------------------------------------------------------------- /TextBookParse/epubParse/htmlParse/HTMLTextNode.m: -------------------------------------------------------------------------------- 1 | // HTMLTextNode.m 2 | // 3 | // Public domain. 
https://github.com/nolanw/HTMLReader

#import "HTMLTextNode.h"

@implementation HTMLTextNode
{
    // Backing store for the node's text; mutated in place by -appendString:.
    NSMutableString *_data;
}

- (id)init
{
    self = [super init];
    if (!self) return nil;

    _data = [NSMutableString new];

    return self;
}

/**
 * Initializes a text node holding a copy of `data`. A nil `data` yields an empty node.
 */
- (id)initWithData:(NSString *)data
{
    self = [self init];
    if (!self) return nil;
    // -[NSMutableString setString:] raises on nil, so a missing string leaves the text empty.
    if (data) {
        [_data setString:data];
    }
    return self;
}

/**
 * Adds `string` to the end of the node's text. A nil or empty `string` is ignored.
 */
- (void)appendString:(NSString *)string
{
    // -[NSMutableString appendString:] raises on nil.
    if (string.length == 0) return;
    [_data appendString:string];
}

- (NSString *)data
{
    // Hand out an immutable snapshot so callers cannot observe later appends.
    return [_data copy];
}

#pragma mark NSCopying

- (id)copyWithZone:(NSZone *)zone
{
    HTMLTextNode *copy = [super copyWithZone:zone];
    [copy->_data setString:_data];
    return copy;
}

@end

--------------------------------------------------------------------------------
/TextBookParse/epubParse/htmlParse/HTMLTokenizer.h:
--------------------------------------------------------------------------------
// HTMLTokenizer.h
//
// Public domain. https://github.com/nolanw/HTMLReader

#import <Foundation/Foundation.h>
#import "HTMLOrderedDictionary.h"
#import "HTMLParser.h"
#import "HTMLTokenizerState.h"

/**
 * An HTMLTokenizer emits tokens derived from a string of HTML.
 *
 * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
 */
@interface HTMLTokenizer : NSEnumerator

/**
 * This is the designated initializer.
 */
- (id)initWithString:(NSString *)string;

/**
 * The string where tokens come from.
 */
@property (readonly, copy, nonatomic) NSString *string;

/**
 * The current state of the tokenizer. Sometimes the parser needs to change this.
 */
@property (assign, nonatomic) HTMLTokenizerState state;

/**
 * The parser that is consuming the tokenizer's tokens.
Sometimes the tokenizer needs to know the parser's state. 34 | */ 35 | @property (weak, nonatomic) HTMLParser *parser; 36 | 37 | @end 38 | 39 | /** 40 | * An HTMLDOCTYPEToken represents a `` tag. 41 | */ 42 | @interface HTMLDOCTYPEToken : NSObject 43 | 44 | /** 45 | * The name of the DOCTYPE, or nil if it has none. 46 | */ 47 | @property (copy, nonatomic) NSString *name; 48 | 49 | /** 50 | * The public identifier of the DOCTYPE, or nil if it has none. 51 | */ 52 | @property (copy, nonatomic) NSString *publicIdentifier; 53 | 54 | /** 55 | * The system identifier of the DOCTYPE, or nil if it has none. 56 | */ 57 | @property (copy, nonatomic) NSString *systemIdentifier; 58 | 59 | /** 60 | * YES if the parsed HTMLDocument's quirks mode should be set, or NO if other indicators should be used. 61 | */ 62 | @property (assign, nonatomic) BOOL forceQuirks; 63 | 64 | @end 65 | 66 | /** 67 | * An HTMLTagToken abstractly represents opening (`

`) and closing (`</p>
`) HTML tags with optional attributes.
 */
@interface HTMLTagToken : NSObject

/**
 * This is the designated initializer.
 */
- (id)init;

/**
 * Initializes a token with a tag name.
 */
- (id)initWithTagName:(NSString *)tagName;

/**
 * The name of this tag.
 */
@property (copy, nonatomic) NSString *tagName;

/**
 * A dictionary mapping HTMLAttributeName keys to NSString values.
 */
@property (copy, nonatomic) HTMLOrderedDictionary *attributes;

/**
 * YES if this tag is a self-closing tag (`<br/>`), or NO otherwise (`<br>` or `</br>`).
 */
@property (nonatomic) BOOL selfClosingFlag;

@end

/**
 * An HTMLStartTagToken represents a start tag like `<p>
`.
 */
@interface HTMLStartTagToken : HTMLTagToken

/**
 * Returns an initialized copy of this start tag token with a new tag name.
 *
 * @param tagName The tag name of the copied token.
 */
- (id)copyWithTagName:(NSString *)tagName;

@end

/**
 * An HTMLEndTagToken represents an end tag like `</p>
`.
 */
@interface HTMLEndTagToken : HTMLTagToken

@end

/**
 * An HTMLCommentToken represents a comment (`<!-- like this -->`).
 */
@interface HTMLCommentToken : NSObject

/**
 * This is the designated initializer.
 *
 * @param data The comment's data.
 */
- (id)initWithData:(NSString *)data;

/**
 * The comment's data.
 */
@property (readonly, copy, nonatomic) NSString *data;

@end

/**
 * An HTMLCharacterToken represents a series of code points as text in an HTML document.
 */
@interface HTMLCharacterToken : NSObject

/**
 * This is the designated initializer.
 */
- (id)initWithString:(NSString *)string;

/**
 * The code points represented by this token.
 */
@property (readonly, copy, nonatomic) NSString *string;

/**
 * Returns a token for the leading whitespace, or nil if there is no leading whitespace.
 */
- (instancetype)leadingWhitespaceToken;

/**
 * Returns a token for the characters after leading whitespace, or nil if the token is entirely whitespace.
 */
- (instancetype)afterLeadingWhitespaceToken;

@end

/**
 * An HTMLParseErrorToken represents a parse error during tokenization.
 *
 * Parse errors are emitted as tokens for context.
 */
@interface HTMLParseErrorToken : NSObject

/**
 * This is the designated initializer.
 *
 * @param error The reason for the parse error.
 */
- (id)initWithError:(NSString *)error;

/**
 * The reason for the parse error.
 */
@property (readonly, copy, nonatomic) NSString *error;

@end

/**
 * A single HTMLEOFToken is emitted when the end of the file is parsed and no further tokens will be emitted.
*/
@interface HTMLEOFToken : NSObject

@end

@interface HTMLTokenizer (Testing)

/**
 * Sets the name of the last start tag, which is used at certain steps of tokenization.
 *
 * @param tagName The name of the pretend last start tag.
 */
- (void)setLastStartTag:(NSString *)tagName;

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/htmlParse/HTMLTokenizerState.h:
--------------------------------------------------------------------------------
// HTMLTokenizerState.h
//
// Public domain. https://github.com/nolanw/HTMLReader

/**
 * The many states of an HTML tokenizer.
 *
 * The enumerators carry no explicit values, so they are numbered consecutively from 0 in
 * declaration order; keep new states at the end if the raw values are ever persisted.
 *
 * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
 */
typedef NS_ENUM(NSInteger, HTMLTokenizerState)
{
    HTMLDataTokenizerState,
    HTMLCharacterReferenceInDataTokenizerState,
    HTMLRCDATATokenizerState,
    HTMLCharacterReferenceInRCDATATokenizerState,
    HTMLRAWTEXTTokenizerState,
    HTMLScriptDataTokenizerState,
    HTMLPLAINTEXTTokenizerState,
    HTMLTagOpenTokenizerState,
    HTMLEndTagOpenTokenizerState,
    HTMLTagNameTokenizerState,
    HTMLRCDATALessThanSignTokenizerState,
    HTMLRCDATAEndTagOpenTokenizerState,
    HTMLRCDATAEndTagNameTokenizerState,
    HTMLRAWTEXTLessThanSignTokenizerState,
    HTMLRAWTEXTEndTagOpenTokenizerState,
    HTMLRAWTEXTEndTagNameTokenizerState,
    HTMLScriptDataLessThanSignTokenizerState,
    HTMLScriptDataEndTagOpenTokenizerState,
    HTMLScriptDataEndTagNameTokenizerState,
    HTMLScriptDataEscapeStartTokenizerState,
    HTMLScriptDataEscapeStartDashTokenizerState,
    HTMLScriptDataEscapedTokenizerState,
    HTMLScriptDataEscapedDashTokenizerState,
    HTMLScriptDataEscapedDashDashTokenizerState,
    HTMLScriptDataEscapedLessThanSignTokenizerState,
    HTMLScriptDataEscapedEndTagOpenTokenizerState,
    HTMLScriptDataEscapedEndTagNameTokenizerState,
    HTMLScriptDataDoubleEscapeStartTokenizerState,
    HTMLScriptDataDoubleEscapedTokenizerState,
    HTMLScriptDataDoubleEscapedDashTokenizerState,
    HTMLScriptDataDoubleEscapedDashDashTokenizerState,
    HTMLScriptDataDoubleEscapedLessThanSignTokenizerState,
    HTMLScriptDataDoubleEscapeEndTokenizerState,
    HTMLBeforeAttributeNameTokenizerState,
    HTMLAttributeNameTokenizerState,
    HTMLAfterAttributeNameTokenizerState,
    HTMLBeforeAttributeValueTokenizerState,
    HTMLAttributeValueDoubleQuotedTokenizerState,
    HTMLAttributeValueSingleQuotedTokenizerState,
    HTMLAttributeValueUnquotedTokenizerState,
    HTMLCharacterReferenceInAttributeValueTokenizerState,
    HTMLAfterAttributeValueQuotedTokenizerState,
    HTMLSelfClosingStartTagTokenizerState,
    HTMLBogusCommentTokenizerState,
    HTMLMarkupDeclarationOpenTokenizerState,
    HTMLCommentStartTokenizerState,
    HTMLCommentStartDashTokenizerState,
    HTMLCommentTokenizerState,
    HTMLCommentEndDashTokenizerState,
    HTMLCommentEndTokenizerState,
    HTMLCommentEndBangTokenizerState,
    HTMLDOCTYPETokenizerState,
    HTMLBeforeDOCTYPENameTokenizerState,
    HTMLDOCTYPENameTokenizerState,
    HTMLAfterDOCTYPENameTokenizerState,
    HTMLAfterDOCTYPEPublicKeywordTokenizerState,
    HTMLBeforeDOCTYPEPublicIdentifierTokenizerState,
    HTMLDOCTYPEPublicIdentifierDoubleQuotedTokenizerState,
    HTMLDOCTYPEPublicIdentifierSingleQuotedTokenizerState,
    HTMLAfterDOCTYPEPublicIdentifierTokenizerState,
    HTMLBetweenDOCTYPEPublicAndSystemIdentifiersTokenizerState,
    HTMLAfterDOCTYPESystemKeywordTokenizerState,
    HTMLBeforeDOCTYPESystemIdentifierTokenizerState,
    HTMLDOCTYPESystemIdentifierDoubleQuotedTokenizerState,
    HTMLDOCTYPESystemIdentifierSingleQuotedTokenizerState,
    HTMLAfterDOCTYPESystemIdentifierTokenizerState,
    HTMLBogusDOCTYPETokenizerState,
    HTMLCDATASectionTokenizerState,
};
--------------------------------------------------------------------------------
/TextBookParse/epubParse/htmlParse/HTMLTreeEnumerator.h:
--------------------------------------------------------------------------------
// HTMLTreeEnumerator.h
//
// Public domain. https://github.com/nolanw/HTMLReader

#import <Foundation/Foundation.h>
@class HTMLNode;

/**
 * An HTMLTreeEnumerator emits HTMLNode instances in tree order (preorder, depth-first) or reverse tree order (preorder, depth-first starting with the last child).
 */
@interface HTMLTreeEnumerator : NSEnumerator

/**
 * This is the designated initializer.
 */
- (id)initWithNode:(HTMLNode *)node reversed:(BOOL)reversed;

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/htmlParse/HTMLTreeEnumerator.m:
--------------------------------------------------------------------------------
// HTMLTreeEnumerator.m
//
// Public domain. https://github.com/nolanw/HTMLReader

#import "HTMLTreeEnumerator.h"
#import "HTMLNode.h"

// For performance we'll cache the number of nodes at each level of the tree.
typedef struct {
    NSUInteger i;      // Index of the child currently being visited on this level.
    NSUInteger count;  // Number of children on this level, captured when the level was entered.
} Row;

// A growable stack of rows: one entry per tree level between the start node and the next node.
typedef struct {
    Row *path;
    NSUInteger length;
    NSUInteger capacity;
} IndexPath;

@implementation HTMLTreeEnumerator
{
    HTMLNode *_nextNode;
    BOOL _reversed;
    IndexPath _indexPath;
}

- (void)dealloc
{
    // The row stack is plain C memory, so ARC does not release it for us.
    free(_indexPath.path);
}

/**
 * Creates an enumerator that starts at `node`.
 *
 * @param node     The first node to emit; its descendants follow.
 * @param reversed YES to visit each level's children from last to first, NO for first to last.
 */
- (id)initWithNode:(HTMLNode *)node reversed:(BOOL)reversed
{
    self = [super init];
    if (!self) return nil;
    
    _nextNode = node;
    _reversed = reversed;
    
    return self;
}

/**
 * Returns the next node in the traversal, or nil once every node has been emitted.
 */
- (id)nextObject
{
    // This enumerator works by storing the *next* node we intend to emit, and the index path that points to that next node.
    HTMLNode *currentNode = _nextNode;
    
    NSUInteger numberOfChildren = currentNode.numberOfChildren;
    
    if (numberOfChildren > 0) {
        
        // Depth-first means the next node we'll emit is the current node's first child.
        if (_indexPath.length == _indexPath.capacity) {
            NSUInteger grownCapacity = _indexPath.capacity + 16;
            Row *grownPath = reallocf(_indexPath.path, sizeof(_indexPath.path[0]) * grownCapacity);
            if (!grownPath) {
                // reallocf() has already freed the old buffer on failure. Forget it so that
                // neither -dealloc nor a later call touches freed memory, and finish the
                // enumeration after this node rather than dereferencing NULL below.
                _indexPath.path = NULL;
                _indexPath.length = 0;
                _indexPath.capacity = 0;
                _nextNode = nil;
                return currentNode;
            }
            _indexPath.path = grownPath;
            _indexPath.capacity = grownCapacity;
        }
        Row *row = _indexPath.path + _indexPath.length;
        _indexPath.length++;
        row->count = numberOfChildren;
        row->i = _reversed ? numberOfChildren - 1 : 0;
        _nextNode = [currentNode childAtIndex:row->i];
        
    } else {
        
        // We're out of children on this row, so walk back up the tree until we find a level with spare children.
        HTMLNode *parentNode = currentNode.parentNode;
        while (_indexPath.length > 0) {
            Row *row = _indexPath.path + _indexPath.length - 1;
            if (_reversed && row->i > 0) {
                row->i--;
            } else if (!_reversed && row->i + 1 < row->count) {
                row->i++;
            } else {
                // This level is exhausted: pop it and retry one level further up.
                _indexPath.length--;
                parentNode = parentNode.parentNode;
                continue;
            }
            _nextNode = [parentNode childAtIndex:row->i];
            break;
        }
        
        // No more spare children means we're done.
        if (_indexPath.length == 0) {
            _nextNode = nil;
        }
    }
    return currentNode;
}

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/htmlParse/NSString+HTMLEntities.h:
--------------------------------------------------------------------------------
// NSString+HTMLEntities.h
//
// Public domain. https://github.com/nolanw/HTMLReader

#import <Foundation/Foundation.h>

@interface NSString (HTMLEntities)

/**
 * Returns a copy of the string with the necessary characters escaped for HTML.
 *
 * For more information, see http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#escapingString (the algorithm is not invoked in the "attribute mode").
 */
- (NSString *)html_stringByEscapingForHTML;

/**
 * Returns a copy of the string with all recognized HTML entities replaced by their respective code points. If no replacement is necessary, the same instance may be returned.
 */
- (NSString *)html_stringByUnescapingHTML;

@end
--------------------------------------------------------------------------------
/TextBookParse/epubParse/htmlParse/NSString+HTMLEntities.m:
--------------------------------------------------------------------------------
// NSString+HTMLEntities.m
//
// Public domain. 
https://github.com/nolanw/HTMLReader

#import "NSString+HTMLEntities.h"
#import "HTMLEntities.h"
#import "HTMLString.h"

@implementation NSString (HTMLEntities)

/**
 * Returns a mutable copy of the receiver in which `&`, U+00A0, `"`, `<` and `>` are replaced
 * by the corresponding HTML entities.
 */
- (NSString *)html_stringByEscapingForHTML
{
    NSMutableString *escaped = [self mutableCopy];
    void (^replace)(NSString *, NSString *) = ^(NSString *find, NSString *replacement) {
        [escaped replaceOccurrencesOfString:find withString:replacement options:0 range:NSMakeRange(0, escaped.length)];
    };
    // The ampersand goes first; otherwise the ampersands introduced by the later
    // replacements would themselves be escaped a second time.
    replace(@"&", @"&amp;");
    replace(@"\u00A0", @"&nbsp;");
    replace(@"\"", @"&quot;");
    replace(@"<", @"&lt;");
    replace(@">", @"&gt;");
    return escaped;
}

/**
 * Returns the receiver with numeric (`&#65;`, `&#x41;`) and named (`&amp;`) character
 * references replaced by the characters they stand for.
 *
 * The receiver itself is returned when it contains no ampersand at all. The string is
 * processed from the last ampersand to the first, so replacing one reference never shifts the
 * offsets of the references that are still to be examined.
 */
- (NSString *)html_stringByUnescapingHTML
{
    NSRange ampersand = [self rangeOfString:@"&" options:NSBackwardsSearch];
    if (ampersand.location == NSNotFound) return self;
    
    // These are expensive to create, so we'll lazily create them once per unescaping operation.
    NSCharacterSet *decimalDigitCharacterSet;
    NSCharacterSet *hexadecimalDigitCharacterSet;
    
    NSRange searchRange = NSMakeRange(0, self.length);
    NSScanner *scanner = [NSScanner scannerWithString:self];
    // By default NSScanner silently skips whitespace before every scan, which would turn text
    // such as "& #65;" or "&#65 ;" into a character reference. Scan exactly what is there.
    scanner.charactersToBeSkipped = nil;
    NSMutableString *unescaped = [self mutableCopy];
    do {
        // Whatever happens below, the next search must only look to the left of this ampersand.
        searchRange.length = ampersand.location;
        
        // An ampersand that ends the string cannot start a reference, but earlier ones still can.
        if (NSMaxRange(ampersand) == self.length) continue;
        
        NSString *replacement;
        
        // Numeric entity.
        scanner.scanLocation = NSMaxRange(ampersand);
        if ([scanner scanString:@"#" intoString:nil]) {
            
            UInt32 entity;
            
            // Hex number. (The scanner is case-insensitive, so this accepts both "x" and "X".)
            if ([scanner scanString:@"x" intoString:nil]) {
                if (!hexadecimalDigitCharacterSet) {
                    hexadecimalDigitCharacterSet = [NSCharacterSet characterSetWithCharactersInString:@"0123456789ABCDEFabcdef"];
                }
                NSString *entityString;
                if ([scanner scanCharactersFromSet:hexadecimalDigitCharacterSet intoString:&entityString]) {
                    NSScanner *hexScanner = [NSScanner scannerWithString:entityString];
                    unsigned int hex = 0;
                    [hexScanner scanHexInt:&hex];
                    entity = hex;
                } else {
                    continue;
                }
            }
            
            // Decimal number.
            else {
                if (!decimalDigitCharacterSet) {
                    decimalDigitCharacterSet = [NSCharacterSet characterSetWithCharactersInString:@"0123456789"];
                }
                NSString *entityString;
                if ([scanner scanCharactersFromSet:decimalDigitCharacterSet intoString:&entityString]) {
                    NSInteger decimal = entityString.integerValue;
                    // Clamp anything beyond the Unicode range so the narrowing cast below cannot wrap.
                    if (decimal > 0x10FFFF) {
                        entity = UINT32_MAX;
                    } else {
                        entity = (UInt32)decimal;
                    }
                } else {
                    continue;
                }
            }
            
            UTF32Char win1252Replacement = ReplacementForNumericEntity(entity);
            if (win1252Replacement) {
                entity = win1252Replacement;
            }
            
            // Surrogates and out-of-range values become U+FFFD REPLACEMENT CHARACTER.
            if ((entity >= 0xD800 && entity <= 0xDFFF) || entity > 0x10FFFF) {
                entity = 0xFFFD;
            }
            
            replacement = StringWithLongCharacter(entity);
            
            // Optional semicolon.
            [scanner scanString:@";" intoString:nil];
        }
        
        // Named entity.
        else {
            NSRange nameRange = NSMakeRange(NSMaxRange(ampersand), LongestEntityNameLength);
            if (NSMaxRange(nameRange) > self.length) {
                nameRange.length = self.length - nameRange.location;
            }
            NSString *nameString = [self substringWithRange:nameRange];
            NSString *parsedEntity;
            replacement = StringForNamedEntity(nameString, &parsedEntity);
            if (replacement) {
                // Advance the scanner past the matched name so the replaced range is measured correctly.
                [scanner scanString:parsedEntity intoString:nil];
            } else {
                continue;
            }
        }
        
        [unescaped replaceCharactersInRange:NSMakeRange(ampersand.location, scanner.scanLocation - ampersand.location) withString:replacement];
    } while ((ampersand = [self rangeOfString:@"&" options:NSBackwardsSearch range:searchRange]).location != NSNotFound);
    return unescaped;
}

@end
--------------------------------------------------------------------------------
/TextBookParse/main.m:
--------------------------------------------------------------------------------
//
//  main.m
//  TextBookParse
//
//  Created by xxsy-ima001 on 14-8-5.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

#import <UIKit/UIKit.h>

#import "AppDelegate.h"

int main(int argc, char * argv[])
{
    @autoreleasepool {
        return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class]));
    }
}
--------------------------------------------------------------------------------
/TextBookParseTests/TextBookParseTests-Info.plist:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | ${EXECUTABLE_NAME} 9 | CFBundleIdentifier 10 | xiaoxiangwenxue.${PRODUCT_NAME:rfc1034identifier} 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundlePackageType 14 | BNDL 15 | CFBundleShortVersionString 16 | 1.0 17 | CFBundleSignature 18 | ???? 
19 | CFBundleVersion 20 | 1 21 | 22 | 23 | 
--------------------------------------------------------------------------------
/TextBookParseTests/TextBookParseTests.m:
--------------------------------------------------------------------------------
//
//  TextBookParseTests.m
//  TextBookParseTests
//
//  Created by xxsy-ima001 on 14-8-5.
//  Copyright (c) 2014 ___xiaoxiangwenxue___. All rights reserved.
//

#import <XCTest/XCTest.h>

@interface TextBookParseTests : XCTestCase

@end

@implementation TextBookParseTests

- (void)setUp
{
    [super setUp];
    // Put setup code here. This method is called before the invocation of each test method in the class.
}

- (void)tearDown
{
    // Put teardown code here. This method is called after the invocation of each test method in the class.
    [super tearDown];
}

// Template placeholder: it fails unconditionally until a real test replaces it.
- (void)testExample
{
    XCTFail(@"No implementation for \"%s\"", __PRETTY_FUNCTION__);
}

@end
--------------------------------------------------------------------------------
/TextBookParseTests/en.lproj/InfoPlist.strings:
--------------------------------------------------------------------------------
/* Localized versions of Info.plist keys */

--------------------------------------------------------------------------------