基本的思路:
- 用正则获取章节名称所在的位置(`NSRange`)
- 结合下一章的章节名的位置拿到本章内容信息
具体实现方法
/// Finds every chapter heading in `content`.
///
/// A heading is an optional "第", a run of Arabic or Chinese numerals, one unit
/// character (章/回/节/卷/集/幕/计), optional spaces/tabs and then the first run of
/// non-whitespace characters. It must either follow whitespace or sit at the very
/// start of the text.
///
/// @param content The text to scan. nil or an empty string yields an empty array.
/// @return The matches in the order they occur in `content`. Each match's `range`
///         includes the leading whitespace consumed by the pattern.
+ (NSArray<NSTextCheckingResult *> *)extractChapterListWithContent:(NSString *)content{
    // The string argument of -matchesInString:options:range: is non-null, so bail out early.
    if (content.length == 0) {
        return @[];
    }

    // The pattern never changes, so compile it once and reuse it.
    static NSRegularExpression *regExp = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        // `(?:^|(\s)+)` also accepts a heading that starts the text; the capture
        // groups keep the same numbering as before (1 = whitespace, 2 = title char).
        NSString *regPattern = @"(?:^|(\\s)+)[第]{0,1}[0-9一二三四五六七八九十百千万]+[章回节卷集幕计][ \t]*(\\S)*";
        NSError *error = nil;
        regExp = [NSRegularExpression regularExpressionWithPattern:regPattern
                                                           options:NSRegularExpressionCaseInsensitive
                                                             error:&error];
    });

    // A failed compilation leaves regExp nil; report "no headings" instead of nil.
    if (regExp == nil) {
        return @[];
    }

    // NSMatchingReportCompletion only matters for block enumeration, so pass no options.
    return [regExp matchesInString:content options:0 range:NSMakeRange(0, content.length)];
}
根据 title Range 提取章节所需信息
/// Splits `content` into chapter models using the headings found by
/// +extractChapterListWithContent:.
///
/// - No heading found: a single model titled "内容" covering the whole text is returned.
/// - Non-blank text before the first heading becomes an extra leading model titled "开始".
/// - Every heading yields a model spanning from the heading to the next heading
///   (or to the end of the text for the last one).
///
/// @param content The full text.
/// @param maintainEmptyCharcter Forwarded to +chapterModels:addModel:content:maintainEmpty:
///        for every model created from a heading.
/// @return An immutable array of the collected models.
+ (NSArray<ChapterModel *> *)analyseTxtWithContent:(NSString *)content
                             maintainEmptyCharcter:(BOOL)maintainEmptyCharcter{
    NSArray<NSTextCheckingResult *> *headings = [self extractChapterListWithContent:content];
    NSUInteger headingCount = headings.count;

    // No heading at all: the whole text is one chapter.
    if (headingCount == 0) {
        NSRange wholeText = NSMakeRange(0, content.length);
        ChapterModel *single = [[ChapterModel alloc] init];
        single.title = @"内容";
        single.contentRange = wholeText;
        single.allContentRange = wholeText;
        return @[single];
    }

    NSMutableArray *collected = [NSMutableArray array];
    NSUInteger lastIndex = headingCount - 1;

    for (NSUInteger idx = 0; idx < headingCount; idx++) {
        NSRange headingRange = headings[idx].range;
        NSString *headingText = [[content yj_substringWithRange:headingRange] trimmed];
        NVLog(@"%@",headingText);

        // Text in front of the first heading gets its own chapter when it is not blank.
        if (idx == 0) {
            NSRange prefaceRange = NSMakeRange(0, headingRange.location);
            NSString *preface = [[content yj_substringWithRange:prefaceRange] trimmed];
            if (preface.length > 0) {
                ChapterModel *prefaceModel = [ChapterModel modelWithTitle:@"开始"
                                                               titleRange:NSMakeRange(0, 0)
                                                          allContentRange:prefaceRange];
                [collected addObject:prefaceModel];
            }
        }

        // The chapter ends where the next heading begins, or at the end of the text.
        NSUInteger chapterEnd;
        if (idx == lastIndex) {
            chapterEnd = content.length;
        } else {
            chapterEnd = headings[idx + 1].range.location;
            // Skip this heading if the next one does not start after it.
            if (chapterEnd <= headingRange.location) {
                continue;
            }
        }

        NSRange chapterRange = NSMakeRange(headingRange.location, chapterEnd - headingRange.location);
        ChapterModel *chapter = [ChapterModel modelWithTitle:headingText
                                                  titleRange:headingRange
                                             allContentRange:chapterRange];
        [self chapterModels:collected
                   addModel:chapter
                    content:content
              maintainEmpty:maintainEmptyCharcter];
    }

    return [collected copy];
}
/// Appends `model` to `chapterModels`, optionally dropping chapters whose body is blank.
///
/// @param chapterModels The array being built.
/// @param model The chapter to add; nil is ignored.
/// @param content The full text that `model.contentRange` refers to.
/// @param maintainEmptyCharcter When true the model is always added; otherwise it is
///        added only if the text in `model.contentRange` is non-empty after trimming
///        whitespace and newlines.
+ (void)chapterModels:(NSMutableArray *)chapterModels
             addModel:(ChapterModel *)model
              content:(NSString *)content
        maintainEmpty:(BOOL)maintainEmptyCharcter{
    // -addObject: raises on nil, so there is nothing useful to do without a model.
    if (model == nil) {
        return;
    }

    // Test the flag for truthiness rather than `== YES`: BOOL may hold any non-zero value.
    if (maintainEmptyCharcter) {
        [chapterModels addObject:model];
        return;
    }

    // Only pay for the substring when blank chapters have to be filtered out.
    NSString *body = [[content yj_substringWithRange:model.contentRange] trimmed];
    if (body.length > 0) {
        [chapterModels addObject:model];
    }
}
添加辅助方法
新增 `ChapterModel` 类:
/// Describes one chapter by its title and by character ranges into a larger text.
@interface ChapterModel : NSObject
/// The chapter title.
@property(nonatomic,copy)NSString *title ;
/// Range occupied by the title.
@property(nonatomic)NSRange titleRange ;
/// Range of the chapter body, i.e. what follows the title.
@property(nonatomic)NSRange contentRange ;
/// Range of the whole chapter (title plus body).
@property(nonatomic)NSRange allContentRange;

/// Creates a model and derives `contentRange` from the two given ranges.
///
/// `contentRange` starts right after `titleRange` and has the length of
/// `allContentRange` minus the length of `titleRange` (never less than zero).
///
/// @param title The chapter title.
/// @param titleRange Range of the title.
/// @param allContentRange Range of the whole chapter.
/// @return A new, fully populated model.
+ (instancetype)modelWithTitle:(NSString *)title
                    titleRange:(NSRange )titleRange
               allContentRange:(NSRange )allContentRange;
@end

@implementation ChapterModel
+ (instancetype)modelWithTitle:(NSString *)title
                    titleRange:(NSRange )titleRange
               allContentRange:(NSRange )allContentRange
{
    // `self` instead of the hard-coded class so subclasses get their own type back.
    ChapterModel *model = [[self alloc] init];
    model.title = title ;
    model.titleRange = titleRange ;
    model.allContentRange = allContentRange ;

    // NSRange lengths are unsigned: clamp to zero instead of wrapping around when the
    // title is longer than the whole chapter range.
    NSUInteger bodyLength = 0;
    if (allContentRange.length > titleRange.length) {
        bodyLength = allContentRange.length - titleRange.length;
    }
    model.contentRange = NSMakeRange(titleRange.location + titleRange.length, bodyLength);
    return model;
}
@end
添加 `NSString` 类别,防止 `substringWithRange:` 越界
@implementation NSString (YJSafe)

/// Bounds-checked variant of -substringWithRange:.
///
/// @param range The range to extract.
/// @return The substring for `range`, or an empty string when `range` does not lie
///         completely inside the receiver.
- (NSString *)yj_substringWithRange:(NSRange)range{
    NSUInteger total = self.length;
    // Check with a subtraction: `range.location + range.length` can wrap around
    // NSUInteger and would then wrongly look like an in-bounds end index.
    if (range.location > total || range.length > total - range.location) {
        return @"";
    }
    return [self substringWithRange:range];
}

/// Returns the receiver with whitespace and newline characters removed from both ends.
- (NSString *)trimmed{
    NSCharacterSet *whiteSpaceSet = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    return [self stringByTrimmingCharactersInSet:whiteSpaceSet];
}

@end
异步获取
/// Parses `content` into chapters and hands them to `result`.
///
/// @param content The full text to split.
/// @param isAsync When true, parsing runs on a global queue and `result` is invoked
///        on the main queue; otherwise parsing and `result` run inline on the caller.
/// @param isNeedMaintainEmptyCharcter Forwarded to
///        +analyseTxtWithContent:maintainEmptyCharcter:.
/// @param result Receives the parsed models. When nil, the method does nothing.
+ (void)extractNovelWithContent:(NSString *)content
                          async:(BOOL)isAsync
          maintainEmptyCharcter:(BOOL)isNeedMaintainEmptyCharcter
                         result:(void(^)(NSArray<ChapterModel *> *models))result {
    // Without a consumer there is no point in parsing.
    if (!result) {
        return;
    }

    // Synchronous path: parse and deliver on the calling thread.
    if (!isAsync) {
        NSArray<ChapterModel *> *chapters = [self analyseTxtWithContent:content
                                                  maintainEmptyCharcter:isNeedMaintainEmptyCharcter];
        result(chapters);
        return;
    }

    // Asynchronous path: parse in the background, deliver on the main queue.
    dispatch_queue_t workQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
    dispatch_async(workQueue, ^{
        NSArray<ChapterModel *> *chapters = [self analyseTxtWithContent:content
                                                  maintainEmptyCharcter:isNeedMaintainEmptyCharcter];
        dispatch_async(dispatch_get_main_queue(), ^{
            result(chapters);
        });
    });
}
预览:
方法调用
效果预览
此外还需要解决的问题:
- 这个正则是根据这篇文章修改的,还不能匹配“第n章”以及章节名字中含有多个空格的情况
- 有的标题可能比较特别,好比【《》目录 第二回 悟彻菩提真妙理 断魔归本合元神】。这种情况下,匹配“第二回”所在的一整行应该比较好
- 不过还有这样的【《》目录 第十一回 还受生唐王遵善果 度孤魂萧【《》目录 第十二回 玄奘秉诚建大会 观音显象化金蝉 】,章节内容直接缺失。如果匹配所在行,估计会出问题。
- 忽然觉得自己该恶补下正则的知识了……
参考内容
iOS txt小说断章正则表达式实现
TXT小说断章实现