iOS第三方HTML解析 TFHpple 的簡單使用

壯壯發表於2015-10-15

準備工作

1.匯入TFHpple
2.引入動態庫檔案libxml2.2.dylib
3.PROJECT 中的 Search Path – header search paths新增 /usr/include/libxml2


解析步驟

1.初始化data
2.根據data建立TFHpple例項
3.查詢節點存入陣列
4.在該節點下 迴圈查詢子節點

源HTML程式碼:

<div class="cell item" style=""><div style="position: absolute; margin: -10px -10px 0px 650px;"></div>
    <table cellpadding="0" cellspacing="0" border="0" width="100%">
        <tr>
            
            <td width="48" valign="top" align="center"><a href="/member/zhangyi2099"><img src="//cdn.v2ex.co/avatar/d00c/ceb1/18330_normal.png?m=1345037943" class="avatar" border="0" align="default" /></a></td>
            <td width="10"></td>
            
            <td width="auto" valign="middle"><span class="item_title"><a href="/t/228173#reply1">看了本「網球優等生」</a></span>
            <div class="sep5"></div>
            <span class="small fade"><div class="votes"></div><a class="node" href="/go/acg">ACG</a> &nbsp;•&nbsp; <strong><a href="/member/zhangyi2099">zhangyi2099</a></strong> &nbsp;•&nbsp; 20 分鐘前 &nbsp;•&nbsp; 最後回覆來自 <strong><a href="/member/yishanxin">yishanxin</a></strong></span>
            </td>
            <td width="70" align="right" valign="middle">
                
                <a href="/t/228173#reply1" class="count_livid">1</a>
                
            </td>
        </tr>
    </table>
</div>

Objective-C程式碼

NSData *htmlData = [[NSData alloc]initWithContentsOfURL:[NSURL URLWithString:@"http://www.xxx.com/xxxx?x=1"]];

TFHpple *xpathParser = [[TFHpple alloc]initWithHTMLData:htmlData];

#pragma mark 每頁主題
NSArray *itemArray = [xpathParser searchWithXPathQuery:@"//div[@class = `cell item`]"];

//通過for in 在itemArray陣列中 迴圈查詢子節點
for (TFHppleElement *hppleElement in itemArray) {
    
/***
這段被正則表達代替 @"//div[@class = `cell item`]"]
  if ([[hppleElement objectForKey:@"class" ] isEqualToString:@"cell item"]) {
  [self.allDataMutableArray addObject:hppleElement];
  }
*/
    
#pragma mark 子節點頭像
    
    NSArray *IMGElementsArr = [hppleElement searchWithXPathQuery:@"//img"];
    for (TFHppleElement *tempAElement in IMGElementsArr) {
        NSString *imgStr = [tempAElement objectForKey:@"src"];
        NSString *subStr = [@"http:" stringByAppendingString:imgStr];
        [self.avatarMutableArray addObject:subStr];
    }
    
#pragma mark 子節點標題/連結
    
   NSArray *TitleElementArr = [hppleElement searchWithXPathQuery:@"//span[@class=`item_title`]"];
    for (TFHppleElement *tempAElement in TitleElementArr) {
        //獲得標題
        NSString *titleStr =  [tempAElement content];
        
        //1.獲得子節點(正文連線節點) 2.獲得節點屬性值 3.加入到字典中
        NSArray * arr = [tempAElement children];
        TFHppleElement *href = arr.firstObject;
        NSString * titleHrefStr = [href objectForKey:@"href"];
        
        [self.allDataMutableDict setObject:titleStr forKey:@"title"];
        self.allDataMutableDict[@"titleHref"] = titleHrefStr;
    }
    
    
#pragma mark 子節點fade
    //簡化寫法 簡化3步
    NSArray *nodeElementArr = [hppleElement searchWithXPathQuery:@"//a[@class=`node`]"];
    self.allDataMutableDict[@"node"] = [nodeElementArr.firstObject content];

    NSArray *fadeElementArr = [hppleElement searchWithXPathQuery:@"//span[@class = `small fade`]"];
    NSArray *subArray = [ [fadeElementArr.firstObject content] componentsSeparatedByString:@"  •  "];
    
    self.allDataMutableDict[@"louZhu"] = [subArray objectAtIndex:1];
    self.allDataMutableDict[@"lastTime"] = [subArray objectAtIndex:2];
    

    
#pragma mark 子節點回複數
    NSArray * repeatElementArr = [hppleElement searchWithXPathQuery:@"//a[@class = `count_livid`]"];
    if ([repeatElementArr.firstObject content ]) {
        self.allDataMutableDict[@"repeatCount"] = [repeatElementArr.firstObject content];
    }else{
        self.allDataMutableDict[@"repeatCount"] = [NSString stringWithFormat:@"%d",0];
    }
    
    
    
#pragma mark 轉化model 存進陣列
    [model setValuesForKeysWithDictionary:self.allDataMutableDict];
    [self.allDataMutableArray addObject:model];
    
    
}

相關文章