联系客服: iP138客服 |

iP查询接口
  • API接口QQ交流群:177096428   iP138 api接口
  • 数据修正QQ处理群:94181690    iP138数据修正

网页提取接口支持HTTPS(赠送100次)

简介:新闻网页正文抽取,可提取互联网上99%以上的文章,智能识别其中包含的标题及正文内容。互联网上有几百万个站点,每个站点还有N个不同的文章页面模板,您只需要接入我们的接口,就无需再为编写获取文章内容的正则表达式而苦恼,可直接提取标题及正文内容。

连接应用数:385

PHP调用网页提取接口示例:

                                <?php
                                // URL-encode the target page address so it can travel as a query parameter.
                                $queryUrl = urlencode("http://www.sina.com.cn/");
                                $url = sprintf('http://api.ip138.com/text/?url=%s&type=1', $queryUrl);

                                // The API token is supplied as a custom request header line.
                                $header = array('token:00d5cb1fac5dc5cbfe2ff218292a2dfd33');
                                echo getData($url, $header);

                                /**
                                 * Issue a GET request through cURL and hand back what curl_exec() returns.
                                 *
                                 * @param string $url    Full request URL.
                                 * @param array  $header Header lines passed to CURLOPT_HTTPHEADER.
                                 * @return mixed The curl_exec() result; CURLOPT_RETURNTRANSFER is enabled,
                                 *               so the response is returned rather than printed.
                                 */
                                function getData($url,$header){
                                    $curl = curl_init();
                                    // Caller-supplied headers go last so a rejected header value cannot
                                    // prevent the fixed options from being applied.
                                    curl_setopt_array($curl, array(
                                        CURLOPT_URL            => $url,
                                        CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_1_1,
                                        CURLOPT_RETURNTRANSFER => 1,
                                        CURLOPT_CONNECTTIMEOUT => 3,
                                        CURLOPT_HTTPHEADER     => $header,
                                    ));
                                    $result = curl_exec($curl);
                                    curl_close($curl);
                                    return $result;
                                }
                                ?>
                                

Go语言调用网页提取接口示例:

                                    package main

                                    import (
                                        "encoding/json"
                                        "fmt"
                                        "io/ioutil"
                                        "net/url"
                                        "net/http"
                                    )

                                    // Endpoint and credential shared by the requests in this example.
                                    const (
                                        // APIURL is the base address of the text-extraction endpoint.
                                        APIURL = "http://api.ip138.com/text/"
                                        // TOKEN is the value sent in the "token" request header.
                                        TOKEN = "bd4c2bf9a38ab06f7cae88c9759ee172"
                                    )

                                    //----------------------------------
                                    // 正文提取接口调用示例代码
                                    //----------------------------------
                               
                                    //json struct
                                    type jsoninfo struct {
                                        Ret     string    `json:"ret"`
                                        Url      string    `json:"url"`
                                        Data [2] string    `json:"data"`
                                    }

                                    // main query-escapes the sample page address and passes it to getText.
                                    func main() {
                                        getText(url.QueryEscape("http://www.sina.com.cn/"))
                                    }

                                    // getText requests the text-extraction API for the already query-escaped
                                    // page address in url and prints the Data field of the decoded JSON reply.
                                    //
                                    // Every failure (building the request, sending it, a non-200 status,
                                    // reading the body, decoding the JSON) is reported on stdout and ends the
                                    // call; nothing is returned to the caller.
                                    func getText(url string) {
                                        queryUrl := fmt.Sprintf("%s?url=%s&", APIURL, url)
                                        client := &http.Client{}

                                        reqest, err := http.NewRequest("GET", queryUrl, nil)
                                        if err != nil {
                                            fmt.Println("Fatal error ", err.Error())
                                            return
                                        }
                                        reqest.Header.Add("token", TOKEN)

                                        response, err := client.Do(reqest)
                                        if err != nil {
                                            fmt.Println("Fatal error ", err.Error())
                                            return
                                        }
                                        // Deferred only after the error check: response is nil when Do fails,
                                        // so deferring earlier would panic on a failed request.
                                        defer response.Body.Close()

                                        if response.StatusCode != http.StatusOK {
                                            fmt.Println("Fatal error ", response.Status)
                                            return
                                        }

                                        bodyByte, err := ioutil.ReadAll(response.Body)
                                        if err != nil {
                                            fmt.Println("Fatal error ", err.Error())
                                            return
                                        }

                                        var info jsoninfo
                                        if err := json.Unmarshal(bodyByte, &info); err != nil {
                                            fmt.Println("Fatal error ", err.Error())
                                            return
                                        }
                                        fmt.Println(info.Data)
                                    }
                                

Python调用网页提取接口示例:

                                    #!/usr/bin/python
                                    # -*- coding: utf-8 -*-
                                    # python3
                                    # Python 3 keeps urlencode in urllib.parse; a bare "import urllib" does not expose it.
                                    import urllib.parse

                                    import httplib2

                                    # Query string: the page to extract plus the type=1 flag used by the other samples.
                                    params = urllib.parse.urlencode({'url': 'http://www.sina.com.cn/', 'type': '1'})
                                    # Same /text/ endpoint as the PHP, Go and ASP samples for this API.
                                    url = 'http://api.ip138.com/text/?' + params
                                    headers = {"token": "8594766483a2d65d76804906dd1a1c6a"}  # sample token; replace with your own
                                    http = httplib2.Http()
                                    response, content = http.request(url, 'GET', headers=headers)
                                    # httplib2 returns the body as bytes; decode so the text prints instead of a bytes repr.
                                    # NOTE(review): assumes the API replies in UTF-8 — confirm.
                                    print(content.decode('utf-8'))
                                

ASP调用网页提取接口示例:

                                    <%@LANGUAGE="VBSCRIPT" CODEPAGE="936"%>
                                    <% Option Explicit
                                    '=========================================================
                                    ' File      : .asp
                                    ' Version   : 1.0.0.0
                                    ' Create    : 
                                    ' Modify    : 
                                    '=========================================================
                                    ' 1.0.0.0   : 
                                    '========================================================= %>
                                    <% Dim url, datatype, token, queryurl

                                    ' Target page address, already percent-encoded for use in a query string.
                                    url = "http%3a%2f%2fwww.sina.com.cn%2f"
                                    ' Look up your own token in the user center.
                                    token = "00d5cb1fac5dc5cbfe2ff218292a2dfd33"
                                    ' Assemble the request URL: encoded page address, type=1, and the token as query parameters.
                                    queryurl = "http://api.ip138.com/text/?url=" & url & "&type=" & 1 & "&token=" & token

                                    ' Fetch the reply and write it into the page output.
                                    Response.Write GetURL(queryurl)

                                    ' Sends a synchronous GET request to queryurl through MSXML2.ServerXMLHTTP and
                                    ' returns the response body converted to text by BytesToBstr using "utf-8".
                                    ' Run-time errors are suppressed for the whole function, so a failed request
                                    ' does not raise; the caller receives whatever GetURL held at that point.
                                    Function GetURL(queryurl)
                                        ' Suppress run-time errors from here on; no step below checks for failure.
                                        on error resume next
                                        dim Retrieval
                                        Set Retrieval = Server.CreateObject("MSXML2.ServerXMLHTTP")
                                                ' All four timeout arguments are set to 600000
                                                ' (NOTE(review): milliseconds per the ServerXMLHTTP docs, i.e. 10 minutes each — confirm).
                                                Retrieval.setTimeouts 600000, 600000, 600000, 600000
                                                ' Third argument false = synchronous: Send does not return until the request completes.
                                                Retrieval.Open "GET", queryurl, false
                                                Retrieval.Send
                                                ' The function's own name is used as the working variable for the raw response bytes.
                                                GetURL = Retrieval.ResponseBody
                                        Set Retrieval = Nothing
                                        ' Replace the raw bytes with their text form.
                                        GetURL=BytesToBstr(GetURL,"utf-8")
                                    End Function

                                    ' Converts the binary data in strBody to text by writing it into an
                                    ' ADODB.Stream and reading it back with the character set named by CodeBase.
                                    Function BytesToBstr(strBody,CodeBase)
                                        ' Numeric values the stream's Type and Mode properties are set to.
                                        Const STREAM_TYPE_BINARY = 1
                                        Const STREAM_TYPE_TEXT = 2
                                        Const STREAM_MODE_READWRITE = 3

                                        Dim stream
                                        Set stream = Server.CreateObject("Adodb.Stream")
                                        With stream
                                            ' Load the raw bytes in binary mode.
                                            .Type = STREAM_TYPE_BINARY
                                            .Mode = STREAM_MODE_READWRITE
                                            .Open
                                            .Write strBody
                                            ' Rewind, then switch to text mode so ReadText decodes with CodeBase.
                                            .Position = 0
                                            .Type = STREAM_TYPE_TEXT
                                            .Charset = CodeBase
                                            BytesToBstr = .ReadText
                                            .Close
                                        End With
                                        Set stream = Nothing
                                    End Function
                                     %>