• 企业400电话
  • 微网小程序
  • AI电话机器人
  • 电商代运营
  • 全 部 栏 目

    企业400电话 网络优化推广 AI电话机器人 呼叫中心 网站建设 商标✡知产 微网小程序 电商运营 彩铃•短信 增值拓展业务
    实例讲解实现抓取网上房产信息的ASP程序
    %@LANGUAGE="VBSCRIPT" CODEPAGE="936"%>
    !-- #include file="conn.asp" -->

    !-- #include file="inc/function.asp" -->
    !DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
    html>
    head>
    title>Untitled Document/title>
    meta http-equiv="Content-Type" content="text/html; charset=gb2312">
    meta http-equiv="refresh" content="300;URL=steal_house.asp">
    /head>

    body>
    %
    on error resume next
    '
    Server.ScriptTimeout = 999999
    '========================================================
    '字符编码函数
    '====================================================
    Function BytesToBstr(body,code)
            dim objstream
            set objstream = Server.CreateObject("adodb.stream")
            objstream.Type = 1
            objstream.Mode =3
            objstream.Open
            objstream.Write body
            objstream.Position = 0
            objstream.Type = 2
            objstream.Charset =code
            BytesToBstr = objstream.ReadText 
            objstream.Close
            set objstream = nothing
    End Function

    '取行字符串在另一字符串中的出现位置
    Function Newstring(wstr,strng)
            Newstring=Instr(lcase(wstr),lcase(strng))
            if Newstring=0 then Newstring=Len(wstr)
    End Function
    '替换字符串函数
    function ReplaceStr(ori,str1,str2)
    ReplaceStr=replace(ori,str1,str2)
    end function
    '====================================================
    function ReadXml(url,code,start,ends)
    set oSend=createobject("Microsoft.XMLHTTP")
    SourceCode = oSend.open ("GET",url,false)
    oSend.send()
    ReadXml=BytesToBstr(oSend.responseBody,code )
    start=Instr(ReadXml,start)
    ReadXml=mid(ReadXml,start)
    ends=Instr(ReadXml,ends)
    ReadXml=left(ReadXml,ends-1)
    end function

    function SubStr(body,start,ends)
    start=Instr(body,start)
    SubStr=mid(body,start+len(start)+1)
    ends=Instr(SubStr,ends)
    SubStr=left(SubStr,ends-1)
    end function

    dim getcont,NewsContent
    dim url,title
    url="http://www.***.com"'新闻网址knowsky.com
    getcont=ReadXml(url,"gb2312","table class=k2 border=""0""","/table>")
    getcont=RegexHtml(getcont)
    dim KeyId,NewsClass,City,Position,HouseType,Level,Area,Price,Demostra

    dim ContactMan,Contact
    for i=2 to ubound(getcont)
     response.Write(getcont(i)"__br>")

     tempLink=mid(getcont(i),instr(getcont(i),"href=""")+6,instr(getcont(i),""" onClick")-10)
     tempLink=replace(tempLink,"../","")

     response.Write(i":"tempLink"br>")
     NewsContent=ReadXml(tempLink,"gb2312","td valign=""bottom"" width=""400"">","hr width=""760"" noshade size=""1"" color=""#808080""> ")
     NewsContent=RemoveHtml(NewsContent)
     NewsContent=replace(NewsContent,VbCrLf,"")
     NewsContent=replace(NewsContent,vbNewLine,"")
     NewsContent=replace(NewsContent," ","")
     NewsContent=replace(NewsContent," ","")
     NewsContent=replace(NewsContent,"nbsp;","")
     NewsContent=replace(NewsContent,"\n","")
     NewsContent=replace(NewsContent,chr(10),"")
     NewsContent=replace(NewsContent,chr(13),"")
     '===============get Content=======================
     response.Write(NewsContent)
     KeyId=SubStr(NewsContent,"列号:","信息类别:")
     NewsClass=SubStr(NewsContent,"类别:","所在城市:")
     City=SubStr(NewsContent,"城市:","房屋具体位置:")
     Position=SubStr(NewsContent,"位置:","房屋类型:")
     HouseType=SubStr(NewsContent,"类型:","楼层:")
     Level=SubStr(NewsContent,"楼层:","使用面积:")
     Area=SubStr(NewsContent,"面积:","房价:")
     Price=SubStr(NewsContent,"房价:","其他说明:")
     Demostra=SubStr(NewsContent,"说明:","联系人:")
     ContactMan=SubStr(NewsContent,"联系人:","联系方式:")
     Contact=SubStr(NewsContent,"联系方式:","信息来源:")
     response.Write("总序列号:"KeyId"br>")
     response.Write("信息类别:"NewsClass"br>")
     response.Write("所在城市:"City"br>")
     response.Write("房屋具体位置:"Position"br>")
     response.Write("房屋类型:"HouseType"br>")
     response.Write("楼层:"Level"br>")
     response.Write("使用面积:"Area"br>")
     response.Write("房价:"Price"br>")
     response.Write("其他说明:"Demostra"br>")
     response.Write("联系人:"ContactMan"br>")
     response.Write("联系方式:"Contact"br>")
     'title=RemoveHTML(aa(i))
     'response.Write("title:"title)
     for n=0 to application.Contents.count
       if(application.Contents(n)=KeyId) then
        ifexit=true    
       end if  
     next 
     if not ifexit then
       application(timei)=KeyId
     '添加到数据库
     '====================================================
     set rs=server.CreateObject("adodb.recordset")
     rs.open "select top 1 * from news order by id desc",conn,3,3
     rs.addnew
     rs("NewsClass")=NewsClass
     rs("City")=City
     rs("Position")=Position
     rs("HouseType")=HouseType
     rs("Level")=Level
     rs("Area")=Area
     rs("Price")=Price
     rs("Demostra")=Demostra
     rs("ContactMan")=ContactMan
     rs("Contact")=Contact
     rs.update
     rs.close
     set rs=nothing
     end if
     '==================================================

    next
    function RemoveTag(body)

     Set regEx = New RegExp
     regEx.Pattern = "[a].*?\/[a]>"
     regEx.IgnoreCase = True
     regEx.Global = True
     Set Matches = regEx.Execute(body)
     dim i,arr(15),ifexit
     i=0
     j=0
     For Each Match in Matches
      TempStr = Match.Value 
      TempStr=replace(TempStr,"td>","")
      TempStr=replace(TempStr,"/td>","")
      TempStr=replace(TempStr,"tr>","")
      TempStr=replace(TempStr,"/tr>","") 
      arr(i)=TempStr 
      i=i+1
      if(i>=15) then
       exit for
      end if
     Next
     Set regEx=nothing
     Set Matches =nothing
     RemoveTag=arr

    end function
    function RegexHtml(body)
     dim r_arr(47),r_temp
     Set regEx2 = New RegExp
     regEx2.Pattern ="a.*?\/a>"
     regEx2.IgnoreCase = True
     regEx2.Global = True
     Set Matches2 = regEx2.Execute(body)
     iii=0
     For Each Match in Matches2

      r_arr(iii)=Match.Value

      iii=iii+1 
     Next
     RegexHtml=r_arr
     set regEx2=nothing
     set Matches2=nothing
    end function
    '======================================================

    conn.close
    set conn=nothing
    %>
    /body>
    /html>

     


      function.asp

     %
    '**************************************************
    '函数名:gotTopic
    '作  用:截字符串,汉字一个算两个字符,英文算一个字符
    '参  数:str   ----原字符串
    '       strlen ----截取长度
    '返回值:截取后的字符串
    '**************************************************
    function gotTopic(str,strlen)
     if str="" then
      gotTopic=""
      exit function
     end if
     dim l,t,c, i
     str=replace(replace(replace(replace(str,"nbsp;"," "),"quot;",chr(34)),"gt;",">"),"lt;","")
     str=replace(str,"?","")
     l=len(str)
     t=0
     for i=1 to l
      c=Abs(Asc(Mid(str,i,1)))
      if c>255 then
       t=t+2
      else
       t=t+1
      end if
      if t>=strlen then
       gotTopic=left(str,i) "…"
       exit for
      else
       gotTopic=str
      end if
     next
     gotTopic=replace(replace(replace(replace(gotTopic," ","nbsp;"),chr(34),"quot;"),">","gt;"),"","lt;")
    end function
    '=========================================================
    '函数:RemoveHTML(strHTML)
    '功能:去除HTML标记
    '参数:strHTML  --要去除HTML标记的字符串
    '=========================================================
    Function RemoveHTML(strHTML)
    Dim objRegExp, Match, Matches
    Set objRegExp = New Regexp

    objRegExp.IgnoreCase = True
    objRegExp.Global = True
    '取闭合的>
    objRegExp.Pattern = ".+?>"
    '进行匹配
    Set Matches = objRegExp.Execute(strHTML)

    ' 遍历匹配集合,并替换掉匹配的项目
    For Each Match in Matches
    strHtml=Replace(strHTML,Match.Value,"")
    Next
    RemoveHTML=strHTML
    Set objRegExp = Nothing
    set Matches=nothing
    End Function

    %>
     


      conn.asp

     %
    'on error resume next
    set conn=server.CreateObject("adodb.connection")
    con= "driver={Microsoft Access Driver (*.mdb)};dbq=" Server.MapPath("stest.mdb")
    conn.open con

    sub connclose
       conn.close
       set conn=nothing  
    end sub
    %>
     


      附:抓取信息的详细页面事例

    总序列号:

    479280  

    信息类别:

    出租

    所在城市:

    济南

    房屋具体位置:

    华龙路华信路交界口

    房屋类型:

    其他

    楼层:

    六层

    使用面积:

    24~240 平方米之间

    房价:

    0  [租赁:元/月,买卖:万元/套]

    其他说明:

    华信商务楼3至6层小空间对外出租(0.5元/平起),本楼属纯商务办公投资使用,可用于办公写字间,周边设施齐全、交通便利(37、80、K95在本楼前经过),全产权、市证,楼内设施包括水、电、暖、电梯设施齐全,有意者可电讯!

    联系人:

    鲁、王

    联系方式:

    88017966、86812217

    信息来源:

    2005-8-4 8:28:55  来自:218.98.86.175

    点击次数:

    19

    上一篇:asp的程序能实现伪静态化的方法
    下一篇:ASP数据库连接方式大全
  • 相关文章
  • 

    © 2016-2020 巨人网络通讯 版权所有

    《增值电信业务经营许可证》 苏ICP备15040257号-8

    实例讲解实现抓取网上房产信息的ASP程序 实例,讲解,实现,抓取,网上,