打开APP
userphoto
未登录

开通VIP,畅享免费电子书等14项超值服

开通VIP
webbrowser1提取网页链接

webbrowser1提取网页链接  

2011-07-31 21:47:01|  分类: 默认分类 |字号 订阅

1.

Private Sub Command1_Click()
Dim aa As String
Dim i As Integer
Dim s As String
s = "question"
fname = "C:\TDDOWNLOAD\a.txt"

Open fname For Input As #1
Do Until EOF(1)
Line Input #1, aa
List1.AddItem aa, i
i = i + 1
Loop
Close #1


For i = 0 To List1.ListCount - 1
If InStr(1, List1.List(i), s) > 0 Then
List2.AddItem List1.List(i)
End If
Next
Kill "C:\TDDOWNLOAD\a.txt"
End Sub

Private Sub Command2_Click()
Dim s As String
s = "question"
For i = 0 To List1.ListCount - 1
If InStr(1, List1.List(i), s) > 0 Then
List2.AddItem List1.List(i)
End If
Next

End Sub


Private Sub Form_Load()
WebBrowser1.Navigate "http://zhidao.baidu.com/browse/867"
End Sub


Private Sub List2_DblClick()
    Form2.Show
    Form2.WebBrowser1.Navigate Me.List1.List(Me.List1.ListIndex)
End Sub


Private Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
Dim Document, i, s1, s2
Open "C:\TDDOWNLOAD\a.txt" For Output As #1
For i = 0 To WebBrowser1.Document.links.length - 1
s1 = WebBrowser1.Document.links(i).href
s2 = WebBrowser1.Document.links(i).innertext
Print #1, s1 & "," & s2
Next
Close #1
End Sub

===========================================================================================

Private Sub Form_Load()

WebBrowser1.Navigate "http://zhidao.baidu.com/browse/867"
End Sub

 

Private Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
Dim Document, i, s1, s2
For i = 0 To WebBrowser1.Document.links.length - 1
s1 = WebBrowser1.Document.links(i).href
s2 = WebBrowser1.Document.links(i).innertext
List1.AddItem s2 & " : " & s1
Next
End Sub

.................

Private Declare Function SendMessage Lib "user32" Alias "SendMessageA" _
    (ByVal hwnd As Long, ByVal wMsg As Long, ByVal wParam As Long, _
    lParam As Any) As Long
Const LB_SETHORIZONTALEXTENT = &H194

Private Sub Form_Load()
  WebBrowser1.Navigate "http://zhidao.baidu.com/browse/867"
End Sub
Private Sub Command1_Click()
  Dim TagName, str As String
  Dim count, i, k As Integer
  Dim cols
  List1.Clear
  Set cols = WebBrowser1.Document.All
  count = cols.length
  k = 0
  While i < count
    TagName = cols.Item(i).TagName
    If TagName = "A" Or TagName = "IMG" Then  '查找超链接和img图形
        str = k & "  " & TagName & "... " & cols.Item(i).href
        List1.AddItem (str)       '增加超链接
        SendMessage List1.hwnd, LB_SETHORIZONTALEXTENT, Me.TextWidth(str), ByVal 0&  '为list加水平滚动条
        k = k + 1
    End If
    i = i + 1
  Wend
  Label1.Caption = "本网页共有超级连接:" & k & "  个"
End Sub

============================================================================================

2.


'下载好网页
Private Sub Command3_Click()
Dim fso, ts, re
Set fso = CreateObject("Scripting.FileSystemObject")
Set ts = fso.OpenTextFile("d:\百度知道_VB_全部问题.htm")
s = ts.ReadAll
ts.Close
Set re = CreateObject("VBScript.RegExp")
re.IgnoreCase = True
re.Global = True
re.Pattern = "<a[\s ]+href ?= ?[""'](.*?)[""']"
For Each i In re.Execute(s)
Debug.Print i.submatches(0)
Next
End Sub

3.

Private Sub aa_Load()
Dim XXX As Long
Dim Text As String, Temp As String
Dim A As Long, X As Long, aa As Integer
Dim N As Long
Dim TextA As String

Const Xpath = "G:\MM\桌面\c\" '文件目录
Const OutPath = "G:\MM\桌面/3.txt" '输出文件目录及文件名
Const FindType = ".jpg" '图片类型
Const FindSer = "http:" '查找字符串类型

File1.Path = Xpath
X = 0: XXX = 0
Open OutPath For Output As #2
For aa = 0 To File1.ListCount - 1
Open Xpath & File1.List(aa) For Input As #1
DoEvents
Me.Caption = aa & " " & XXX
Text = ""
Do While Not EOF(1)
Line Input #1, Temp
Text = Text & Temp
Loop
N = 0
X = 0
Do
A = InStr(X + 1, Text, FindType)
If A = 0 Then Exit Do
If A <> 0 Then
For i = 1 To 255
Temp = Mid(Text, A - i, 5)
If Temp = FindSer Then
TextA = Mid(Text, A - i, i + 4)
Print #2, "<a href=" & """" & TextA & """" & ">OK</a>"
XXX = XXX + 1
X = A
Exit For
End If
Next i
End If
N = N + 1
Loop Until N > 50
Close #1
Next aa
Close #2
MsgBox XXX
End Sub
Private Sub Command1_Click()
aa_Load
End Sub

4.

加入webbrowser和scriptlet控件(引用部件Microsoft HTMl object Library

添加一个Listbox控件(用于存放读出的网址)命名为(listurl),个textbox()控件用于打开网址命名为txtsearch

Option Explicit
Dim UrlNow As IHTMLDocument2
Private Sub CmdGeturl_Click()
      'WebBrowser1.Stop
      'Dim UrlNow
      On Error GoTo errordes
      Set UrlNow = WebBrowser1.Document
      If UrlNow Is Nothing Then
          MsgBox "当前页面没有链接", vbInformation, "注意"
      Else
          Dim UrlIndex As Long
          For UrlIndex = 0 To UrlNow.links.length - 1
              ListUrl.AddItem UrlNow.links(UrlIndex)
          Next UrlIndex
      End If
      Exit Sub
errordes:
      MsgBox "未知错误", vbCritical, "错误"
    
End Sub

Private Sub Cmdstart_Click()
      WebBrowser1.Navigate2 txtSearch.Text

End Sub

5.VB获得指定网页里面的图片和连接地址

Option Explicit

'首先在工程中加入对Microsoft Internet Controls的引用
'指定浏览器对象的Document
Private mDocument As Object
Private Sub Command2_Click()
On Error Resume Next
DoEvents
mComGetIEWindows "zcsor的专栏" '给初学者:VB如何操作WEB页的浏览提交———八:获取网页上的链接、图片指向地址"
If mDocument Is Nothing Then
     MsgBox  "未打开指定页"
Else
     Dim mIndex As Long, mIndexEx As Long
     For mIndex = 0 To mDocument.Forms.length - 1        '输出每个FORM
         Print mDocument.Forms(mIndex).Name
         lstLinks.AddItem  "输出连接"
         For mIndexEx = 0 To mDocument.links.length - 1  '输出连接
             lstLinks.AddItem mDocument.links(mIndexEx)
         Next
         lstLinks.AddItem  "图片地址"
         For mIndexEx = 0 To mDocument.images.length - 1  '输出图片
             lstLinks.AddItem mDocument.images(mIndexEx).src      '图片地址
         Next
     Next
     Text1.Text = mDocument.documentElement.innerHTML
End If
End Sub

 

'参数为网页标题
Private Sub mComGetIEWindows(ByVal IETitle As String)
'浏览器对象集合(包含IE也包含资源管理器)
Dim mShellWindow As New SHDocVw.ShellWindows
'循环变量
Dim mIndex As Long
'从第一个浏览器对象循环到最后一个
For mIndex = 0 To mShellWindow.Count - 1
     If VBA.TypeName(mShellWindow.Item(mIndex).Document) =  "HTMLDocument" Then   '如果是IE窗口而不是资源管理器
         If mShellWindow.Item(mIndex).Document.Title = IETitle Then  '如果是指定窗口(用窗口标题判断的,其他也可以,例如URL)
             Set mDocument = mShellWindow.Item(mIndex).Document  '锁定我们要的浏览器对象
             Exit Sub
         End If
     End If
Next mIndex
End Sub

==============================================================================================

 

1.VB判断哪些字段具有超链接并把该超链接提取出来

'主要先取 <a href 和 </a> 这两段间的数据 然后再分离

'=================================

'留个名
'给一个我自己用来分析HTML源码的函数你

'*************************************************************************
'**函 数 名:FindStr
'**中文意译:
'**输 入:ByVal vSourceStr(String) -
'** :ByVal vFunType(Integer) -
'** :Optional ByVal vsStr(String) -
'** :Optional ByVal veStr(String) -
'**输 出:(String) -
'**功能描述:
'** :
'**作 者:秋色烽火
'**日 期:2007-11-20 22:02:05
'*************************************************************************
Public Function FindStr(ByVal vSourceStr As String, ByVal vFunType As Integer, Optional ByVal vsStr As String, Optional ByVal veStr As String) As String
Dim sourceStr, sourceStrtemp, sourceStrtemp2, sStr, eStr, s, E, opStr
'"头部前<b>实体内容</b>尾部后"
sourceStr = vSourceStr
sStr = vsStr
eStr = veStr
Select Case vFunType
Case 0 '实体内容
s = InStr(sourceStr, sStr)
If s <> 0 Then
sourceStr = Mid$(sourceStr, s + Len(sStr))
E = InStr(sourceStr, eStr)
If E <> 0 Then
FindStr = Mid$(sourceStr, 1, E - 1)
Else
FindStr = ""
End If
End If
'**********************
Case 1 '<b>实体内容</b>
sourceStr = FindStr(sourceStr, 0, sStr, eStr)
FindStr = sStr & sourceStr & eStr
'**********************
Case 2 '<b>实体内容
sourceStr = FindStr(sourceStr, 0, sStr, eStr)
FindStr = sStr & sourceStr
'**********************
Case 3 '实体内容</b>
sourceStr = FindStr(sourceStr, 0, sStr, eStr)
FindStr = sourceStr & eStr
'**********************
Case 4 '头部前<b>实体内容</b>
E = InStr(sourceStr, sStr)
If E <> 0 Then
FindStr = Mid$(sourceStr, 1, E - 1) & FindStr(sourceStr, 1, sStr, eStr)
Else
FindStr = ""
End If
'**********************
Case 5 '头部前<b>实体内容
E = InStr(sourceStr, sStr)
If E <> 0 Then
FindStr = Mid$(sourceStr, 1, E - 1) & FindStr(sourceStr, 2, sStr, eStr)
Else
FindStr = ""
End If
'**********************
Case 6 '<b>实体内容</b>尾部后
s = InStr(sourceStr, sStr)
If s <> 0 Then
FindStr = Mid$(sourceStr, s)
Else
FindStr = ""
End If
'**********************
Case 7 '实体内容</b>尾部后
s = InStr(sourceStr, sStr)
If s <> 0 Then
FindStr = Mid$(sourceStr, s + Len(sStr))
Else
FindStr = ""
End If
'**********************
Case 8 '1 多项结果返回 递归调用循环返回用$分隔的多项结果 主要用于split侵害
sourceStrtemp = FindStr(sourceStr, 7, sStr, eStr)
Do While sourceStrtemp <> ""
E = InStr(sourceStrtemp, eStr)
If E <> 0 Then
opStr = opStr & "$$" & Mid$(sourceStrtemp, 1, E - 1)
sourceStrtemp = FindStr(Mid$(sourceStrtemp, E + Len(eStr)), 7, sStr, eStr)
End If
Loop
FindStr = opStr
'**********************
Case 9 '从右向左匹配字符串
' For i = Len(sourceStr) To 1 Step -1
' sourceStrtemp = sourceStrtemp & Mid$(sourceStr, i, 1)
' Next
' DoEvents
' For i = Len(sStr) To 1 Step -1
' sourceStrtemp2 = sourceStrtemp2 & Mid$(sStr, i, 1)
' Next
' DoEvents
sourceStrtemp = StrReverse(sourceStr)
sourceStrtemp2 = StrReverse(sStr)
s = InStr(sourceStrtemp, sourceStrtemp2)
If s <> 0 Then
sourceStrtemp = Mid$(sourceStrtemp, 1, s - 1)
sourceStrtemp2 = ""
For i = Len(sourceStrtemp) To 1 Step -1
sourceStrtemp2 = sourceStrtemp2 & Mid$(sourceStrtemp, i, 1)
Next
DoEvents
FindStr = sourceStrtemp2
Else
FindStr = ""
End If
End Select
End Function

2.

Private Sub Command1_Click()
For i = 0 To WebBrowser1.Document.All.length - 1
'如果是<A ......>标记,则提取其超链接(href)及<A>与</A>间的文本
If UCase(WebBrowser1.Document.All(i).tagname) = "A" Then
Text1.Text = Text1.Text & WebBrowser1.Document.All(i).outertext & ":" & WebBrowser1.Document.All(i).href & vbCrLf
End If
Next i
End Sub

Private Sub Form_Load()
Command1.Caption = "提取超链接"
WebBrowser1.Navigate "http://zhidao.baidu.com/browse/867"
End Sub

====================================================================================================

'text1(0-1),command2(0-2)
Dim isGoUrl As Boolean

Private Sub Command1_Click()
    Command1.Enabled = False
    WebBrowser1.Navigate Combo1.Text
    isGoUrl = True
End Sub

Private Sub Command2_Click(Index As Integer)
    Dim doc As IHTMLDocument2, C
    Set doc = WebBrowser1.Document
    On Error Resume Next
    Label1.Tag = 0
   
    If Index = 0 Then
        For Each C In doc.links
            If Trim$(C.innerText) = Trim$(Text1(0).Text) Then
                Label1.Caption = C.href
                Label1.Tag = 1
                Label2.Caption = "连接已找到,点击可以浏览"
                Exit For
            End If
        Next
    ElseIf Index = 1 Then
        For Each C In doc.links
            If InStr(1, Trim$(C.href), Trim$(Text1(1).Text)) > 0 Then
                Label1.Caption = C.href
                Label1.Tag = 1
                Label2.Caption = "连接已找到,点击可以浏览"
                Exit For
            End If
        Next
       
    ElseIf Index = 2 Then
        RichTextBox1.Text = "查找结果:" & vbCrLf
        RichTextBox1.SelStart = Len(RichTextBox1.Text)
        For Each C In doc.links
             If InStr(1, Trim$(C.href), Trim$(Text1(1).Text)) > 0 Then
                RichTextBox1.SelText = "-------------------" & vbCrLf
                RichTextBox1.SelText = C.href & vbCrLf
                Label1.Tag = Label1.Tag + 1
             End If
        Next
        If Label1.Tag > 0 Then
            RichTextBox1.SelText = "-------------------" & vbCrLf
            RichTextBox1.SelText = "一共找到" & Label1.Tag & "个符合条件的连接!"
        Else
            RichTextBox1.SelText = "没有找到符合条件的连接!"
        End If
    End If
       
    If Label1.Tag = 0 Then
        With Label1
            .Caption = "没有找到指定的连接"
            .Font.Underline = False
            .ForeColor = vbRed
            .MousePointer = 0
        End With
        Label2.Caption = ""
    Else
        With Label1
            .Font.Underline = True
            .ForeColor = vbBlue
            .MousePointer = 99
        End With
    End If
       
End Sub


Private Sub Form_Load()
    WebBrowser1.Navigate "about:blank"
    isGoUrl = False
End Sub

Private Sub Label1_Click()
On Error GoTo e2
    If Label1.Tag = 1 Then
        WebBrowser1.Navigate Label1.Caption
    End If
e2:
End Sub

Private Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
    If Not (pDisp Is WebBrowser1.Object) Then Exit Sub
    If isGoUrl = True And (URL = Combo1.Text Or URL = Combo1.Text & "/") Then
        GetHtml
        Command1.Enabled = True
    End If
End Sub

Sub GetHtml()
    Dim doc As IHTMLDocument2, C
    Set doc = WebBrowser1.Document
    On Error Resume Next
    RichTextBox1.Text = ""
    RichTextBox1.SelText = "=======================================" & vbCrLf
    RichTextBox1.SelText = "一共有 " & doc.links.length & " 个超链接" & vbCrLf
    RichTextBox1.SelText = "=======================================" & vbCrLf
   
    For Each C In doc.links
        RichTextBox1.SelText = C.innerText & " [" & C.href & "]" & vbCrLf
    Next
End Sub

=============================================================================================

Dim isGoUrl As Boolean

Private Sub Command1_Click()
    Command1.Enabled = False
    WebBrowser1.Navigate Combo1.Text
    isGoUrl = True
End Sub

Private Sub Command2_Click()
    Dim doc As IHTMLDocument2, C
    Set doc = WebBrowser1.Document
    On Error Resume Next
    Label1.Tag = 0
    For Each C In doc.links
        If Trim$(C.innerText) = Trim$(Text1.Text) Then
            Label1.Caption = C.href
            Label1.Tag = 1
            Label2.Caption = "点击下面的连接在上面的窗口打开"
            Exit For
        End If
    Next
    If Label1.Tag = 0 Then
        With Label1
            .Caption = "没有找到指定的连接"
            .Font.Underline = False
            .ForeColor = vbRed
            .MousePointer = 0
        End With
        Label2.Caption = ""
    Else
        With Label1
            .Font.Underline = True
            .ForeColor = vbBlue
            .MousePointer = 99
        End With
    End If
       
End Sub

Private Sub Form_Load()
    WebBrowser1.Navigate "about:blank"
    isGoUrl = False
End Sub

Private Sub Label1_Click()
On Error GoTo e2
    If Label1.Tag = 1 Then
        WebBrowser1.Navigate Label1.Caption
    End If
e2:
End Sub

Private Sub WebBrowser1_DocumentComplete(ByVal pDisp As Object, URL As Variant)
    If Not (pDisp Is WebBrowser1.Object) Then Exit Sub
    If isGoUrl = True And (URL = Combo1.Text Or URL = Combo1.Text & "/") Then
        GetHtml
        Command1.Enabled = True
    End If
End Sub

Sub GetHtml()
    Dim doc As IHTMLDocument2, C
    Set doc = WebBrowser1.Document
    On Error Resume Next
    RichTextBox1.Text = ""
    RichTextBox1.SelText = "=======================================" & vbCrLf
    RichTextBox1.SelText = "一共有 " & doc.links.length & " 个超链接" & vbCrLf
    RichTextBox1.SelText = "=======================================" & vbCrLf
   
    For Each C In doc.links
        RichTextBox1.SelText = C.innerText & " [" & C.href & "]" & vbCrLf
    Next
End Sub

================================================================================================

提取图片链接
Private Declare Function ShellExecute Lib "shell32.dll" Alias "ShellExecuteA" _
          (ByVal hwnd As Long, _
          ByVal lpOperation As String, _
          ByVal lpFile As String, _
          ByVal lpParameters As String, _
          ByVal lpDirectory As String, _
          ByVal nShowCmd As Long) As Long
Private Sub GetLinks()
  Dim L     As Integer
  Dim i     As Integer
  Dim Varl     As Variant

  Set Doc = WebBrowser1.Document
  Set All = Doc.images         '取图片的连接
  L = All.length
  For i = 0 To L - 1
        Set Varl = All.Item(i, varempty)
          Text1.Text = Text1.Text & vbCrLf & Varl.href & vbCrLf
        Set Varl = Nothing
  Next i
  Set All = Nothing
  Set Doc = Nothing
  End Sub

Private Sub Cmd_get_Click()

If Txt_url = "" Then
MsgBox "先输入网址,不要那么着急"
Exit Sub
End If
GetLinks
End Sub

Private Sub Cmd_load_Click()
A = Txt_url.Text
If Left(A, 7) <> "http://" Then '判断连接前是否有"http://"字符串
Text1.Text = "http://" & Txt_url.Text
 End If
Text1 = ""
If Txt_url = "" Then
MsgBox "没输入网地"
Exit Sub
End If
WebBrowser1.Navigate Txt_url.Text
Lab_Tip.Caption = "网地加载完成,请点击提取图片按钮......"

End Sub

Private Sub Command3_Click()
dhk.CancelError = False
dhk.ShowSave
save = dhk.FileName
Open save For Output As #1
Print #1, Text1
Close #1
MsgBox "输出成功", vbOKOnly, "恭喜!" '输出结果
Lab_Tip.Caption = "请自行添加注释"
End Sub

本站仅提供存储服务,所有内容均由用户发布,如发现有害或侵权内容,请点击举报
打开APP,阅读全文并永久保存 查看更多类似文章
猜你喜欢
类似文章
【热】打开小程序,算一算2024你的财运
VB关于webbrowser相关操作大全
VB中如何保存Webbrowser中的整个页面到一幅图片
WebBrowser 技巧
IE webbrowser技巧集
关于webbrower控件的使用
webbrowser?原窗口打开页面代码
更多类似文章 >>
生活服务
热点新闻
分享 收藏 导长图 关注 下载文章
绑定账号成功
后续可登录账号畅享VIP特权!
如果VIP功能使用有故障,
可点击这里联系客服!

联系客服