Hi. So I have this web scrape code that pulls data from a work website using Internet Explorer. Works fine. However, when there is a lot of data it is slow. So I have been trying to convert it using Get request since it seems to be quicker. However, it pulls nothing. Running the code step by step it skips all lines after “For Each Div in Divs” and goes straight to End sub. Here are my the two codes. I thought I only changed what needed to in order to switch it form IE to Get request. Thank you in advance!
VBA Code:
[/
Dim Document As HTMLDocument
'
Dim Div As IHTMLElement
Dim H3 As IHTMLElement
Dim Table As IHTMLElement
Dim TD As IHTMLElement
Dim TR As IHTMLElement
'
Dim Divs As IHTMLElementCollection
Dim Tables As IHTMLElementCollection
Dim TDs As IHTMLElementCollection
Dim TRs As IHTMLElementCollection
'
Dim Column As Integer
Dim Row As Integer
'
Dim Browser As InternetExplorer
'
Dim URL As String
'
Dim ws As Worksheet
'
Set ws = ThisWorkbook.Worksheets("Setup")
'
Row = 1
Column = 1
'
Set ws = Sheets("PROCESS")
'
ws.Cells.Clear
'
URL = "Work URL"
'
Set Browser = New InternetExplorerMedium
'
Browser.navigate URL
'
' Wait for page to load
Do While Browser.Busy Or Browser.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
'
' Scan the document
Set Document = Browser.Document
'
Set Divs = Document.getElementById("secondaryProductivityList").getElementsByTagName("div")
'
For Each Div In Divs
Set H3 = Div.getElementsByTagName("h3")(0)
'
If Not Div.className = "floatHeader" And Not H3 Is Nothing Then
ws.Cells(Row, 1).Value = H3.innerText
Row = Row + 1
'
Set Tables = Div.getElementsByTagName("table")
Set Table = Tables(0)
Set TRs = Table.getElementsByTagName("tr")
'
For Each TR In TRs
Column = 1
'
Set TDs = TR.getElementsByTagName("th")
'
For Each TD In TDs
ws.Cells(Row, Column).Value = TD.innerText
ws.Cells(Row, Column).Font.Bold = True
'
If TD.getAttribute("colspan") Then
Column = Column + TD.getAttribute("colspan")
Else
Column = Column + 1
End If
Next
'
Set TDs = TR.getElementsByTagName("td")
'
For Each TD In TDs
ws.Cells(Row, Column).Value = TD.innerText
Column = Column + 1
Next
'
Row = Row + 1
Next
End If
'
Row = Row + 1
Next
'
Browser.Quit
'
Get Request Code
Dim Table As IHTMLElement
Dim Tables As IHTMLElementCollection
Dim Div As IHTMLElement
Dim Divs As IHTMLElementCollection
Dim H3 As IHTMLElement
Dim TR As IHTMLElement
Dim TRs As IHTMLElementCollection
Dim TD As IHTMLElement
Dim TDs As IHTMLElementCollection
Dim Row As Integer
Dim Column As Integer
Dim ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Setup")
Row = 1
Column = 1
Set ws = Sheets("PROCESS")
ws.Cells.Clear
Dim H As Object, doc As New HTMLDocument
Set H = CreateObject("WinHTTP.WinHTTPRequest.5.1")
URL = "WORK URL"
Debug.Print URL
ReTry:
H.SetAutoLogonPolicy 0
H.setTimeouts 0, 0, 0, 0
H.Open "GET", URL, False
H.send
H.waitForResponse
If H.Status <> 200 Then
MsgBox H.Status & " - " & H.statusText
Exit Sub
End If
Debug.Print URL
doc.body.innerHTML = H.responseText
Set Divs =doc.getElementById("secondaryProductivityList").getElementsByTagName("div")
For Each Div In Divs
Set H3 = Div.getElementsByTagName("h3")(0)
If Not Div.className = "floatHeader" And Not H3 Is Nothing Then
ws.Cells(Row, 1).Value = H3.innerText
Row = Row + 1
Set Tables = Div.getElementsByTagName("table")
Set Table = Tables(0)
Set TRs = Table.getElementsByTagName("tr")
For Each TR In TRs
Column = 1
Set TDs = TR.getElementsByTagName("th")
For Each TD In TDs
ws.Cells(Row, Column).Value = TD.innerText
ws.Cells(Row, Column).Font.Bold = True
If TD.getAttribute("colspan") Then
Column = Column + TD.getAttribute("colspan")
Else
Column = Column + 1
End If
Next
Set TDs = TR.getElementsByTagName("td")
For Each TD In TDs
ws.Cells(Row, Column).Value = TD.innerText
Column = Column + 1
Next
Row = Row + 1
Next
End If
Row = Row + 1
Next
End Sub
Last edited: