I use the code below to scrape data from ~200 identical pages data (which I have listed on a sheet titled "URLs" starting in cell A2). However, the site seems to have recently implemented some type of server traffic-management / throttling tool that makes my code fail after around 125 - 150 URLs instead of completing all 200 like it used to. When the code fails, I actually can't even go to any page on the domain in my browser...the browser just returns some type of timeout / 'cannot access server' error, which seems to last for a few minutes. (My guess is that they instituted it as some kind of DDOS / bot-deterrent.)
I don't know exactly what the # of requested pages is that triggers the error / domain-cutoff, but I seem to be able to get through 100 URLs without problem...so I want to amend the loop below so that it pauses 60-seconds after the first 100 URLs, which I think should be enough to avoid the block / code-failure.
I don't know exactly what the # of requested pages is that triggers the error / domain-cutoff, but I seem to be able to get through 100 URLs without problem...so I want to amend the loop below so that it pauses 60-seconds after the first 100 URLs, which I think should be enough to avoid the block / code-failure.
VBA Code:
Sub Quote_loop_through()
Dim h1 As Worksheet, h2 As Worksheet
Dim u1 As Long, u2 As Long
Dim MyUrl As String
'
Application.ScreenUpdating = False
Application.StatusBar = False
Set h1 = Sheets("URLs") 'origin
Set h2 = Sheets("My_Quotes") 'destiny
'
u1 = h1.Range("A" & Rows.Count).End(xlUp).Row
For i = 2 To u1
MyUrl = h1.Cells(i, "A").Value
Application.StatusBar = "import data : " & i - 1 & " of : " & u1 - 1
u2 = h2.Range("A" & Rows.Count).End(xlUp).Row + 1
With h2.QueryTables.Add(Connection:="URL;" & MyUrl, Destination:=h2.Range("A" & u2))
.Name = "quotes_page.php?symbol=BMO"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlOverwriteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = False
.RefreshPeriod = 0
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingNone
.WebTables = "1"
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
u3 = h2.Range("A" & Rows.Count).End(xlUp).Row
h2.Range("P" & u2 & ":P" & u3).Value = MyUrl
Next
Application.StatusBar = False
Application.ScreenUpdating = True
Application.CutCopyMode = False
End Sub