Rich (BB code):
Sub BETWEF()
Sheets("BetEX").Select
Dim httpReq As WinHttp.WinHttpRequest
Dim HTMLdoc As HTMLDocument
Dim para As HTMLParaElement
Dim tRows As IHTMLElementCollection
Dim tRow As HTMLTableRow
Dim URLs As Range, URL As Range
Dim destSheet As Worksheet
Dim parts As Variant
Dim matchURL As String, matchOddsURL As String
Dim r As Long
Dim matchData(1 To 7) As Variant
Dim HTML As String
Dim bookmakerFound As Boolean
Dim startTime As Single
startTime = Timer
Set destSheet = ThisWorkbook.Worksheets("WEB")
With destSheet
.UsedRange.ClearContents
.Range("A1:J1").Value = Array("LEG", "SEASON", "DATE", "HOME", "AWAY", "RESULT", "HALFS", "W", "D", "L")
End With
r = 2
With ThisWorkbook.Worksheets("BetEX")
Set URLs = .Range("D2", .Cells(Rows.Count, "D").End(xlUp))
End With
Set httpReq = New WinHttp.WinHttpRequest
For Each URL In URLs
'Request the match result page by removing "#1x2" from the URL
matchURL = Replace(URL.Value, "#1x2", "")
Debug.Print matchURL
With httpReq
.Open "GET", matchURL, False
.setRequestHeader "Host", "www.betexplorer.com"
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko" 'Windows 10, IE11
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
.setRequestHeader "Accept-Language", "en-US,en;q=0.5"
.setRequestHeader "Upgrade-Insecure-Requests", "1"
.setRequestHeader "Referer", "http://www.betexplorer.com/soccer/italy/serie-a-2015-2016/results/"
Debug.Print .Status, .statusText
'Put response in a HTMLDocument for parsing
Set HTMLdoc = New HTMLDocument
HTMLdoc.body.innerHTML = .responseText
End With
DoEvents
matchData(1) = HTMLdoc.getElementsByClassName("list-breadcrumb__item")(2).innerText
matchData(2) = HTMLdoc.getElementsByClassName("list-breadcrumb__item")(3).innerText
'Date and time of match is in data-dt attribute of "match-date" P element:
'< P id=match-date class=list-details__item__date data-dt="15,5,2016,18,00">< /P>
Set para = HTMLdoc.getElementById("match-date")
parts = Split(para.getAttribute("data-dt"), ",")
matchData(3) = DateSerial(parts(2), parts(1), parts(0)) + TimeSerial(parts(3) + 7, parts(4), 0)
matchData(4) = HTMLdoc.getElementsByTagName("h2")(0).innerText
matchData(5) = HTMLdoc.getElementsByTagName("h2")(1).innerText
'Construct match odds URL from match result URL
parts = Split(URL.Value, "/")
matchOddsURL = "http://www.betexplorer.com/gres/ajax/matchodds.php?p=1&b=1x2&e=" & parts(7)
Debug.Print matchOddsURL
'Request the match odds data. The response is a JSON string containing HTML which itself contains the odds data
With httpReq
.Open "GET", matchOddsURL, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko" 'Windows 10, IE11
.setRequestHeader "Accept", "application/json, text/javascript, */*; q=0.01"
.setRequestHeader "Accept-Language", "en-US,en;q=0.5"
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.setRequestHeader "Referer", URL.Value
Debug.Print .Status, .statusText
'Extract HTML from JSON response and put in HTMLDocument
HTML = Mid(.responseText, Len("{'odds':'") + 1) 'remove {"odds":" at the start
HTML = Left(HTML, Len(HTML) - 2) 'remove "} at the end
HTML = Replace(HTML, "", "") 'remove newlines
HTML = Replace(HTML, "", "") 'remove escape characters (assumes that every "" is the escape character preceding the escaped character)
Set HTMLdoc = New HTMLDocument
HTMLdoc.body.innerHTML = HTML
End With
DoEvents
bookmakerFound = False
Set tRows = HTMLdoc.getElementsByTagName("TR")
For Each tRow In tRows
If tRow.getElementsByTagName("TABLE").Length = 0 Then 'no inner tables?
If InStr(1, tRow.innerText, "bet365", vbTextCompare) > 0 Then
bookmakerFound = True
destSheet.Cells(r, "A").Resize(1, 7) = matchData
destSheet.Cells(r, "H").Value = tRow.Cells(3).getAttribute("data-odd")
destSheet.Cells(r, "I").Value = tRow.Cells(4).getAttribute("data-odd")
destSheet.Cells(r, "J").Value = tRow.Cells(5).getAttribute("data-odd")
r = r + 1
End If
End If
Next
If Not bookmakerFound Then
destSheet.Cells(r, "A").Resize(1, 7) = matchData
r = r + 1
End If
Next
Debug.Print "Elapsed time = " & Timer - startTime & " seconds"
Debug.Print "Elapsed time = " & Format(TimeSerial(0, 0, Timer - startTime), "hh:mm:ss")
End Sub
Can you tell me why the VBA scraper Green code doesn't work?
It's John's work.
But he sadly answered only that he did not know.
So I ask other people.
https://www.betexplorer.com/soccer/south-korea/k-league-1/suwon-bluewings-seoul/0IFC6i6I/
Last edited by a moderator: