fredrerik84
Active Member
- Joined
- Feb 26, 2017
- Messages
- 383
Hi. I was able to put together this web scraping code with alot of help from this great community:
Problem is that this code sometimes break down at 3 different points:
Point one:
- This is handled by an error timer and just a restart of the code is enough to by pass this.
2.nd error place:
- Sometimes the codes goes into infi loop here and there is no error handler here , would be really helpfull if someone could help me add an similar error handler similar to the one I have in the first error place.
3rd error point:
- also the code goes into infi loop at this point with unknown interval, all in all - this code I would say has a 70% success rate of being run to the end.
I really need help with 2nd and 3rd error point to add some error code at those points to kill the sub if the code stop at these points for some reason
Most of the time if I rerun the code I get my desired results...
Did you guys have similar problem with web scraping before ?
All help as usual is greatly appreciated
Rich (BB code):
Option Explicit
Sub SoccerTest(ByVal rr As Long, ByVal HoTeam As String, ByVal AwTeam As String, ByVal SDate As String)
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim HTMLRows As MSHTML.IHTMLElementCollection
Dim HTMLRows2 As MSHTML.IHTMLElementCollection
Dim HTMLRows3 As MSHTML.IHTMLElementCollection
Dim HTMLRow As MSHTML.IHTMLElement
Dim League As String
Dim sheet As Worksheet
Dim R As Long
Dim i As Long
Dim j As Long
Dim K As Long
Dim L As Long
Dim MSG As Long
Dim vMatch As String
Dim vStatus As Variant
Dim mDate As String
Dim sKO As String
Dim strURL As String
Dim sngStartTime As Single
Dim strErrMsg As String
Dim blnSuccessful As Boolean
Dim Convert As String
Dim country As String
Dim CR As String
Dim ShrtCountry As String
'New
Dim Test1 As String
Dim Test2 As String
Dim KO As String
Dim MCountry As String
Dim MLeague As String
Dim Mscore As String
Dim HTeam As String
Dim ATeam As String
Dim timeout As Boolean
Dim x As Long
Dim z As Long
Dim y As Long
Dim GoalR As String
Dim GoalS As String
Dim strLeft As String
Dim Cell As String
Application.Cursor = xlWait
Test1 = HoTeam
Test2 = AwTeam
Const MAX_WAIT_SEC As Integer = 6 'change wait time as desired
Const URL As String = "http://www.xscores.com/"
If TypeName(ActiveSheet) <> "Worksheet" Then
MsgBox "Please make sure that a worksheet is the active sheet, " & _
"and try again.", vbExclamation
Exit Sub
End If
mDate = SDate
Set sheet = ActiveWorkbook.Sheets("Conversion")
vStatus = "all_games"
strURL = "http://www.xscores.com/" & "soccer" & "/" & vStatus & "/" & Format(mDate, "dd-mm")
On Error GoTo ErrHandler
If vStatus = "scheduled_games" Then
L = 0
MSG = 5
ElseIf vStatus = "live_games" Then
L = 0
ElseIf vStatus = "finished_games" Then
L = 0
MSG = 4
Else
L = 0
MSG = 3
End If
With Application
Application.DisplayStatusBar = True
Application.StatusBar = "Loading results..."
.DisplayAlerts = False
.EnableEvents = False
.ScreenUpdating = False
End With
With IE
.Navigate strURL
.Visible = True
sngStartTime = Timer
Do While .Busy Or .readyState <> READYSTATE_COMPLETE
DoEvents
If Timer - sngStartTime > MAX_WAIT_SEC Then
strErrMsg = "Unable to connect to :" & vbCrLf & vbCrLf & strURL
timeout = True
GoTo ErrHandler
End If
Loop
End With
Set HTMLDoc = IE.document
Set HTMLRows = HTMLDoc.getElementById("scoretable").getElementsByTagName("tr")
Range("AB" & rr & ":BS" & rr).ClearContents
Columns("AK:AN").NumberFormat = "@"
Columns("AP:AR").NumberFormat = "0"
Columns("BC:BF").NumberFormat = "0"
R = 2
z = 45
y = 58
For i = L To HTMLRows.Length - 1
sKO = HTMLRows(i).Cells(0).innerText
If i = 0 Or IsDate(sKO) Then
KO = sKO
HTeam = capsf(HTMLRows(i).Cells(IIf(i > 0, 5, 4)).innerText)
If i > 0 Then
country = GetCountry(HTMLRows(i).Cells(3).getElementsByTagName("a")(0).getAttribute("title"))
CR = Replace(country, "(", "")
CR = Replace(CR, ")", "")
If CR = "TOURNAMENTS" Then
MCountry = "World"
ElseIf CR = "GAMES" Then
MCountry = "World"
ElseIf CR = "USA" Then
MCountry = "USA"
ElseIf CR = "CAF" Then
MCountry = "Africa"
ElseIf CR = "UEFA" Then
MCountry = "Europe"
Else
MCountry = capsf(CR)
End If
League = capsf(GetLeague(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
On Error Resume Next
ShrtCountry = LCase(Application.VLookup(country, sheet.Range("P1:T259"), 2, False))
On Error GoTo 0
If Not IsEmpty(ShrtCountry) Then
LCase (ShrtCountry)
Else
ShrtCountry = ""
End If
If InStr(League, "Round") <> 0 Then
MLeague = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******"))) & " (" & ShrtCountry & ")"
League = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
ElseIf InStr(League, "Australia") <> 0 Then
MLeague = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******"))) & " (" & ShrtCountry & ")"
League = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
ElseIf InStr(League, "Stage") <> 0 Then
MLeague = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******"))) & " (" & ShrtCountry & ")"
League = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
ElseIf InStr(League, "Final") <> 0 Then
MLeague = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******"))) & " (" & ShrtCountry & ")"
League = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
Else
MLeague = League & " (" & ShrtCountry & ")"
End If
If League = "Uefa Champions League" Then
MLeague = "Champions League"
ElseIf League = "Friendly International" Then
MLeague = "Friendly International"
ElseIf League = "International Toulon Youth Festival" Then
MLeague = "Int. Toulon Youth Festival"
ElseIf League = "Premier Soccer League Relegation Play Offs" Then
MLeague = "Premier Soccer League (sa)"
ElseIf League = "Caf Champions League" Then
MLeague = "Caf Champions League"
ElseIf League = "First League Relegation Play Offs" Then
MLeague = "Parva Liga (bg)"
ElseIf League = "Major League Soccer" Then
MLeague = "Major League Soccer"
ElseIf League = "National Womens Soccer League" Then
MLeague = "NWSL (Women)"
End If
Else
MCountry = "Country"
MLeague = "League"
End If
Mscore = capsf(HTMLRows(i).Cells(IIf(i > 0, 14, 11)).innerText)
ATeam = capsf(HTMLRows(i).Cells(IIf(i > 0, 9, 7)).innerText)
If (Test1 = HTeam) Then
If (Test2 = ATeam) Then
'Vennue
Cells(26, "AJ").value = "Vennue"
'Results
Cells(26, "AK").value = "HT"
Cells(26, "AL").value = "FT"
Cells(26, "AE").value = "FT"
Cells(26, "AM").value = "ET"
Cells(26, "AN").value = "PN"
Cells(26, "AO").value = "Game Status"
'Home (lp,Y,R)
Cells(26, "AP").value = "League Pos"
Cells(26, "AQ").value = "Yellow cards"
Cells(26, "AR").value = "Red cards"
'Away
Cells(26, "BC").value = "League Pos"
Cells(26, "BD").value = "Yellow cards"
Cells(26, "BE").value = "Red card(s)"
'Home (lp,Y,R)
Cells(rr, "AP").value = HTMLRows(i).Cells(IIf(i > 0, 6, 4)).innerText
Cells(rr, "AQ").value = HTMLRows(i).Cells(IIf(i > 0, 7, 4)).innerText
Cells(rr, "AR").value = HTMLRows(i).Cells(IIf(i > 0, 8, 4)).innerText
'Away (lp,Y,R)
Cells(rr, "BC").value = HTMLRows(i).Cells(IIf(i > 0, 10, 7)).innerText
Cells(rr, "BD").value = HTMLRows(i).Cells(IIf(i > 0, 11, 7)).innerText
Cells(rr, "BE").value = HTMLRows(i).Cells(IIf(i > 0, 12, 7)).innerText
'Results
Cells(rr, "AK").value = ClearText2(HTMLRows(i).Cells(IIf(i > 0, 13, 7)).innerText)
Cells(rr, "AE").value = ClearText2(Mscore)
Cells(rr, "AL").value = ClearText2(Mscore)
Cells(rr, "AM").value = ClearText2(HTMLRows(i).Cells(IIf(i > 0, 15, 4)).innerText)
Cells(rr, "AN").value = HTMLRows(i).Cells(IIf(i > 0, 16, 4)).innerText
Cells(rr, "AO").value = HTMLRows(i).Cells(1).innerText
blnSuccessful = True
'Vennue
HTMLRows(i).Cells(18).Children(0).Children(0).Click
Set HTMLRows3 = HTMLDoc.getElementById("divInfo").getElementsByTagName("tr")
Cells(rr, "AJ").value = HTMLRows3(1).Cells(0).innerText
HTMLDoc.getElementById("divInfo").getElementsByTagName("img")(0).Click
'scorers
HTMLRows(i).Cells(19).Children(0).Children(0).Click
Set HTMLRows2 = HTMLDoc.getElementById("div1").getElementsByTagName("tr")
For x = 3 To HTMLRows2.Length - 2
If HTMLRows2(x).Cells(0).innerText <> "" Then
GoalR = HTMLRows2(x).Cells(0).innerText
GoalS = HTMLRows2(x).Cells(1).innerText
If GoalR <> "" Then
'Cells(z, "AM").value = GoalR
strLeft = Left(GoalS, 1)
If strLeft = "(" Then
GoalS = capsf(GoalS)
Cells(rr, z).value = ClearText(GoalS)
z = z + 1
Else
GoalS = capsf(GoalS)
Cells(rr, y).value = ClearText(GoalS)
y = y + 1
End If
End If
End If
Next x
End If
End If
R = R + 1
End If
Next i
ExitHandler:
If Not IE Is Nothing Then
IE.Quit
End If
With Application
.DisplayAlerts = True
.EnableEvents = True
.ScreenUpdating = True
End With
Application.Cursor = xlDefault
Application.StatusBar = False
Set IE = Nothing
Set HTMLDoc = Nothing
Set HTMLRows = Nothing
Set HTMLRow = Nothing
Set sheet = Nothing
If timeout = True Then
Exit Sub
End If
If blnSuccessful Then
MsgBox i - MSG & " Matches has been searched. And results where found!", vbInformation
Else
MsgBox "Error! No results where found!." & vbNewLine & _
"Please check if the Nation, League, Home and Away values are correct.", vbExclamation, "Data Not Found!"
End If
Exit Sub
ErrHandler:
If Len(strErrMsg) > 0 Then
MsgBox strErrMsg, vbCritical, "Error"
GoTo ExitHandler
Else
If Err <> 0 Then
MsgBox "Error " & Err.Number & ": " & Err.Description, vbCritical, "Error"
Resume ExitHandler
End If
End If
End Sub
Function ClearText(Cell)
With CreateObject("VBScript.RegExp")
.Pattern = "\(\d+\)"
ClearText = .Replace(Cell, "")
End With
End Function
Function ClearText2(Cell)
With CreateObject("VBScript.RegExp")
.Pattern = "(\d+)-(\d+)"
ClearText2 = .Replace(Cell, "$1 - $2")
End With
End Function
Problem is that this code sometimes break down at 3 different points:
Point one:
Rich (BB code):
Do While .Busy Or .readyState <> READYSTATE_COMPLETE
DoEvents
If Timer - sngStartTime > MAX_WAIT_SEC Then
strErrMsg = "Unable to connect to :" & vbCrLf & vbCrLf & strURL
timeout = True
GoTo ErrHandler
End If
- This is handled by an error timer and just a restart of the code is enough to by pass this.
2.nd error place:
Rich (BB code):
'Vennue
HTMLRows(i).Cells(18).Children(0).Children(0).Click
Set HTMLRows3 = HTMLDoc.getElementById("divInfo").getElementsByTagName("tr")
Cells(rr, "AJ").value = HTMLRows3(1).Cells(0).innerText
HTMLDoc.getElementById("divInfo").getElementsByTagName("img")(0).Click
3rd error point:
Rich (BB code):
'scorers
HTMLRows(i).Cells(19).Children(0).Children(0).Click
Set HTMLRows2 = HTMLDoc.getElementById("div1").getElementsByTagName("tr")
For x = 3 To HTMLRows2.Length - 2
If HTMLRows2(x).Cells(0).innerText <> "" Then
GoalR = HTMLRows2(x).Cells(0).innerText
GoalS = HTMLRows2(x).Cells(1).innerText
If GoalR <> "" Then
'Cells(z, "AM").value = GoalR
strLeft = Left(GoalS, 1)
If strLeft = "(" Then
GoalS = capsf(GoalS)
Cells(rr, z).value = ClearText(GoalS)
z = z + 1
Else
GoalS = capsf(GoalS)
Cells(rr, y).value = ClearText(GoalS)
y = y + 1
End If
End If
End If
Next x
- also the code goes into infi loop at this point with unknown interval, all in all - this code I would say has a 70% success rate of being run to the end.
I really need help with 2nd and 3rd error point to add some error code at those points to kill the sub if the code stop at these points for some reason
Most of the time if I rerun the code I get my desired results...
Did you guys have similar problem with web scraping before ?
All help as usual is greatly appreciated
Last edited: