error handling with web scraping.

fredrerik84

Active Member
Joined
Feb 26, 2017
Messages
383
Hi. I was able to put together this web scraping code with alot of help from this great community:

Rich (BB code):
Option Explicit




Sub SoccerTest(ByVal rr As Long, ByVal HoTeam As String, ByVal AwTeam As String, ByVal SDate As String)




    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As New MSHTML.HTMLDocument
    Dim HTMLRows As MSHTML.IHTMLElementCollection
    Dim HTMLRows2 As MSHTML.IHTMLElementCollection
    Dim HTMLRows3 As MSHTML.IHTMLElementCollection
    Dim HTMLRow As MSHTML.IHTMLElement
    Dim League As String
    Dim sheet As Worksheet
    Dim R As Long
    Dim i As Long
    Dim j As Long
    Dim K As Long
    Dim L As Long
    Dim MSG As Long
    Dim vMatch As String
    Dim vStatus As Variant
    Dim mDate As String
    Dim sKO As String
    Dim strURL As String
    Dim sngStartTime As Single
    Dim strErrMsg As String
    Dim blnSuccessful As Boolean
    Dim Convert As String
    Dim country As String
    Dim CR As String
    Dim ShrtCountry As String
    'New
    Dim Test1 As String
    Dim Test2 As String
    Dim KO As String
    Dim MCountry As String
    Dim MLeague As String
    Dim Mscore As String
    Dim HTeam As String
    Dim ATeam As String
    Dim timeout As Boolean
    
    Dim x As Long
    Dim z As Long
    Dim y As Long
    Dim GoalR As String
    Dim GoalS As String
    Dim strLeft As String
    Dim Cell As String
    
    Application.Cursor = xlWait
    
    Test1 = HoTeam
    Test2 = AwTeam
    
    Const MAX_WAIT_SEC As Integer = 6 'change wait time as desired
    Const URL As String = "http://www.xscores.com/"
    
    If TypeName(ActiveSheet) <> "Worksheet" Then
        MsgBox "Please make sure that a worksheet is the active sheet, " & _
            "and try again.", vbExclamation
        Exit Sub
    End If
    
    mDate = SDate
    Set sheet = ActiveWorkbook.Sheets("Conversion")
    vStatus = "all_games"
    strURL = "http://www.xscores.com/" & "soccer" & "/" & vStatus & "/" & Format(mDate, "dd-mm")
    
    On Error GoTo ErrHandler
    
    If vStatus = "scheduled_games" Then
       L = 0
       MSG = 5
    ElseIf vStatus = "live_games" Then
       L = 0
    ElseIf vStatus = "finished_games" Then
       L = 0
       MSG = 4
    Else
       L = 0
       MSG = 3
       
    End If
    
    With Application
        Application.DisplayStatusBar = True
        Application.StatusBar = "Loading results..."
        .DisplayAlerts = False
        .EnableEvents = False
        .ScreenUpdating = False
    End With
    
    
    With IE
        .Navigate strURL
        .Visible = True
        sngStartTime = Timer


        Do While .Busy Or .readyState <> READYSTATE_COMPLETE
            DoEvents
            If Timer - sngStartTime > MAX_WAIT_SEC Then
                strErrMsg = "Unable to connect to :" & vbCrLf & vbCrLf & strURL
                timeout = True
                GoTo ErrHandler
            End If
        Loop
    End With
            
    Set HTMLDoc = IE.document
    Set HTMLRows = HTMLDoc.getElementById("scoretable").getElementsByTagName("tr")
    
    Range("AB" & rr & ":BS" & rr).ClearContents
    Columns("AK:AN").NumberFormat = "@"
    Columns("AP:AR").NumberFormat = "0"
    Columns("BC:BF").NumberFormat = "0"
    
    R = 2
    z = 45
    y = 58
    For i = L To HTMLRows.Length - 1
        sKO = HTMLRows(i).Cells(0).innerText
        If i = 0 Or IsDate(sKO) Then
            KO = sKO
            HTeam = capsf(HTMLRows(i).Cells(IIf(i > 0, 5, 4)).innerText)
            If i > 0 Then
                country = GetCountry(HTMLRows(i).Cells(3).getElementsByTagName("a")(0).getAttribute("title"))
                CR = Replace(country, "(", "")
                CR = Replace(CR, ")", "")
                If CR = "TOURNAMENTS" Then
                   MCountry = "World"
                ElseIf CR = "GAMES" Then
                   MCountry = "World"
                ElseIf CR = "USA" Then
                   MCountry = "USA"
                ElseIf CR = "CAF" Then
                   MCountry = "Africa"
                ElseIf CR = "UEFA" Then
                   MCountry = "Europe"
                Else
                   MCountry = capsf(CR)
                End If
                League = capsf(GetLeague(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
                On Error Resume Next
                ShrtCountry = LCase(Application.VLookup(country, sheet.Range("P1:T259"), 2, False))
                On Error GoTo 0
                If Not IsEmpty(ShrtCountry) Then
                       LCase (ShrtCountry)
                Else
                       ShrtCountry = ""
                End If
                
                If InStr(League, "Round") <> 0 Then
                    MLeague = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******"))) & " (" & ShrtCountry & ")"
                    League = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
                ElseIf InStr(League, "Australia") <> 0 Then
                    MLeague = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******"))) & " (" & ShrtCountry & ")"
                    League = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
                ElseIf InStr(League, "Stage") <> 0 Then
                    MLeague = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******"))) & " (" & ShrtCountry & ")"
                    League = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
                ElseIf InStr(League, "Final") <> 0 Then
                    MLeague = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******"))) & " (" & ShrtCountry & ")"
                    League = capsf(fredrerikExtract(HTMLRows(i).Cells(4).getElementsByTagName("a")(0).getAttribute("*******")))
                Else
                    MLeague = League & " (" & ShrtCountry & ")"
                End If
                
                If League = "Uefa Champions League" Then
                   MLeague = "Champions League"
                ElseIf League = "Friendly International" Then
                   MLeague = "Friendly International"
                ElseIf League = "International Toulon Youth Festival" Then
                   MLeague = "Int. Toulon Youth Festival"
                ElseIf League = "Premier Soccer League Relegation Play Offs" Then
                   MLeague = "Premier Soccer League (sa)"
                ElseIf League = "Caf Champions League" Then
                   MLeague = "Caf Champions League"
                ElseIf League = "First League Relegation Play Offs" Then
                   MLeague = "Parva Liga (bg)"
                ElseIf League = "Major League Soccer" Then
                   MLeague = "Major League Soccer"
                ElseIf League = "National Womens Soccer League" Then
                   MLeague = "NWSL (Women)"
                End If
            Else
                MCountry = "Country"
                MLeague = "League"
            End If
            Mscore = capsf(HTMLRows(i).Cells(IIf(i > 0, 14, 11)).innerText)
            ATeam = capsf(HTMLRows(i).Cells(IIf(i > 0, 9, 7)).innerText)
            If (Test1 = HTeam) Then
               If (Test2 = ATeam) Then
               
                   'Vennue
                   Cells(26, "AJ").value = "Vennue"
                   
                   'Results
                   Cells(26, "AK").value = "HT"
                   Cells(26, "AL").value = "FT"
                   Cells(26, "AE").value = "FT"
                   Cells(26, "AM").value = "ET"
                   Cells(26, "AN").value = "PN"
                   Cells(26, "AO").value = "Game Status"
                   
                   'Home (lp,Y,R)
                   Cells(26, "AP").value = "League Pos"
                   Cells(26, "AQ").value = "Yellow cards"
                   Cells(26, "AR").value = "Red cards"
                   
                   'Away
                   Cells(26, "BC").value = "League Pos"
                   Cells(26, "BD").value = "Yellow cards"
                   Cells(26, "BE").value = "Red card(s)"
                   
                   'Home (lp,Y,R)
                   Cells(rr, "AP").value = HTMLRows(i).Cells(IIf(i > 0, 6, 4)).innerText
                   Cells(rr, "AQ").value = HTMLRows(i).Cells(IIf(i > 0, 7, 4)).innerText
                   Cells(rr, "AR").value = HTMLRows(i).Cells(IIf(i > 0, 8, 4)).innerText
                   
                   'Away  (lp,Y,R)
                   Cells(rr, "BC").value = HTMLRows(i).Cells(IIf(i > 0, 10, 7)).innerText
                   Cells(rr, "BD").value = HTMLRows(i).Cells(IIf(i > 0, 11, 7)).innerText
                   Cells(rr, "BE").value = HTMLRows(i).Cells(IIf(i > 0, 12, 7)).innerText
                   
                   'Results
                   Cells(rr, "AK").value = ClearText2(HTMLRows(i).Cells(IIf(i > 0, 13, 7)).innerText)
                   Cells(rr, "AE").value = ClearText2(Mscore)
                   Cells(rr, "AL").value = ClearText2(Mscore)
                   Cells(rr, "AM").value = ClearText2(HTMLRows(i).Cells(IIf(i > 0, 15, 4)).innerText)
                   Cells(rr, "AN").value = HTMLRows(i).Cells(IIf(i > 0, 16, 4)).innerText
                   Cells(rr, "AO").value = HTMLRows(i).Cells(1).innerText
                   
                   blnSuccessful = True


                   'Vennue
                   HTMLRows(i).Cells(18).Children(0).Children(0).Click
                   Set HTMLRows3 = HTMLDoc.getElementById("divInfo").getElementsByTagName("tr")
                   Cells(rr, "AJ").value = HTMLRows3(1).Cells(0).innerText
                   HTMLDoc.getElementById("divInfo").getElementsByTagName("img")(0).Click
            
                   'scorers
                   HTMLRows(i).Cells(19).Children(0).Children(0).Click
                   Set HTMLRows2 = HTMLDoc.getElementById("div1").getElementsByTagName("tr")
                   
                   For x = 3 To HTMLRows2.Length - 2
                       If HTMLRows2(x).Cells(0).innerText <> "" Then
                          GoalR = HTMLRows2(x).Cells(0).innerText
                          GoalS = HTMLRows2(x).Cells(1).innerText
                          If GoalR <> "" Then
                             'Cells(z, "AM").value = GoalR
                             strLeft = Left(GoalS, 1)
                             If strLeft = "(" Then
                                GoalS = capsf(GoalS)
                                Cells(rr, z).value = ClearText(GoalS)
                                z = z + 1
                             Else
                                GoalS = capsf(GoalS)
                                Cells(rr, y).value = ClearText(GoalS)
                                y = y + 1
                             End If
                          End If
                       End If
                   Next x
               End If
            End If
            R = R + 1
        End If
    Next i
    
ExitHandler:
    If Not IE Is Nothing Then
        IE.Quit
    End If
    
    With Application
        .DisplayAlerts = True
        .EnableEvents = True
        .ScreenUpdating = True
    End With
    
    Application.Cursor = xlDefault
    Application.StatusBar = False
    Set IE = Nothing
    Set HTMLDoc = Nothing
    Set HTMLRows = Nothing
    Set HTMLRow = Nothing
    Set sheet = Nothing
    
    If timeout = True Then
       Exit Sub
    End If
    
    If blnSuccessful Then
        MsgBox i - MSG & " Matches has been searched. And results where found!", vbInformation
    Else
        MsgBox "Error! No results where found!." & vbNewLine & _
            "Please check if the Nation, League, Home and Away values are correct.", vbExclamation, "Data Not Found!"
    End If
    
    Exit Sub
    
ErrHandler:
    If Len(strErrMsg) > 0 Then
        MsgBox strErrMsg, vbCritical, "Error"
        GoTo ExitHandler
    Else
        If Err <> 0 Then
            MsgBox "Error " & Err.Number & ":  " & Err.Description, vbCritical, "Error"
            Resume ExitHandler
        End If
    End If
    
End Sub


Function ClearText(Cell)
    With CreateObject("VBScript.RegExp")
        .Pattern = "\(\d+\)"
        ClearText = .Replace(Cell, "")
    End With
End Function
Function ClearText2(Cell)
    With CreateObject("VBScript.RegExp")
        .Pattern = "(\d+)-(\d+)"
        ClearText2 = .Replace(Cell, "$1 - $2")
    End With
End Function

Problem is that this code sometimes break down at 3 different points:

Point one:

Rich (BB code):
        Do While .Busy Or .readyState <> READYSTATE_COMPLETE
            DoEvents
            If Timer - sngStartTime > MAX_WAIT_SEC Then
                strErrMsg = "Unable to connect to :" & vbCrLf & vbCrLf & strURL
                timeout = True
                GoTo ErrHandler
            End If

- This is handled by an error timer and just a restart of the code is enough to by pass this.

2.nd error place:


Rich (BB code):
                   'Vennue
                   HTMLRows(i).Cells(18).Children(0).Children(0).Click
                   Set HTMLRows3 = HTMLDoc.getElementById("divInfo").getElementsByTagName("tr")
                   Cells(rr, "AJ").value = HTMLRows3(1).Cells(0).innerText
                   HTMLDoc.getElementById("divInfo").getElementsByTagName("img")(0).Click
- Sometimes the codes goes into infi loop here and there is no error handler here , would be really helpfull if someone could help me add an similar error handler similar to the one I have in the first error place.

3rd error point:

Rich (BB code):
'scorers
                   HTMLRows(i).Cells(19).Children(0).Children(0).Click
                   Set HTMLRows2 = HTMLDoc.getElementById("div1").getElementsByTagName("tr")

                   For x = 3 To HTMLRows2.Length - 2
                       If HTMLRows2(x).Cells(0).innerText <> "" Then
                          GoalR = HTMLRows2(x).Cells(0).innerText
                          GoalS = HTMLRows2(x).Cells(1).innerText
                          If GoalR <> "" Then
                             'Cells(z, "AM").value = GoalR
                             strLeft = Left(GoalS, 1)
                             If strLeft = "(" Then
                                GoalS = capsf(GoalS)
                                Cells(rr, z).value = ClearText(GoalS)
                                z = z + 1
                             Else
                                GoalS = capsf(GoalS)
                                Cells(rr, y).value = ClearText(GoalS)
                                y = y + 1
                             End If
                          End If
                       End If
                   Next x

- also the code goes into infi loop at this point with unknown interval, all in all - this code I would say has a 70% success rate of being run to the end.


I really need help with 2nd and 3rd error point to add some error code at those points to kill the sub if the code stop at these points for some reason

Most of the time if I rerun the code I get my desired results...


Did you guys have similar problem with web scraping before ?

All help as usual is greatly appreciated :)
 
Last edited:

Excel Facts

What does custom number format of ;;; mean?
Three semi-colons will hide the value in the cell. Although most people use white font instead.

Forum statistics

Threads
1,223,236
Messages
6,170,906
Members
452,366
Latest member
TePunaBloke

We've detected that you are using an adblocker.

We have a great community of people providing Excel help here, but the hosting costs are enormous. You can help keep this site running by allowing ads on MrExcel.com.
Allow Ads at MrExcel

Which adblocker are you using?

Disable AdBlock

Follow these easy steps to disable AdBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the icon in the browser’s toolbar.
2)Click on the "Pause on this site" option.
Go back

Disable AdBlock Plus

Follow these easy steps to disable AdBlock Plus

1)Click on the icon in the browser’s toolbar.
2)Click on the toggle to disable it for "mrexcel.com".
Go back

Disable uBlock Origin

Follow these easy steps to disable uBlock Origin

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back

Disable uBlock

Follow these easy steps to disable uBlock

1)Click on the icon in the browser’s toolbar.
2)Click on the "Power" button.
3)Click on the "Refresh" button.
Go back
Back
Top