jonathanwang003
Board Regular
- Joined
- May 9, 2014
- Messages
- 133
I used to scrape financial statements cleanly into spreadsheets by examining the HTML as a table and ensuring there is data by checking the number of columns and rows. This code used to work, but I'm redirecting to advfn.com and I'm having trouble properly grabbing the table.
This code is a recording of the macro where I can dump the table I want using the data import from web feature in Excel, but it's incredibly sloppy. Is this something that can be cleaned up?
Code:
'Download Info
Public oXML As MSXML2.XMLHTTP
Public oHTML As MSHTML.HTMLDocument
Public lngRow As Long
Public lngCol As Long
Public lngTable As Long
Public lngX As Long
Public strUrl(1 To 6) As String
Public rngPasteDest As Range
sub Download_Financials()
Set oXML = New MSXML2.XMLHTTP
Set oHTML = New MSHTML.HTMLDocument
Set rngPasteDest = Range("A1")
strUrl(1) = "https://www.advfn.com/stock-market/NASDAQ/SMCI/financials"
With oXML
.Open "GET", strUrl(1), False
.send
oHTML.body.innerHTML = .ResponseText
End With
lngX = 0
With oHTML.getElementsByTagName("Table 1")
If .Length <= 1 Then
If AttemptCount >= 10 Then
AttemptCount = 0
sheets(1).Cells.Clear
Exit Sub
End If
AttemptCount = AttemptCount + 1
GoTo ParsePage
End If
For lngTable = 0 To .Length - 1
For lngRow = 0 To .Item(lngTable).Rows.Length - 1
For lngCol = 0 To .Item(lngTable).Rows(lngRow).Cells.Length - 2
rngPasteDest.Offset(lngRow + lngX, lngCol).Value = .Item(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
lngX = lngRow + lngX
Next lngTable
End With
Set rngPasteDest = Nothing
End Sub
This code is a recording of the macro where I can dump the table I want using the data import from web feature in Excel, but it's incredibly sloppy. Is this something that can be cleaned up?
VBA Code:
ActiveWorkbook.Queries.Add Name:="Table 1 (2)", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Source = Web.BrowserContents(""https://www.advfn.com/stock-market/NASDAQ/SMCI/financials"")," & Chr(13) & "" & Chr(10) & " #""Extracted Table From Html"" = Html.Table(Source, {{""Column1"", ""DIV[id='quarterIncomeStatement'] > DIV.table-responsive > TABLE.table.financial-table:nth-child(1) > * > TR > :nth-child(1)""}, {""Column2"", ""DIV[id='quarterIncomeStatement'] > DIV.table-res" & _
"ponsive > TABLE.table.financial-table:nth-child(1) > * > TR > :nth-child(2)""}, {""Column3"", ""DIV[id='quarterIncomeStatement'] > DIV.table-responsive > TABLE.table.financial-table:nth-child(1) > * > TR > :nth-child(3)""}, {""Column4"", ""DIV[id='quarterIncomeStatement'] > DIV.table-responsive > TABLE.table.financial-table:nth-child(1) > * > TR > :nth-child(4)""}, " & _
"{""Column5"", ""DIV[id='quarterIncomeStatement'] > DIV.table-responsive > TABLE.table.financial-table:nth-child(1) > * > TR > :nth-child(5)""}, {""Column6"", ""DIV[id='quarterIncomeStatement'] > DIV.table-responsive > TABLE.table.financial-table:nth-child(1) > * > TR > :nth-child(6)""}}, [RowSelector=""DIV[id='quarterIncomeStatement'] > DIV.table-responsive > TABLE." & _
"table.financial-table:nth-child(1) > * > TR""])," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(#""Extracted Table From Html"",{{""Column1"", type text}, {""Column2"", type text}, {""Column3"", type text}, {""Column4"", type text}, {""Column5"", type text}, {""Column6"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Changed Type"""
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 1 (2)"";Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Table 1 (2)]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Table_1__2"
.Refresh BackgroundQuery:=False
End With