jonathanwang003
Board Regular
- Joined
- May 9, 2014
- Messages
- 133
Hi All,
The code below used to work for scraping the WSJ for financial statements. They must have changed something on their end because now this macro shows "Access Denied". In order to run, need to enable references (Microsoft HTML Object Library and Microsoft XML, v3.0)
I've tried recording the macro of manually importing data from web. That would work but I'm not familiar with how to configure the macro so that the column headers are not fixed strings to this one download.
Here is the recorded macro that works. The problem is that the column headers would be fixed in this macro and I need to be able to detect what the sources column headers would be.
The code below used to work for scraping the WSJ for financial statements. They must have changed something on their end because now this macro shows "Access Denied". In order to run, need to enable references (Microsoft HTML Object Library and Microsoft XML, v3.0)
I've tried recording the macro of manually importing data from web. That would work but I'm not familiar with how to configure the macro so that the column headers are not fixed strings to this one download.
VBA Code:
Public Ticker As String
Public oXML As MSXML2.XMLHTTP
Public oHTML As MSHTML.HTMLDocument
Public lngRow As Long
Public lngCol As Long
Public lngTable As Long
Public lngX As Long
Public LastRow As Long
Public strUrl(1 To 6) As String
Public rngPasteDest As Range
Public TargetSheet As Worksheet
Sub Download_FinancialStatements()
Application.DisplayAlerts = False
Set oXML = New MSXML2.XMLHTTP
Set oHTML = New MSHTML.HTMLDocument
Application.Calculation = xlManual
Set TargetSheet = Sheets(1)
TargetSheet.Activate
If Ticker = NullString Then
Ticker = "AAPL"
Company = "Apple"
If Ticker = NullString Then Exit Sub
End If
TargetSheet.Cells.Clear
strUrl(1) = "https://quotes.wsj.com/" & Ticker & "/financials/annual/income-statement"
strUrl(2) = "https://quotes.wsj.com/" & Ticker & "/financials/annual/balance-sheet"
strUrl(3) = "https://quotes.wsj.com/" & Ticker & "/financials/annual/cash-flow"
strUrl(4) = "https://quotes.wsj.com/" & Ticker & "/financials/quarter/income-statement"
strUrl(5) = "https://quotes.wsj.com/" & Ticker & "/financials/quarter/balance-sheet"
strUrl(6) = "https://quotes.wsj.com/" & Ticker & "/financials/quarter/cash-flow"
For I = 1 To 6
Set rngPasteDest = TargetSheet.Cells(1, 1)
rngPasteDest.Select
With oXML
.Open "GET", strUrl(I), False
.send
oHTML.body.innerHTML = .responseText
End With
lngX = 0
With oHTML.getElementsByTagName("table")
For lngTable = 0 To .Length - 1
For lngRow = 0 To .Item(lngTable).Rows.Length - 1
For lngCol = 0 To .Item(lngTable).Rows(lngRow).Cells.Length - 2
rngPasteDest.Offset(lngRow + lngX, lngCol).Value = .Item(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
lngX = lngRow + lngX
Next lngTable
End With
Set rngPasteDest = Nothing
Next I
Set oXML = Nothing
Set oHTML = Nothing
End Sub
Here is the recorded macro that works. The problem is that the column headers would be fixed in this macro and I need to be able to detect what the sources column headers would be.
Code:
Sub Recorded()
ActiveWorkbook.Queries.Add Name:="Table 0", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Source = Web.Page(Web.Contents(""https://www.wsj.com/market-data/quotes/AAPL/financials/annual/income-statement""))," & Chr(13) & "" & Chr(10) & " Data0 = Source{0}[Data]," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(Data0,{{""Fiscal year is October-September. All values USD Millions."", type text}, {""2019"", type text}, {""2018"", type text}, {""2017"", type text}, {""2016" & _
""", type text}, {""2015"", type text}, {""5-year trend"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Changed Type"""
ActiveWorkbook.Worksheets.Add
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 0"";Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Table 0]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Table_0"
.Refresh BackgroundQuery:=False
End With
End Sub