amirkhosravi
New Member
- Joined
- Jul 5, 2020
- Messages
- 1
- Office Version
- 2019
- Platform
- Windows
Hi,
I am trying to scrape and extract data from the link below by writing code in vba in excel:
.:TSETMC:. :: دیده بان بازار پیشرفته
I used different techniques:
I should say that other links of this site have the similar behavior, for example: .:TSETMC:. :: اطلاعات نماد.
I guess the site is designed dynamically and uses JavaScript to load contents during the loading procedure. Also, when using excel vba, it seems that the server recognizes that the request is not sent from a browser.
Please help to find a solution and scrape the table in the mentioned URL.
Sub CreateMainList()
Dim MainURL As String
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim MainDiv As MSHTML.IHTMLElement
Dim MainDivChildren As MSHTML.IHTMLElementCollection
Dim Res As String
Dim price As Integer
'MainURL = ThisWorkbook.Worksheets("Home").Range("C2").Value
MainURL = ".:TSETMC:. :: دیده بان بازار پیشرفته"
XMLReq.Open "GET", MainURL, False
'XMLReq.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
'XMLReq.setRequestHeader "Content-Type", "text/html; charset=utf-8"
'XMLReq.setRequestHeader "Content-Type", "text/html; charset=utf-8"
XMLReq.setRequestHeader "Accept-Language", "en-US,en;q=0.5"
XMLReq.setRequestHeader "Connection", "keep-alive"
XMLReq.setRequestHeader "accept-Encoding", "gzip , deflate"
XMLReq.setRequestHeader "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
XMLReq.setRequestHeader "DNT", "1"
'XMLReq.setRequestHeader "Upgrade-Insecure- Requests", "1"
XMLReq.setRequestHeader "Set-Cookie", "ASP.NET_SessionId=cd03mksrog04g2ocuaeqxweb; path=/; HttpOnly"
'XMLReq.setRequestHeader "Cache-Control", "Max-age = 0"
'XMLReq.setRequestHeader "Cookie", MyCookie
XMLReq.send
If XMLReq.Status <> 200 Then
MsgBox "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Exit Sub
End If
' Get the webpage response data into a variable.
'response = StrConv(request.responseBody, vbUnicode)
HTMLDoc.body.innerHTML = XMLReq.responseText
Debug.Print XMLReq.responseText
Set XMLReq = Nothing
Set MainDiv = HTMLDoc.getElementById("main")
End Sub
and the output is:
<!doctype html><html><head><title>.:TSETMC:. :: ÏíÏå ÈÇä ÈÇÒÇÑ íÔÑÝÊå</title><!-- Global site tag (gtag.js) - Google Analytics --><script async src="https://www.googletagmanager.com/gtag/js?id=UA-63076930-1"></script><script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-63076930-1');</script><script>var DBFlag='1' ;var LongRunnigPagesSite='http://cdn2.tsetmc.com';function ens(ty,sv){var lv=localStorage.getItem("v_"+ty);var t;if (1==1 || lv!=sv){var oX=new XMLHttpRequest();oX.open('GET', 'tsev2/res/loader.aspx?t='+ty+'&_'+sv,false);oX.send(null);t=oX.responseText;if(t[t.length-1]!=';') return;localStorage.setItem("v_"+ty,sv);localStorage.setItem("t_"+ty,t)}else{t=localStorage.getItem("t_"+ty)}if (ty=='s'){document.write("<style>"+t+"</style>")}else{var h=document.getElementsByTagName('HEAD').item(0);var s=document.createElement("script");s.defer=true;s.text=t;h.appendChild(s)}}var version='472';ens('j',version); ens('s',v
ersion); (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) })</script></head><body><div style="display:none">PSB is being developed by Pooya Paridel.</div><div id="FastView" class="slideTop"></div><div id="TopBar"><div id="company"><div id="company_logo"></div><div id="company_text">ÔÑßÊ ãÏíÑíÊ ÝäÇæÑí ÈæÑÓ ÊåÑÇä<br/> Tehran Securities Exchange Technology Management Co</div></div><div id="menu_btns"></div><script>ShowMenuIcon(1)</script></div><div class="MainContainer"><form method="post" action="./Loader.aspx?ParTree=15131F" id="form1">
<div class="aspNetHidden">
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="" />
</div>
<div id="display" style="direction:rtl;font-size:12px;text-align:right">
</div>
<script>
ens('g', version);
if (location.href.split('&')[1] == "E")
{ens('me', version); mw = MarketWatchPlus(); mw.StartMarketWatch() }
else {
ens('p', version); ens('tw', version); tw = TweetEngine();mw = MarketWatchPlus(); mw.StartMarketWatch()
}
</script>
</form>
</div></body></html>
I am trying to scrape and extract data from the link below by writing code in vba in excel:
.:TSETMC:. :: دیده بان بازار پیشرفته
I used different techniques:
- MSXML2.XMLHTTP60 it does not work
- MSXML2.ServerXMLHTTP60 it does not work
- SHDocVw.InternetExplorer beside it is too slow, it rarely works.
I should say that other links of this site have the similar behavior, for example: .:TSETMC:. :: اطلاعات نماد.
I guess the site is designed dynamically and uses JavaScript to load contents during the loading procedure. Also, when using excel vba, it seems that the server recognizes that the request is not sent from a browser.
Please help to find a solution and scrape the table in the mentioned URL.
Sub CreateMainList()
Dim MainURL As String
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim MainDiv As MSHTML.IHTMLElement
Dim MainDivChildren As MSHTML.IHTMLElementCollection
Dim Res As String
Dim price As Integer
'MainURL = ThisWorkbook.Worksheets("Home").Range("C2").Value
MainURL = ".:TSETMC:. :: دیده بان بازار پیشرفته"
XMLReq.Open "GET", MainURL, False
'XMLReq.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
'XMLReq.setRequestHeader "Content-Type", "text/html; charset=utf-8"
'XMLReq.setRequestHeader "Content-Type", "text/html; charset=utf-8"
XMLReq.setRequestHeader "Accept-Language", "en-US,en;q=0.5"
XMLReq.setRequestHeader "Connection", "keep-alive"
XMLReq.setRequestHeader "accept-Encoding", "gzip , deflate"
XMLReq.setRequestHeader "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
XMLReq.setRequestHeader "DNT", "1"
'XMLReq.setRequestHeader "Upgrade-Insecure- Requests", "1"
XMLReq.setRequestHeader "Set-Cookie", "ASP.NET_SessionId=cd03mksrog04g2ocuaeqxweb; path=/; HttpOnly"
'XMLReq.setRequestHeader "Cache-Control", "Max-age = 0"
'XMLReq.setRequestHeader "Cookie", MyCookie
XMLReq.send
If XMLReq.Status <> 200 Then
MsgBox "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Exit Sub
End If
' Get the webpage response data into a variable.
'response = StrConv(request.responseBody, vbUnicode)
HTMLDoc.body.innerHTML = XMLReq.responseText
Debug.Print XMLReq.responseText
Set XMLReq = Nothing
Set MainDiv = HTMLDoc.getElementById("main")
End Sub
and the output is:
<!doctype html><html><head><title>.:TSETMC:. :: ÏíÏå ÈÇä ÈÇÒÇÑ íÔÑÝÊå</title><!-- Global site tag (gtag.js) - Google Analytics --><script async src="https://www.googletagmanager.com/gtag/js?id=UA-63076930-1"></script><script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-63076930-1');</script><script>var DBFlag='1' ;var LongRunnigPagesSite='http://cdn2.tsetmc.com';function ens(ty,sv){var lv=localStorage.getItem("v_"+ty);var t;if (1==1 || lv!=sv){var oX=new XMLHttpRequest();oX.open('GET', 'tsev2/res/loader.aspx?t='+ty+'&_'+sv,false);oX.send(null);t=oX.responseText;if(t[t.length-1]!=';') return;localStorage.setItem("v_"+ty,sv);localStorage.setItem("t_"+ty,t)}else{t=localStorage.getItem("t_"+ty)}if (ty=='s'){document.write("<style>"+t+"</style>")}else{var h=document.getElementsByTagName('HEAD').item(0);var s=document.createElement("script");s.defer=true;s.text=t;h.appendChild(s)}}var version='472';ens('j',version); ens('s',v
ersion); (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) })</script></head><body><div style="display:none">PSB is being developed by Pooya Paridel.</div><div id="FastView" class="slideTop"></div><div id="TopBar"><div id="company"><div id="company_logo"></div><div id="company_text">ÔÑßÊ ãÏíÑíÊ ÝäÇæÑí ÈæÑÓ ÊåÑÇä<br/> Tehran Securities Exchange Technology Management Co</div></div><div id="menu_btns"></div><script>ShowMenuIcon(1)</script></div><div class="MainContainer"><form method="post" action="./Loader.aspx?ParTree=15131F" id="form1">
<div class="aspNetHidden">
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="" />
</div>
<div id="display" style="direction:rtl;font-size:12px;text-align:right">
</div>
<script>
ens('g', version);
if (location.href.split('&')[1] == "E")
{ens('me', version); mw = MarketWatchPlus(); mw.StartMarketWatch() }
else {
ens('p', version); ens('tw', version); tw = TweetEngine();mw = MarketWatchPlus(); mw.StartMarketWatch()
}
</script>
</form>
</div></body></html>