Sharid
Well-known Member
- Joined
- Apr 22, 2007
- Messages
- 1,066
- Office Version
- 2016
- Platform
- Windows
My code extracts emails from a website, this bit is fine
It can also extract URLs from a site, the PROBLEM is that it extracts all LINKS and not just the domain name
Also I can only extract one or the other, either emails on their own or Links, when what I want is for it to be something like this, and each new record to go into the next Blank Row.
[TABLE="class: grid, width: 500"]
<tbody>[TR]
[TD]Emails
[/TD]
[TD]Urls
[/TD]
[/TR]
[TR]
[TD]Jondoe@gmail.com
[/TD]
[TD]coolsite@mysite.com
[/TD]
[/TR]
[TR]
[TD]janedoe@yahoo.com
[/TD]
[TD]supersite@mysite.com
[/TD]
[/TR]
[TR]
[TD][/TD]
[TD]supersite@mysite.com
[/TD]
[/TR]
[TR]
[TD]Dave@gmail.com
[/TD]
[TD]Coolsite@mysite.com
[/TD]
[/TR]
[TR]
[TD]Joeblogs@hotmail.com
[/TD]
[TD]nicesite@mysite.com
[/TD]
[/TR]
</tbody>[/TABLE]
Thanks for having a look
It can also extract URLs from a site, the PROBLEM is that it extracts all LINKS and not just the domain name
Also I can only extract one or the other, either emails on their own or Links, when what I want is for it to be something like this, and each new record to go into the next Blank Row.
[TABLE="class: grid, width: 500"]
<tbody>[TR]
[TD]Emails
[/TD]
[TD]Urls
[/TD]
[/TR]
[TR]
[TD]Jondoe@gmail.com
[/TD]
[TD]coolsite@mysite.com
[/TD]
[/TR]
[TR]
[TD]janedoe@yahoo.com
[/TD]
[TD]supersite@mysite.com
[/TD]
[/TR]
[TR]
[TD][/TD]
[TD]supersite@mysite.com
[/TD]
[/TR]
[TR]
[TD]Dave@gmail.com
[/TD]
[TD]Coolsite@mysite.com
[/TD]
[/TR]
[TR]
[TD]Joeblogs@hotmail.com
[/TD]
[TD]nicesite@mysite.com
[/TD]
[/TR]
</tbody>[/TABLE]
Code:
Private Sub CommandButton1_Click()
Dim Collection As MSHTML.IHTMLElementCollection
Dim element As Object
Dim i As Integer
i = 1
[COLOR=#008000]'k = 2
[/COLOR]
'Currently it only extracts from 1 url but I can set it to extract from a list
URL = "https://www.dvscommercials.co.uk/"
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", URL, False
XMLHTTP.setRequestHeader "Content-type", "text/xml"
XMLHTTP.send
Set HTML = CreateObject("htmlfile")
HTML.body.innerHTML = XMLHTTP.responseText
Set Collection = HTML.getElementsByTagName("a")
For Each element In Collection
On Error Resume Next
Email = element.href
If InStr(Email, "@") Then
[COLOR=#008000] 'If InStr(Email, "@") = 0 Then [B]'For Extracting All URLs[/B][/COLOR]
ThisWorkbook.Sheets(1).Cells(i, 1).Value = element.href
[COLOR=#008000] 'ThisWorkbook.Sheets(1).Cells(k, 2).Value = element.href[/COLOR]
Email = Replace(Email, "mailto:", "")
ThisWorkbook.Sheets(1).Cells(i, 1).Value = Email
i = i + 1
End If
[COLOR=#008000]'End If[/COLOR]
Next element
End Sub
Thanks for having a look