Sharid
Well-known Member
- Joined
- Apr 22, 2007
- Messages
- 1,066
- Office Version
- 2016
- Platform
- Windows
I am trying to link the below function with the rest of my code so it uses Regxp to extract emails, however I am struggling. Currently it extracts everything into column B as shown in the image. It should however be extracting emails only.
I am ONLY stuck on the email part below the rest of the code is fine. I am Not sure how to link "colEmails" to the Email part of the code. I have made sevel attempts nothing has worked I have left some of my attempts in the code they are commented out
The Following two line do extract emails, however they are not very affective. Either one works. I ahve commeted them out for now
Use Link to download demo Download File
Function
I am ONLY stuck on the email part below the rest of the code is fine. I am Not sure how to link "colEmails" to the Email part of the code. I have made sevel attempts nothing has worked I have left some of my attempts in the code they are commented out
The Following two line do extract emails, however they are not very affective. Either one works. I ahve commeted them out for now
VBA Code:
'If InStr(1, nodeOneLink.href, "@") Then
'If InStr(1, nodeOneLink.href, "mailto:") Then
Use Link to download demo Download File
VBA Code:
'Option Explicit
Private Sub CommandButton1_Click()
'Columns for both tables
Const colUrl As Long = 1 'Must always be the first column
Const colmail As Long = 2 'Must always be the first column before Some platforms
Const colFacebook As Long = 3 'Must always be the last column of Some platforms
Const colError As Long = 4 'Must always be the last column
Dim url As String
Dim http As Object
Dim htmlDoc As Object
Dim nodeAllLinks As Object
Dim nodeOneLink As Object
Dim pageLoadSuccessful As Boolean
Dim tableUrlsOneAddressLeft As String
Dim tableAllAddresses As String
Dim currentRowTableUrls As Long
Dim lastRowTableUrls As Long
Dim currentRowsTableAll(colUrl To colFacebook) As Long
Dim lastRowTableAll As Long
Dim addressCounters(colmail To colFacebook) As Long
Dim checkCounters As Long
'Initialize variables
tableUrlsOneAddressLeft = "Sheet2" 'Name of Sheet
currentRowTableUrls = 2 'First row for content
tableAllAddresses = "Sheet1" 'Name of Sheet
For checkCounters = colUrl To colFacebook
currentRowsTableAll(checkCounters) = 2 'First rows for content
Next checkCounters
Set htmlDoc = CreateObject("htmlfile")
Set http = CreateObject("MSXML2.ServerXMLHTTP.6.0")
'Clear all contents and comments in the URL source sheet from email column to error column
With Sheets(tableUrlsOneAddressLeft)
lastRowTableUrls = .Cells(Rows.Count, colUrl).End(xlUp).Row
.Range(.Cells(currentRowTableUrls, colmail), .Cells(lastRowTableUrls, colError)).ClearContents
.Range(.Cells(currentRowTableUrls, colmail), .Cells(lastRowTableUrls, colError)).ClearComments
End With
'Delete all rows except headline in the sheet with all addresses
lastRowTableAll = Sheets(tableAllAddresses).Cells(Rows.Count, colUrl).End(xlUp).Row
Sheets(tableAllAddresses).Rows(currentRowsTableAll(colUrl) & ":" & lastRowTableAll).Delete Shift:=xlUp
'Loop over all URLs in column A in the URL source sheet
Do While Sheets(tableUrlsOneAddressLeft).Cells(currentRowTableUrls, 1).Value <> ""
'Scroll for visual monitoring, if 'the sheet with the URLs are the
'active one
If ActiveSheet.Name = tableUrlsOneAddressLeft Then
If currentRowTableUrls > 14 Then
ActiveWindow.SmallScroll down:=1
End If
Sheets(tableUrlsOneAddressLeft).Cells(currentRowTableUrls, 1).Select
End If
'Get next url from the URL source sheet
url = Sheets(tableUrlsOneAddressLeft).Cells(currentRowTableUrls, colUrl).Value
'Try to load page 'Temporarily disable error handling if 'there is a timeout or onother error
On Error Resume Next
http.Open "GET", url, False
http.Send
'Check if page loading was successful
If Err.Number = 0 Then
pageLoadSuccessful = True
End If
On Error GoTo 0
If pageLoadSuccessful Then
'Build html document for DOM operations
htmlDoc.body.innerHTML = http.responseText
'Create node list from all links of the page
Set nodeAllLinks = htmlDoc.getElementsByTagName("a")
'Walk through all links of the node list
For Each nodeOneLink In nodeAllLinks
'''#####################################################################################################
'''################################### THIS IS THE START OF THE EMAIL SECTION ##########################
'''#####################################################################################################
DoEvents
'Dim colEmails As Collection
Set colEmails = GetEmailAddressesFromHtml(htmlDoc)
'If InStr(1, colmails) Then
'If colEmails.Length > 0 Then
'If colEmails Length > 0 Then
'If InStr(1, nodeOneLink.href, "@") Then
'If InStr(1, emailMatches) Then
'Check for mail address
'If InStr(1, nodeOneLink.href, "@") Then
'If InStr(1, nodeOneLink.href, "mailto:") Then
'Write mail address to both tables
Sheets(tableUrlsOneAddressLeft).Cells(currentRowTableUrls, colmail).Value = Right(nodeOneLink.href, Len(nodeOneLink.href) - InStr(nodeOneLink.href, ":"))
Sheets(tableAllAddresses).Cells(currentRowsTableAll(colmail), colmail).Value = Right(nodeOneLink.href, Len(nodeOneLink.href) - InStr(nodeOneLink.href, ":"))
'Check if it is a new line in the sheet with all addresses
If currentRowsTableAll(colmail) >= currentRowsTableAll(colUrl) Then
'Write URL in the new line of the sheet with all addresses
Sheets(tableAllAddresses).Cells(currentRowsTableAll(colUrl), colUrl).Value = url
'Increment url counter
currentRowsTableAll(colUrl) = currentRowsTableAll(colUrl) + 1
End If
'Increment mail counters
currentRowsTableAll(colmail) = currentRowsTableAll(colmail) + 1
addressCounters(colmail) = addressCounters(colmail) + 1
'End If
'''#####################################################################################################
'''################################### END OF THE EMAIL SECTION ########################################
'''#####################################################################################################
'Check for Facebook address
If InStr(1, UCase(nodeOneLink.href), "FACEBOOK") Then
'Write Facebook address to both tables
Sheets(tableUrlsOneAddressLeft).Cells(currentRowTableUrls, colFacebook).Value = nodeOneLink.href
Sheets(tableAllAddresses).Cells(currentRowsTableAll(colFacebook), colFacebook).Value = nodeOneLink.href
'Check if it is a new line in the sheet with all addresses
If currentRowsTableAll(colFacebook) >= currentRowsTableAll(colUrl) Then
'Write URL in the new line of the sheet with all addresses
Sheets(tableAllAddresses).Cells(currentRowsTableAll(colUrl), colUrl).Value = url
'Increment url counter
currentRowsTableAll(colUrl) = currentRowsTableAll(colUrl) + 1
End If
'Increment Facebook counters
currentRowsTableAll(colFacebook) = currentRowsTableAll(colFacebook) + 1
addressCounters(colFacebook) = addressCounters(colFacebook) + 1
End If
Next nodeOneLink
'Check address counters
For checkCounters = colmail To colFacebook
'Set comment if more than 1 link were found
If addressCounters(checkCounters) > 1 Then
Sheets(tableUrlsOneAddressLeft).Cells(currentRowTableUrls, checkCounters).AddComment Text:=CStr(addressCounters(checkCounters))
Sheets(tableUrlsOneAddressLeft).Cells(currentRowTableUrls, checkCounters).Comment.Shape.TextFrame.AutoSize = True
End If
Next checkCounters
Else
'Page not loaded
'Write message URL table
Sheets(tableUrlsOneAddressLeft).Cells(currentRowTableUrls, colError).Value = "Error with URL or timeout"
End If
'Prepare for next page
pageLoadSuccessful = False
Erase addressCounters
lastRowTableAll = Sheets(tableAllAddresses).Cells(Rows.Count, colUrl).End(xlUp).Row
For checkCounters = colUrl To colFacebook
currentRowsTableAll(checkCounters) = lastRowTableAll + 1 'First rows for next page content
Next checkCounters
currentRowTableUrls = currentRowTableUrls + 1
Loop
'Clean Up
Set http = Nothing
Set htmlDoc = Nothing
Set nodeAllLinks = Nothing
Set nodeOneLink = Nothing
End Sub
Function
VBA Code:
Private Function GetEmailAddressesFromHtml(ByVal htmlDocument As Object) As Collection
' Should return a collection of strings representing email addresses detected
' in the HTML document.
Dim outputCollection As Collection
Set outputCollection = New Collection
Dim regEx As Object
Set regEx = CreateObject("VBScript.RegExp")
With regEx
.Pattern = "([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)"
' .Pattern = "[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+"
.Global = True
Dim emailMatches As Object
Set emailMatches = .Execute(htmlDocument.body.innerHTML)
End With
Dim matchFound As Object
For Each matchFound In emailMatches
On Error Resume Next ' De-duplicate here.
outputCollection.Add matchFound.Value, Key:=matchFound.Value
On Error GoTo 0
Next matchFound
Set GetEmailAddressesFromHtml = outputCollection
End Function