imran ashraf
New Member
- Joined
- Nov 15, 2016
- Messages
- 39
PLEASE HELP I NEED TO SCRAP DATA FROM Arrest.org IN BELOW FORMAT .
[TABLE="width: 1203"]
<tbody>[TR]
[TD="width: 72"]Unique ID[/TD]
[TD="width: 80"]Arrest State[/TD]
[TD="width: 93"]Arrest County[/TD]
[TD="width: 67"]Name[/TD]
[TD="width: 44"]Age[/TD]
[TD="width: 64"]Gender[/TD]
[TD="width: 103"]Home Address[/TD]
[TD="width: 76"]Home City[/TD]
[TD="width: 81"]Home State[/TD]
[TD="width: 81"]Postal Code[/TD]
[TD="width: 80"]Occupation[/TD]
[TD="width: 83"]Arrested on[/TD]
[TD="width: 64"]Charges[/TD]
[TD="width: 74"]Photo File[/TD]
[TD="width: 77"]Photo Link[/TD]
[TD="width: 64"]Link[/TD]
[/TR]
</tbody>[/TABLE]
I HAVE ALL READY A SCRIPT THAT BUT IMPORT IN DIFFERENT TABS BUT I NEED IN ABOVE FORMAT. MANY THANKS
Sub scrap2()
Dim sKill As String
sKill = "taskkill /F /IM iexplore.exe"
Shell sKill, vbHide
FnWait 2
Application.StatusBar = ""
FnWait 1
Dim IE As InternetExplorer
Set IE = New InternetExplorer
IE.Visible = Sheets(1).Range("B8")
For r = 1 To Sheets(1).Range("b6")
Application.StatusBar = r
Dim objPage2 As HTMLDocument
URL = Sheets(2).Range("A" & r)
On Error Resume Next
IE.Navigate URL
WaitIE IE
FnWait (1)
Set objPage2 = IE.document
Set info1 = objPage2.getElementsByClassName("section-content")
Sheets(3).Range("A" & r) = Trim(Clean_mul(info1(0).innerText))
Set info1 = Nothing
Set info2 = objPage2.getElementsByClassName("section-content personal-information")
Sheets(3).Range("B" & r) = Trim(Clean_mul(info2(0).innerText))
Set info2 = Nothing
Set info3 = objPage2.getElementsByClassName("section-content charges")
Sheets(3).Range("C" & r) = Trim(Clean_mul(info3(0).innerText))
Set info3 = Nothing
Set imgs = objPage2.getElementsByTagName("img")
Sheets(3).Range("D" & r) = Trim(Clean_mul(imgs(0).src))
Set imgs = Nothing
'section-content charges
'http://georgia.arrests.org/Arrests/Reginald_Davis_29028530/
'/mugs/Houston/2016/2016_6088.jpg
'http://georgia.arrests.org/mugs/Houston/2016/2016_6088.jpg
' urll = "http://" & Sheets(1).Range("B4") & ".arrests.org/" & imgurl
objPage2 = Nothing
Next r
End Sub
Sub macro_scrap()
Dim sKill As String
sKill = "taskkill /F /IM iexplore.exe"
Shell sKill, vbHide
FnWait 2
Application.StatusBar = ""
FnWait 1
Set objPage = New HTMLDocument
ct = 1
ct2 = 1
p = 1
view_lis = 56
Dim IE As InternetExplorer
Set IE = New InternetExplorer
IE.Visible = Sheets(1).Range("B7")
For p = 1 To 18
ct2 = 1
Dim objPage2 As HTMLDocument
URL = "http://" & Sheets(1).Range("B4") & ".arrests.org/?page=" & p & "&results=" & view_lis
On Error Resume Next
IE.Navigate URL
WaitIE IE
FnWait (5)
Set objPage2 = IE.document
Set dados = objPage2.getElementsByTagName("a")
For Each oElement In dados
If InStr(oElement.href, "/?d=1") Then
Sheets(2).Range("a" & ct) = Replace(oElement.href, "about:", URL)
Sheets(2).Range("b" & ct) = p
ct = ct + 1
ct2 = ct2 + 1
If ct2 >= 112 Then
ct2 = 1
Exit For
End If
End If
Next oElement
objPage2 = Nothing
Application.StatusBar = p
Next p
'URL & arsit '/Arrests/Landon_Garrett_29025095/?d=1
End
'Mailing_address
Sheets(2).Range("D" & r) = objPage.getElementById("MainContent_lblAddrLine1").innerText & " " & objPage.getElementById("MainContent_lblAddrLine3").innerText
'Owner_Name
Sheets(2).Range("E" & r) = objPage.getElementById("MainContent_gvOwners").getElementsByTagName("tr")(1).innerText
'Property_Address
Sheets(2).Range("F" & r) = objPage.getElementById("MainContent_lblLocation").innerText
'City
Sheets(2).Range("G" & r) = Left(Sheets(2).Range("B" & r).Text, 2) & "-" & objPage.getElementById("MainContent_lblSubdiv").innerText
'Zip_Code
'Property_Type
Sheets(2).Range("I" & r) = objPage.getElementById("MainContent_lblUsecode").innerText
'Acres
Sheets(2).Range("j" & r) = objPage.getElementById("MainContent_lblAcres").innerText
'Footage
Sheets(2).Range("k" & r) = objPage.getElementById("MainContent_lblSqFt").innerText
'Bedrooms
Sheets(2).Range("L" & r) = objPage.getElementById("MainContent_lblUnits").innerText
'Bathrooms
'Prev_Sale Year
Sheets(2).Range("N" & r) = objPage.getElementById("MainContent_lblSaleDate").innerText
'Prev_Sale
Sheets(2).Range("o" & r) = objPage.getElementById("MainContent_gvSalesInfo").getElementsByTagName("td")(1).innerText
'Tax_Appraisal
Set arr_tr_el = objPage.getElementsByTagName("tr")
ct = 0
For Each tr_el In arr_tr_el
If InStr(tr_el.innerText, "Taxable Value") Then
ct = ct + 1
If ct = 3 Then
Sheets(2).Range("P" & r) = Trim(tr_el.getElementsByTagName("td")(1).innerText)
End If
End If
Next tr_el
Set arr_tr_el = objPage.getElementsByTagName("tr")
ct = 0
For Each tr_el In arr_tr_el
If InStr(tr_el.innerText, "Total tax") Then
ct = ct + 1
If ct = 3 Then
Sheets(2).Range("R" & r) = Trim(tr_el.getElementsByTagName("td")(1).innerText)
End If
End If
Next tr_el
'Tax_link
Sheets(2).Range("s" & r) = "http://pbctax.manatron.com/tabs/propertyTax/accountdetail.aspx?p=" & Sheets(2).Range("B" & r) & "&"
'Dim IE As New InternetExplorer
'IE.Visible = True
'IE.Navigate2 "http://pbctax.manatron.com/tabs/propertyTax/accountdetail.aspx?p=" & Sheets(2).Range("B" & r)
'FnWait (3)
'WaitIE IE
'IE.document.getElementById("fldInput").Value = Sheets(2).Range("E" & r)
'IE.document.getElementById("btnsearch").Click
'WaitIE IE
'FnWait (5)
'Set arr_td_el = IE.document.getElementsByTagName("td")
'For Each td_el In arr_td_el
'MsgBox td_el.innerText
'If InStr(td_el.getAttribute("title"), "View Account Detail") Then
'MsgBox td_el.getAttribute("title")
'Sheets(2).Range("s" & r) = td_el.getAttribute("href")
'Exit For
'End If
'Next td_el
'getElementsByTagName("tr")(1) '.getElementsByTagName("td")(5).innerText .getElementById("grm-search")
'End
'IE.Quit
'Notes
FnWait Sheets(1).Range("B1")
End Sub
Function FnWait(intTime)
Application.Wait DateAdd("s", intTime, Now)
End Function
Public Function WaitIE(ByRef objIEBrowser As InternetExplorer)
Do While objIEBrowser.Busy Or objIEBrowser.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
End Function
Function Clean_mul(ByVal strIn As String) As String
strIn = Trim(strIn)
' // Replace all double space pairings with single spaces
Do While InStr(strIn, vbNewLine & vbNewLine)
strIn = Replace(strIn, vbNewLine & vbNewLine, vbNewLine)
Loop
Clean_mul = strIn
End Function
[TABLE="width: 1203"]
<tbody>[TR]
[TD="width: 72"]Unique ID[/TD]
[TD="width: 80"]Arrest State[/TD]
[TD="width: 93"]Arrest County[/TD]
[TD="width: 67"]Name[/TD]
[TD="width: 44"]Age[/TD]
[TD="width: 64"]Gender[/TD]
[TD="width: 103"]Home Address[/TD]
[TD="width: 76"]Home City[/TD]
[TD="width: 81"]Home State[/TD]
[TD="width: 81"]Postal Code[/TD]
[TD="width: 80"]Occupation[/TD]
[TD="width: 83"]Arrested on[/TD]
[TD="width: 64"]Charges[/TD]
[TD="width: 74"]Photo File[/TD]
[TD="width: 77"]Photo Link[/TD]
[TD="width: 64"]Link[/TD]
[/TR]
</tbody>[/TABLE]
I HAVE ALL READY A SCRIPT THAT BUT IMPORT IN DIFFERENT TABS BUT I NEED IN ABOVE FORMAT. MANY THANKS
Sub scrap2()
Dim sKill As String
sKill = "taskkill /F /IM iexplore.exe"
Shell sKill, vbHide
FnWait 2
Application.StatusBar = ""
FnWait 1
Dim IE As InternetExplorer
Set IE = New InternetExplorer
IE.Visible = Sheets(1).Range("B8")
For r = 1 To Sheets(1).Range("b6")
Application.StatusBar = r
Dim objPage2 As HTMLDocument
URL = Sheets(2).Range("A" & r)
On Error Resume Next
IE.Navigate URL
WaitIE IE
FnWait (1)
Set objPage2 = IE.document
Set info1 = objPage2.getElementsByClassName("section-content")
Sheets(3).Range("A" & r) = Trim(Clean_mul(info1(0).innerText))
Set info1 = Nothing
Set info2 = objPage2.getElementsByClassName("section-content personal-information")
Sheets(3).Range("B" & r) = Trim(Clean_mul(info2(0).innerText))
Set info2 = Nothing
Set info3 = objPage2.getElementsByClassName("section-content charges")
Sheets(3).Range("C" & r) = Trim(Clean_mul(info3(0).innerText))
Set info3 = Nothing
Set imgs = objPage2.getElementsByTagName("img")
Sheets(3).Range("D" & r) = Trim(Clean_mul(imgs(0).src))
Set imgs = Nothing
'section-content charges
'http://georgia.arrests.org/Arrests/Reginald_Davis_29028530/
'/mugs/Houston/2016/2016_6088.jpg
'http://georgia.arrests.org/mugs/Houston/2016/2016_6088.jpg
' urll = "http://" & Sheets(1).Range("B4") & ".arrests.org/" & imgurl
objPage2 = Nothing
Next r
End Sub
Sub macro_scrap()
Dim sKill As String
sKill = "taskkill /F /IM iexplore.exe"
Shell sKill, vbHide
FnWait 2
Application.StatusBar = ""
FnWait 1
Set objPage = New HTMLDocument
ct = 1
ct2 = 1
p = 1
view_lis = 56
Dim IE As InternetExplorer
Set IE = New InternetExplorer
IE.Visible = Sheets(1).Range("B7")
For p = 1 To 18
ct2 = 1
Dim objPage2 As HTMLDocument
URL = "http://" & Sheets(1).Range("B4") & ".arrests.org/?page=" & p & "&results=" & view_lis
On Error Resume Next
IE.Navigate URL
WaitIE IE
FnWait (5)
Set objPage2 = IE.document
Set dados = objPage2.getElementsByTagName("a")
For Each oElement In dados
If InStr(oElement.href, "/?d=1") Then
Sheets(2).Range("a" & ct) = Replace(oElement.href, "about:", URL)
Sheets(2).Range("b" & ct) = p
ct = ct + 1
ct2 = ct2 + 1
If ct2 >= 112 Then
ct2 = 1
Exit For
End If
End If
Next oElement
objPage2 = Nothing
Application.StatusBar = p
Next p
'URL & arsit '/Arrests/Landon_Garrett_29025095/?d=1
End
'Mailing_address
Sheets(2).Range("D" & r) = objPage.getElementById("MainContent_lblAddrLine1").innerText & " " & objPage.getElementById("MainContent_lblAddrLine3").innerText
'Owner_Name
Sheets(2).Range("E" & r) = objPage.getElementById("MainContent_gvOwners").getElementsByTagName("tr")(1).innerText
'Property_Address
Sheets(2).Range("F" & r) = objPage.getElementById("MainContent_lblLocation").innerText
'City
Sheets(2).Range("G" & r) = Left(Sheets(2).Range("B" & r).Text, 2) & "-" & objPage.getElementById("MainContent_lblSubdiv").innerText
'Zip_Code
'Property_Type
Sheets(2).Range("I" & r) = objPage.getElementById("MainContent_lblUsecode").innerText
'Acres
Sheets(2).Range("j" & r) = objPage.getElementById("MainContent_lblAcres").innerText
'Footage
Sheets(2).Range("k" & r) = objPage.getElementById("MainContent_lblSqFt").innerText
'Bedrooms
Sheets(2).Range("L" & r) = objPage.getElementById("MainContent_lblUnits").innerText
'Bathrooms
'Prev_Sale Year
Sheets(2).Range("N" & r) = objPage.getElementById("MainContent_lblSaleDate").innerText
'Prev_Sale
Sheets(2).Range("o" & r) = objPage.getElementById("MainContent_gvSalesInfo").getElementsByTagName("td")(1).innerText
'Tax_Appraisal
Set arr_tr_el = objPage.getElementsByTagName("tr")
ct = 0
For Each tr_el In arr_tr_el
If InStr(tr_el.innerText, "Taxable Value") Then
ct = ct + 1
If ct = 3 Then
Sheets(2).Range("P" & r) = Trim(tr_el.getElementsByTagName("td")(1).innerText)
End If
End If
Next tr_el
Set arr_tr_el = objPage.getElementsByTagName("tr")
ct = 0
For Each tr_el In arr_tr_el
If InStr(tr_el.innerText, "Total tax") Then
ct = ct + 1
If ct = 3 Then
Sheets(2).Range("R" & r) = Trim(tr_el.getElementsByTagName("td")(1).innerText)
End If
End If
Next tr_el
'Tax_link
Sheets(2).Range("s" & r) = "http://pbctax.manatron.com/tabs/propertyTax/accountdetail.aspx?p=" & Sheets(2).Range("B" & r) & "&"
'Dim IE As New InternetExplorer
'IE.Visible = True
'IE.Navigate2 "http://pbctax.manatron.com/tabs/propertyTax/accountdetail.aspx?p=" & Sheets(2).Range("B" & r)
'FnWait (3)
'WaitIE IE
'IE.document.getElementById("fldInput").Value = Sheets(2).Range("E" & r)
'IE.document.getElementById("btnsearch").Click
'WaitIE IE
'FnWait (5)
'Set arr_td_el = IE.document.getElementsByTagName("td")
'For Each td_el In arr_td_el
'MsgBox td_el.innerText
'If InStr(td_el.getAttribute("title"), "View Account Detail") Then
'MsgBox td_el.getAttribute("title")
'Sheets(2).Range("s" & r) = td_el.getAttribute("href")
'Exit For
'End If
'Next td_el
'getElementsByTagName("tr")(1) '.getElementsByTagName("td")(5).innerText .getElementById("grm-search")
'End
'IE.Quit
'Notes
FnWait Sheets(1).Range("B1")
End Sub
Function FnWait(intTime)
Application.Wait DateAdd("s", intTime, Now)
End Function
Public Function WaitIE(ByRef objIEBrowser As InternetExplorer)
Do While objIEBrowser.Busy Or objIEBrowser.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
End Function
Function Clean_mul(ByVal strIn As String) As String
strIn = Trim(strIn)
' // Replace all double space pairings with single spaces
Do While InStr(strIn, vbNewLine & vbNewLine)
strIn = Replace(strIn, vbNewLine & vbNewLine, vbNewLine)
Loop
Clean_mul = strIn
End Function