Sub ParseCases()
Dim X As Long, LastRow As Long, NO As Long, NOS As Long, FIRSTDATE As Long, LASTDATE As Long, SUMMARY As Long, JUDGES As Long, DOCs As Long, NextDOCs As Long, Cell As Range
' Get last row of file
LastRow = Cells(Rows.Count, "A").End(xlUp).Row
' I noted some non-breaking spaces in one of the files (ASCII-160), so let's convert them to real spaces
Range("A1:A" & LastRow).Replace Chr$(160), " ", xlPart
' Get rid of leading/trailing spaces
For Each Cell In Range("A1:A" & LastRow)
If Left(Cell.Value, 1) = " " Then Cell.Value = Trim(Cell.Value)
Next
' Put an ending marker for the Do..Loop to find (the Nos. marker is needed in order for the docket numbers check code to be able to work correctly)
Cells(LastRow + 1, "A").Value = "Nos. Ending DOCUMENTS"
' Combine multiple "to be saved" lines into single lines
DOCs = 1
Do
' Get docket number (there are two ways that cell can start.... No. or Nos.)
NO = Cells(DOCs, "A").Resize(LastRow).Find("No. *", LookAt:=xlWhole, SearchDirection:=xlNext, MatchCase:=True).Row
On Error Resume Next
NOS = Cells(DOCs, "A").Resize(LastRow).Find("Nos. *", LookAt:=xlWhole, SearchDirection:=xlNext, MatchCase:=True).Row
On Error GoTo 0
' Use the minimum one
If NOS < NO Then NO = NOS
' Get JUDGES row number
JUDGES = Cells(DOCs, "A").Resize(LastRow).Find("JUDGES:", LookAt:=xlWhole, SearchDirection:=xlNext, MatchCase:=True).Row
' Get first date line
For X = NO + 3 To JUDGES
If IsDate(Left(Cells(X, "A").Value, InStr(Cells(X, "A").Value, ",")) & " 2000") Then
FIRSTDATE = X
Exit For
End If
Next
' Get SUMMARY row number
SUMMARY = Cells(DOCs, "A").Resize(LastRow).Find("CASE SUMMARY:", LookAt:=xlWhole, SearchDirection:=xlNext, MatchCase:=True).Row
' Get last date line
For LASTDATE = SUMMARY To NO Step -1
If IsDate(Left(Cells(LASTDATE, "A").Value, InStr(Cells(LASTDATE, "A").Value, ",")) & " 2000") Then Exit For
Next
' Combine multiple date lines
If LASTDATE > FIRSTDATE Then
Cells(FIRSTDATE, "A").Value = Join(WorksheetFunction.Transpose(Range(Cells(FIRSTDATE, "A"), Cells(LASTDATE, "A"))), "; ")
Range("A" & (FIRSTDATE + 1) & ":A" & LASTDATE).Clear
End If
' Combine multiple lines for the "case names"
If NO - DOCs > 2 Then
Cells(DOCs + 1, "A").Value = Join(WorksheetFunction.Transpose(Range(Cells(DOCs + 1, "A"), Cells(NO - 1, "A"))))
Range("A" & (DOCs + 2) & ":A" & (NO - 1)).Clear
End If
' Combine multiple lines for the "citations"
If FIRSTDATE - NO > 3 Then
Cells(NO + 2, "A").Value = Join(WorksheetFunction.Transpose(Range(Cells(NO + 2, "A"), Cells(FIRSTDATE - 1, "A"))))
Range("A" & (NO + 3) & ":A" & (FIRSTDATE - 1)).Clear
End If
' Get next DOCUMENT row number
NextDOCs = Cells(DOCs, "A").Resize(LastRow).Find(" DOCUMENTS", LookAt:=xlPart, SearchDirection:=xlNext, MatchCase:=True).Row
' Combine multiple lines for the JUDGES
If NextDOCs - JUDGES > 2 Then
Cells(JUDGES + 1, "A").Value = Join(WorksheetFunction.Transpose(Range(Cells(JUDGES + 1, "A"), Cells(NextDOCs - 1, "A"))))
Range("A" & (JUDGES + 2) & ":A" & (NextDOCs - 1)).Clear
End If
' Check to see if we are at the end of file
NextDOCs = Cells(DOCs, "A").Resize(LastRow).Find(" DOCUMENTS", LookAt:=xlPart, SearchDirection:=xlNext, MatchCase:=True).Row
If NextDOCs > LastRow Then Exit Do
' Update the DOCs variable and run next loop
DOCs = NextDOCs
Loop
Columns("A").SpecialCells(xlCellTypeBlanks).EntireRow.Delete
' The file has been normalized as to its structure, so let's run the code I originally posted to parse it
' ========================================================================================================
' Get updated LastRow (remember, we might have deleted some rows earlier)... the minus one excludes the "Ending DOCUMENTS" flag we placed after the last row
LastRow = Cells(Rows.Count, "A").End(xlUp).Row - 1
' Now that each DOCUMENTS page is structured the same, lets move the data into the correct columns
DOCs = 1
Do
' Fill Columns A thru E with data
Cells(DOCs, "A").Resize(, 5) = WorksheetFunction.Transpose(Cells(DOCs + 1, "A").Resize(5))
' Fill Column F with the judges
JUDGES = Cells(DOCs, "A").Resize(LastRow).Find("JUDGES:", LookAt:=xlWhole, SearchDirection:=xlNext, MatchCase:=True).Row
Cells(DOCs, "F").Value = Cells(JUDGES + 1, "A").Value
' Get the next DOCUMENTS cells
NextDOCs = Cells(DOCs, "A").Resize(LastRow).Find(" DOCUMENTS", LookAt:=xlPart, SearchDirection:=xlNext, MatchCase:=True).Row
' Exit the loop if it is greater than the last row (if it is, then it is the "Nos. Ending DOCUMENTS" text we put in the blank cell under the last data row to mark the end of the file.
If NextDOCs > LastRow Then Exit Do
' If we didn't exit the loop, then update the DOCs variable for the next loop
DOCs = NextDOCs
Loop
' The only thing that will be in Column B is text we placed there as part of the parsing process; all other cells in the column are blanks and we use SpecialCells to locate them in order to delete the rows we already parsed
Range(Cells(2, "B"), Cells(LastRow, "B")).SpecialCells(xlCellTypeBlanks).EntireRow.Delete
' Find out the current last row (now that all the deletions are done
LastRow = Cells(Rows.Count, "A").End(xlUp).Row
' And delete the end of file flag we put there
Cells(LastRow, "A").Clear
End Sub