Hello, I have a problem with my code. It currently works fine, by searching through a directory, finding every .html & .htm, searching through that file for any href's, and then printing them into a spreadsheet. The problem I have seemed to encounter though, is that some files have the .html extension, but have IE icons, so the code
seems unable to search through the source code of them because if you were to double click them they would simply come up in IE. Here is some of the code that I have...
Sub CheckTextFilesForHREFs()
Globalindx = 1
'MsgBox "Press OK to begin report"
Dim WholeLine As String
Dim myPath As String
Dim workfile As String
Dim myR As Long
Set fs = Application.FileSearch
With fs
.LookIn = "C:\Exelon"
.Filename = ".html"
.SearchSubFolders = True
If .Execute(SortBy:=msoSortByFileName, _
SortOrder:=msoSortOrderAscending) > 0 Then
MsgBox "There were " & .FoundFiles.Count & _
" file(s) found."
Application.ScreenUpdating = False
For i = 1 To .FoundFiles.Count
ParseURL .FoundFiles(i)
Next i
Application.ScreenUpdating = True
Else
MsgBox "There were no files found."
End If
End With
End Sub
Sub ParseURL(strFile As String)
Dim strTxt As String, lngTxt As Long, i As Long, oMatches
Dim ws As Worksheet, j As Long, k As Long, m As Long, oMatches2
Dim reg, oMatches3, reg2, txtTitle, txtLinkName, txtLine
Dim re, matches, match, d, uri, name
i = FreeFile
lngTxt = FileLen(strFile)
strTxt = Space(lngTxt)
Open strFile For Binary Access Read As #i
Get #i, , strTxt
Close #i
txtTitle = GetTitle(strTxt)
Dim indx
indx = 1
Dim tst
tst = GetHrefs(strTxt, strFile, txtTitle)
End Sub
Function GetURLAddress(ByVal TextToSearch) As String
Dim Start, Length
Start = InStr(1, TextToSearch, "href=", vbTextCompare) +
6
Length = InStr(1, TextToSearch, """ ", vbTextCompare) - Start
If Length < 0 Then
Length = InStr(1, TextToSearch, "'>", vbTextCompare) - Start
End If
If Length < 0 Then
GetURLAddress = ""
Else
GetURLAddress = LTrim(RTrim(Mid(TextToSearch, Start, Length)))
End If
End Function
seems unable to search through the source code of them because if you were to double click them they would simply come up in IE. Here is some of the code that I have...
Sub CheckTextFilesForHREFs()
Globalindx = 1
'MsgBox "Press OK to begin report"
Dim WholeLine As String
Dim myPath As String
Dim workfile As String
Dim myR As Long
Set fs = Application.FileSearch
With fs
.LookIn = "C:\Exelon"
.Filename = ".html"
.SearchSubFolders = True
If .Execute(SortBy:=msoSortByFileName, _
SortOrder:=msoSortOrderAscending) > 0 Then
MsgBox "There were " & .FoundFiles.Count & _
" file(s) found."
Application.ScreenUpdating = False
For i = 1 To .FoundFiles.Count
ParseURL .FoundFiles(i)
Next i
Application.ScreenUpdating = True
Else
MsgBox "There were no files found."
End If
End With
End Sub
Sub ParseURL(strFile As String)
Dim strTxt As String, lngTxt As Long, i As Long, oMatches
Dim ws As Worksheet, j As Long, k As Long, m As Long, oMatches2
Dim reg, oMatches3, reg2, txtTitle, txtLinkName, txtLine
Dim re, matches, match, d, uri, name
i = FreeFile
lngTxt = FileLen(strFile)
strTxt = Space(lngTxt)
Open strFile For Binary Access Read As #i
Get #i, , strTxt
Close #i
txtTitle = GetTitle(strTxt)
Dim indx
indx = 1
Dim tst
tst = GetHrefs(strTxt, strFile, txtTitle)
End Sub
Function GetURLAddress(ByVal TextToSearch) As String
Dim Start, Length
Start = InStr(1, TextToSearch, "href=", vbTextCompare) +
6
Length = InStr(1, TextToSearch, """ ", vbTextCompare) - Start
If Length < 0 Then
Length = InStr(1, TextToSearch, "'>", vbTextCompare) - Start
End If
If Length < 0 Then
GetURLAddress = ""
Else
GetURLAddress = LTrim(RTrim(Mid(TextToSearch, Start, Length)))
End If
End Function