Tek-Tips is the largest IT community on the Internet today!

Members share and learn making Tek-Tips Forums the best source of peer-reviewed technical information on the Internet!

  • Congratulations gkittelson on being selected by the Tek-Tips community for having the most helpful posts in the forums last week. Way to Go!

QueryTables - is it possible to extract source HTML?

Status
Not open for further replies.

osx99

Technical User
Apr 9, 2003
250
GB
I am attempting to extract the HTML links of a webpage using the code below. However, not all of the links are extracted in link format but text instead

I'm unsure why as using
.WebFormatting = xlWebFormattingAll??

Is there a way to extract the source HTML code instead as viewed in Internet Explorer - View - Source?

Many thanks,
Os

Code:
Sub extract()

Range("A1").Select
firstid = 458806
numraces = 1
For x = 1 To numraces
startid = firstid + x
mystring = "URL;[URL unfurl="true"]http://www.racingpost.co.uk/horses/card.sd?race_id="[/URL]
myracecard = mystring & startid
Set QT = ActiveSheet.QueryTables.Add(Connection:=myracecard, Destination:=ActiveCell)
With QT
.FieldNames = True
.RowNumbers = False
.BackgroundQuery = False
.RefreshStyle = xlInsertDeleteCells
.SaveData = True
.AdjustColumnWidth = True
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingAll
.WebTables = "4"
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = True
.WebDisableDateRecognition = True
.WebDisableRedirections = True
.Refresh BackgroundQuery:=False
End With
QT.Refresh BackgroundQuery:=False

'call subroutines here to do your system stuff
Dim Col As Long, ColCnt As Long, Rng As Range
     
    Application.ScreenUpdating = False
    Application.Calculation = xlCalculationManual
     
On Error GoTo Exits:
     
    If Selection.Columns.Count > 1 Then
        Set Rng = Selection
    Else
        Set Rng = Range(Columns(1), Columns(ActiveSheet.Cells.SpecialCells(xlCellTypeLastCell).Column()))
    End If
    ColCnt = 0
    For Col = Rng.Columns.Count To 1 Step -1
        If Application.WorksheetFunction.CountA(Rng.Columns(Col).EntireColumn) = 0 Then
            Rng.Columns(Col).EntireColumn.Delete
            ColCnt = ColCnt + 1
        End If
    Next Col
     
Exits:
    Application.ScreenUpdating = True
    Application.Calculation = xlCalculationAutomatic
Next x


End Sub
 
osx99,
Easiest way I can think of is to use Automation.
Code:
Sub Main()
  Dim objIE As Object
  Dim objLinks As Object, objLink As Object
  Dim lngRow As Long
  
  Set objIE = CreateObject("InternetExplorer.Application")
  objIE.Visible = True
  objIE.Navigate "[URL unfurl="true"]www.tek-tips.com"[/URL]
  
  While objIE.Busy: Wend
  While objIE.Document.ReadyState <> "complete": Wend
  
  lngRow = 1
  ActiveSheet.Cells(lngRow, 1) = "href"
  ActiveSheet.Cells(lngRow, 2) = "innerText"
  ActiveSheet.Cells(lngRow, 3) = "innerHTML"
  Set objLinks = objIE.Document.All.Tags("A")
  For Each objLink In objLinks
    lngRow = lngRow + 1
    ActiveSheet.Cells(lngRow, 1) = objLink.href
    ActiveSheet.Cells(lngRow, 2) = objLink.InnerText
    ActiveSheet.Cells(lngRow, 3) = objLink.InnerHTML
  Next objLink
  
  
  Set objLink = Nothing
  Set objLinks = Nothing
  objIE.Quit
  Set objIE = Nothing
End Sub

Hope this helps,
CMP

[small]For the best results do what I'm thinking, not what I'm saying.[/small]
(GMT-07:00) Mountain Time (US & Canada)
 
Status
Not open for further replies.

Part and Inventory Search

Sponsor

Back
Top