Tek-Tips is the largest IT community on the Internet today!

Members share and learn making Tek-Tips Forums the best source of peer-reviewed technical information on the Internet!

  • Congratulations gkittelson on being selected by the Tek-Tips community for having the most helpful posts in the forums last week. Way to Go!

scraping table in web page 3

Status
Not open for further replies.

sal21

Programmer
Apr 26, 2004
434
IT
based:

https://demo.istat.it/app/?i=D7B&l=it&a=2024

to have the result:

1) click on button: Vista Territoriale
2) click on radio button: Tutti i comuni della provincia selezionata (Seleziona la provincia)
3) choice Como
4) click on Listbox mese
choice Luglio
$) click on button Cerca

Appear a table with data.

I need to looping all the value of the table result.

Possible?
Tks
 
Below is a good start but I could not finish. I tested to clicking the Cerca button, but the table disappeared?

Note: I ran this in an Access Module so I could F8 Step through code. If you load this into excel, I think you'll have to reference Internet Controls in VBA
  1. In the VBA editor, go to Tools > References.
  2. Scroll down and find Microsoft Internet Controls, check the box, and press OK.


Code:
Sub WebAutomation()
    Dim IE As Object
    Dim Table As Object, Rows As Object, Row As Object, Cells As Object, Cell As Object
    Dim i As Integer, j As Integer

    ' Initialize Internet Explorer
    Set IE = CreateObject("InternetExplorer.Application")
    IE.Visible = True
    IE.Navigate "https://demo.istat.it/app/?i=D7B&l=it&a=2024"

    ' Wait for the page to load
    Do While IE.Busy Or IE.ReadyState <> 4
        DoEvents
    Loop

    ' Wait for the "Vista Territoriale" button to be available
    If Not WaitForElement(IE, "tab-1", 10) Then
        MsgBox "Failed to find the 'Vista Territoriale' button.", vbExclamation
        IE.Quit
        Set IE = Nothing
        Exit Sub
    End If

    ' Click the "Vista Territoriale" button
    IE.Document.getElementById("tab-1").Click

    ' Wait for 2 seconds to allow changes
    Wait 2

    ' Select the radio button "Tutti i comuni della provincia selezionata"
    If Not WaitForElement(IE, "provincerb-1", 10) Then
        MsgBox "Failed to find the 'Tutti i comuni' radio button.", vbExclamation
        IE.Quit
        Set IE = Nothing
        Exit Sub
    End If
    IE.Document.getElementById("provincerb-1").Checked = True

    ' Remove the disabled attribute from the province dropdown
    IE.Document.getElementById("province-1").removeAttribute "disabled"

    ' Wait briefly for changes to take effect
    Wait 1

    ' Select "Como" from the province dropdown
    Set provinceDropdown = IE.Document.getElementById("province-1")
    For i = 0 To provinceDropdown.Options.Length - 1
        If provinceDropdown.Options(i).Text = "Como" Then
            provinceDropdown.SelectedIndex = i
            Exit For
        End If
    Next

    ' Wait for changes to apply
    Wait 2

    ' Select "Luglio" from the month dropdown
    If Not WaitForElement(IE, "mese", 10) Then
        MsgBox "Failed to find the 'month' dropdown list.", vbExclamation
        IE.Quit
        Set IE = Nothing
        Exit Sub
    End If
    Set monthDropdown = IE.Document.getElementById("mese")
    For i = 0 To monthDropdown.Options.Length - 1
        If monthDropdown.Options(i).Text = "Luglio" Then
            monthDropdown.SelectedIndex = i
            Exit For
        End If
    Next

    ' Try double-clicking the "Cerca" button with JavaScript to avoid JSON download issue
    If Not WaitForElement(IE, "btnricerca-0", 10) Then
        MsgBox "Failed to find the 'Cerca' button.", vbExclamation
        IE.Quit
        Set IE = Nothing
        Exit Sub
    End If
    ' Trigger the click event twice
    IE.Document.parentWindow.execScript "document.getElementById('btnricerca-0').click();", "JavaScript"
    Wait 1
    IE.Document.parentWindow.execScript "document.getElementById('btnricerca-0').click();", "JavaScript"

STOP

 '-----------------------------------------------------  
'TESTED UP TO THIS POINT............................................
 '-----------------------------------------------------  
    '-----------------------------------------------------
    ' NEED TO CHECK JS EXECUTION FOR CERCA BUTTON CLICK
    ' Programmatically clicking the button is not working properly
    '-----------------------------------------------------
    '-----------------------------------------------------
    ' EXPECTED A TABLE HERE, SAW IT ONCE but THEN IT BROKE
    '-----------------------------------------------------
    
    Wait 2

    ' Locate the table element and extract data
    Set Table = IE.Document.getElementById("result-table") ' Replace with actual table ID if different
    If Table Is Nothing Then
        MsgBox "Failed to find the result table.", vbExclamation
        IE.Quit
        Set IE = Nothing
        Exit Sub
    End If
    Set Rows = Table.getElementsByTagName("tr")

    ' Loop through rows and cells to extract data
    For i = 1 To Rows.Length - 1 ' Start at 1 to skip header row
        Set Row = Rows.Item(i)
        Set Cells = Row.getElementsByTagName("td")
        For j = 0 To Cells.Length - 1
            Set Cell = Cells.Item(j)
            Debug.Print "Row " & i & ", Column " & j & ": " & Cell.innerText
        Next j
    Next i

    ' Close Internet Explorer
    IE.Quit
    Set IE = Nothing
End Sub

' Helper function to wait for an element to load
Function WaitForElement(IE As Object, elementID As String, maxSeconds As Integer) As Boolean
    Dim secondsElapsed As Integer
    secondsElapsed = 0
    Do While IE.Document.getElementById(elementID) Is Nothing
        If secondsElapsed >= maxSeconds Then
            WaitForElement = False
            Exit Function
        End If
        secondsElapsed = secondsElapsed + 1
        Wait 1 ' Wait 1 second
    Loop
    WaitForElement = True
End Function

' Custom wait function
Sub Wait(seconds As Single)
    Dim endTime As Single
    endTime = Timer + seconds
    Do While Timer < endTime
        DoEvents ' Yield to other processes
    Loop
End Sub
 
IE does not render the site correctly - the table (which is dynamically created in e.g Edge) does not get created, so it cannot be scraped.

And whilstr it is possible to emulate clicking the button (by performing a form SUBMIT) in MSXML2 this does not update the page either - it returns the JSON representing the data.

Given we'v e previously shown you how to decode JSON, would a JSON string containing all the data be sufficient for your needs sal21?
 
Hi softwarert, tks for code, but have erroe in:

Set Table = IE.Document.getElementById("result-table")
 
For strongm, only you.... please help
Yes sufficient Json resulty
 
Rich (BB code):
Option Explicit

Public Sub doit()
    Dim strResult As String
    strResult = GetJsonData()
    Debug.Print strResult
End Sub

' Requires reference to Microsoft HTML Library
Public Function GetJsonData() As String
    Dim objHttp As Object
    Dim myHTMLDoc As New HTMLDocument
   
    Set objHttp = CreateObject("MSXML2.XMLHTTP")
   
    objHttp.Open "POST", "https://demo.istat.it/app/RPCCerca.php", False ' this is what clicking Cerva runs
    objHttp.setRequestHeader "Content-Type", "application/x-www-form-urlencoded" ' tell server we are submitting form data
    objHttp.send "territorio=procom&province=084&mese=7&hid-i=D7B&hid-a=2024&hid-l=it&hid-cat=D7B&hid-dati=dati-form-1&hid-tavola=tavola-form-1" 'parameters submitted from the form. See tables below for values
    GetJsonData = objHttp.responseText

    ' territoria
        'rip,
        'reg
        'pro
        'procom
    ' province
        '084 Agrigento
        '006 Alessandria
        '042 Ancona
        '051 Arezzo
        '044 Ascoli Piceno
        '005 Asti
        '064 Avellino
        '072 Bari
        '110 Barletta-Andria-Trani
        '025 Belluno
        '062 Benevento
        '016 Bergamo
        '096 Biella
        '037 Bologna
        '021 Bolzano/Bozen
        '017 Brescia
        '074 Brindisi
        '092 Cagliari
        '085 Caltanissetta
        '070 Campobasso
        '061 Caserta
        '087 Catania
        '079 Catanzaro
        '069 Chieti
        '013 Como
        '078 Cosenza
        '019 Cremona
        '101 Crotone
        '004 Cuneo
        '086 Enna
        '109 Fermo
        '038 Ferrara
        '048 Firenze
        '071 Foggia
        '040 Forlì-Cesena
        '060 Frosinone
        '010 Genova
        '031 Gorizia
        '053 Grosseto
        '008 Imperia
        '094 Isernia
        '066 L Aquila
        '011 La Spezia
        '059 Latina
        '075 Lecce
        '097 Lecco
        '049 Livorno
        '098 Lodi
        '046 Lucca
        '043 Macerata
        '020 Mantova
        '045 Massa-Carrara
        '077 Matera
        '083 Messina
        '015 Milano
        '036 Modena
        '108 Monza e della Brianza
        '063 Napoli
        '003 Novara
        '091 Nuoro
        '095 Oristano
        '028 Padova
        '082 Palermo
        '034 Parma
        '018 Pavia
        '054 Perugia
        '041 Pesaro e Urbino
        '068 Pescara
        '033 Piacenza
        '050 Pisa
        '047 Pistoia
        '093 Pordenone
        '076 Potenza
        '100 Prato
        '088 Ragusa
        '039 Ravenna
        '080 Reggio di Calabria
        '035 Reggio nell Emilia
        '057 Rieti
        '099 Rimini
        '058 Roma
        '029 Rovigo
        '065 Salerno
        '090 Sassari
        '009 Savona
        '052 Siena
        '089 Siracusa
        '014 Sondrio
        '111 Sud Sardegna
        '073 Taranto
        '067 Teramo
        '055 Terni
        '001 Torino
        '081 Trapani
        '022 Trento
        '026 Treviso
        '032 Trieste
        '030 Udine
        '007 Valle d Aosta/Vallée d Aoste
        '012 Varese
        '027 Venezia
        '103 Verbano-Cusio-Ossola
        '002 Vercelli
        '023 Verona
        '102 Vibo Valentia
        '024 Vicenza
        '056 Viterbo
    'mese
        '1 Gennaio
        '2 Febbraio
        '3 Marzo
        '4 Aprile
        '5 Maggio
        '6 Giugno
        '7 Luglio
End Function
 
Strongm, tks as usual for code!
Workd perfect!

But,

I have see with attention the web page, after i have clicked on Cerca the page have a button Esporta Csv (see image), this button downlod the csv fron current selection, possible, to click on it and save the xcurrent csv in c:\test\084.csv? (084 is the ocde of Provincia)
 

Attachments

  • export_csv.jpg
    export_csv.jpg
    25.2 KB · Views: 3
So now you are saying the json - which contains all the data - is NOT what you wanted?
 
Strongm, yes! Great code for JSON.
But the export directlly in CSV, Is a good idea, or not?
 
>good idea, or not?
Since I don't know what you then want to do with the data I don't know whether it is a good idea or not

My point is that, since you have the data in the json string, you can save it as a CSV (or whatever format you like) yourself
 
Styrongm,
The json file. dont return correctlly... I don't see in what's in the original table!
 
Right, so whilst we could emulate clicking the button on the page just as we (sort of) have for the Cerca button, the data to export isn't on the page, so the Export button has nothing to export.

And what we have done is get just teh data that is used to populate the table back as a json string. Again, the Export button doesn't know about that

So you need to write your own export routing - IF the json data is correct. But you are saying it isn't (although it works fine here). Lewt's see if we can get to the bottom of that here ... I suggest you copy and paste the contents of strResult (i.e the string that my code above prints to the IDE immediate window) here, so we can have a look.

Alternatively, download and learn Selenium (which I think has been mentioned to you before), which allows you to automate Edge just like Internet Explorer - and Edge DOES handle the page properly
 
Hi strongm,
I have installed a selenium and chromedriver.

This code work perfect in VBA for Excel, but on VB 6.0, have the error in image:

i have set also Selenuium type library in preference of VB 6.0

Code:
Sub TEST_CHROME()

Dim BOT As New WebDriver
Dim rowc, cc, columnC As Integer
BOT.start "Chrome" '<<<<< error here
BOT.Get "https://demo.istat.it/app/?i=D7B&l=it&a=2024"

BOT.FindElementById("tab-1").SendKeys ("Vista territoriale")
BOT.FindElementById("provincerb-1").SendKeys ("Tutti i comuni della provincia selezionata")
BOT.FindElementById("btnricerca-1").SendKeys("Cerca").Click

End Sub

i'm on windows 10
 

Attachments

  • ERROR_HERE.jpg
    ERROR_HERE.jpg
    19.8 KB · Views: 6
Since you have installed selenium and chromedriver, you can try the code below.

This code is a classic scraper in that it attempts to automate human interaction with the page. If you Step through the code (F8), you should see each interaction and be able to troubleshoot any issues that arise. Most issues that I run into in scraping is the JS button clicks perform differently when being pressed by a human/mouse event vs a programmatic event.

Code:
Sub WebAutomationSelenium()
    Dim bot As New WebDriver
    Dim Table As Object, Rows As Object, Row As Object, Cells As Object, Cell As Object
    Dim i As Integer, j As Integer
    
    ' Initialize ChromeDriver
    bot.Start "chrome", "https://demo.istat.it/app/?i=D7B&l=it&a=2024"
    bot.Window.Maximize

    ' Wait for the page to load
    bot.Wait 5000
    
    ' Click the "Vista Territoriale" button
    If Not WaitForElement(bot, By.ID, "tab-1", 10) Then
        MsgBox "Failed to find the 'Vista Territoriale' button.", vbExclamation
        bot.Quit
        Exit Sub
    End If
    bot.FindElementById("tab-1").Click

    ' Wait briefly for changes to take effect
    bot.Wait 2000

    ' Select the radio button "Tutti i comuni della provincia selezionata"
    If Not WaitForElement(bot, By.ID, "provincerb-1", 10) Then
        MsgBox "Failed to find the 'Tutti i comuni' radio button.", vbExclamation
        bot.Quit
        Exit Sub
    End If
    bot.FindElementById("provincerb-1").Click

    ' Remove the disabled attribute from the province dropdown
    bot.ExecuteScript "document.getElementById('province-1').removeAttribute('disabled');"
    bot.Wait 1000

    ' Select "Como" from the province dropdown
    Dim provinceDropdown As Object
    Set provinceDropdown = bot.FindElementById("province-1")
    For i = 0 To provinceDropdown.FindElementsByTag("option").Count - 1
        If provinceDropdown.FindElementsByTag("option")(i).Text = "Como" Then
            provinceDropdown.FindElementsByTag("option")(i).Click
            Exit For
        End If
    Next

    ' Wait for changes to apply
    bot.Wait 2000

    ' Select "Luglio" from the month dropdown
    If Not WaitForElement(bot, By.ID, "mese", 10) Then
        MsgBox "Failed to find the 'month' dropdown list.", vbExclamation
        bot.Quit
        Exit Sub
    End If
    Dim monthDropdown As Object
    Set monthDropdown = bot.FindElementById("mese")
    For i = 0 To monthDropdown.FindElementsByTag("option").Count - 1
        If monthDropdown.FindElementsByTag("option")(i).Text = "Luglio" Then
            monthDropdown.FindElementsByTag("option")(i).Click
            Exit For
        End If
    Next

    ' Click the "Cerca" button
    If Not WaitForElement(bot, By.ID, "btnricerca-0", 10) Then
        MsgBox "Failed to find the 'Cerca' button.", vbExclamation
        bot.Quit
        Exit Sub
    End If
    bot.FindElementById("btnricerca-0").Click
    bot.Wait 2000

    ' Locate the table element and extract data
    Set Table = bot.FindElementById("result-table") ' Replace with actual table ID if different
    If Table Is Nothing Then
        MsgBox "Failed to find the result table.", vbExclamation
        bot.Quit
        Exit Sub
    End If

    Set Rows = Table.FindElementsByTag("tr")
    
    ' Loop through rows and cells to extract data
    For i = 1 To Rows.Count - 1 ' Start at 1 to skip header row
        Set Row = Rows.Item(i)
        Set Cells = Row.FindElementsByTag("td")
        For j = 0 To Cells.Count - 1
            Set Cell = Cells.Item(j)
            Debug.Print "Row " & i & ", Column " & j & ": " & Cell.Text
        Next j
    Next i

    ' Close ChromeDriver
    bot.Quit
End Sub

' Helper function to wait for an element to load
Function WaitForElement(bot As WebDriver, selectorType As By, selector As String, maxSeconds As Integer) As Boolean
    Dim secondsElapsed As Integer
    secondsElapsed = 0
    Do While bot.FindElement(selectorType, selector) Is Nothing
        If secondsElapsed >= maxSeconds Then
            WaitForElement = False
            Exit Function
        End If
        secondsElapsed = secondsElapsed + 1
        bot.Wait 1000 ' Wait 1 second
    Loop
    WaitForElement = True
End Function
 
>>i have set also Selenium type library in preference of VB 6.0
Just to avoid one possible area - which Selenium Type library?. There should are two, one for 32bit, one for 64bit. You want the 32bit one for VB6

Also with VB6 you may want to try the alternate initial navigation:

Rich (BB code):
BOT.Start "chrome", ""
BOT.Get "https://demo.istat.it/app/?i=D7B&l=it&a=2024"
 
Oh, sorry. I was testing in Access (VBA) bot.start not VB 6.0 com.

For VB6, try:

Code:
Option Explicit

Private Sub WebAutomationSelenium()
    Dim bot As Object
    Dim Table As Object, Rows As Object, Row As Object, Cells As Object, Cell As Object
    Dim i As Integer, j As Integer
    
    ' Initialize ChromeDriver
    Set bot = CreateObject("Selenium.WebDriver")
    bot.Start "chrome", "https://demo.istat.it/app/?i=D7B&l=it&a=2024"
    bot.Window.Maximize

    ' Wait for the page to load
    Sleep 5000
    
    ' Click the "Vista Territoriale" button
    If Not WaitForElement(bot, "id", "tab-1", 10) Then
        MsgBox "Failed to find the 'Vista Territoriale' button.", vbExclamation
        bot.Quit
        Exit Sub
    End If
    bot.FindElementById("tab-1").Click

    ' Wait briefly for changes to take effect
    Sleep 2000

    ' Select the radio button "Tutti i comuni della provincia selezionata"
    If Not WaitForElement(bot, "id", "provincerb-1", 10) Then
        MsgBox "Failed to find the 'Tutti i comuni' radio button.", vbExclamation
        bot.Quit
        Exit Sub
    End If
    bot.FindElementById("provincerb-1").Click

    ' Remove the disabled attribute from the province dropdown
    bot.ExecuteScript "document.getElementById('province-1').removeAttribute('disabled');"
    Sleep 1000

    ' Select "Como" from the province dropdown
    Dim provinceDropdown As Object
    Set provinceDropdown = bot.FindElementById("province-1")
    For i = 0 To provinceDropdown.FindElementsByTag("option").Count - 1
        If provinceDropdown.FindElementsByTag("option")(i).Text = "Como" Then
            provinceDropdown.FindElementsByTag("option")(i).Click
            Exit For
        End If
    Next

    ' Wait for changes to apply
    Sleep 2000

    ' Select "Luglio" from the month dropdown
    If Not WaitForElement(bot, "id", "mese", 10) Then
        MsgBox "Failed to find the 'month' dropdown list.", vbExclamation
        bot.Quit
        Exit Sub
    End If
    Dim monthDropdown As Object
    Set monthDropdown = bot.FindElementById("mese")
    For i = 0 To monthDropdown.FindElementsByTag("option").Count - 1
        If monthDropdown.FindElementsByTag("option")(i).Text = "Luglio" Then
            monthDropdown.FindElementsByTag("option")(i).Click
            Exit For
        End If
    Next

    ' Click the "Cerca" button
    If Not WaitForElement(bot, "id", "btnricerca-0", 10) Then
        MsgBox "Failed to find the 'Cerca' button.", vbExclamation
        bot.Quit
        Exit Sub
    End If
    bot.FindElementById("btnricerca-0").Click
    Sleep 2000

    ' Locate the table element and extract data
    Set Table = bot.FindElementById("result-table") ' Replace with actual table ID if different
    If Table Is Nothing Then
        MsgBox "Failed to find the result table.", vbExclamation
        bot.Quit
        Exit Sub
    End If

    Set Rows = Table.FindElementsByTag("tr")
    
    ' Loop through rows and cells to extract data
    For i = 1 To Rows.Count - 1 ' Start at 1 to skip header row
        Set Row = Rows.Item(i)
        Set Cells = Row.FindElementsByTag("td")
        For j = 0 To Cells.Count - 1
            Set Cell = Cells.Item(j)
            MsgBox "Row " & i & ", Column " & j & ": " & Cell.Text
        Next j
    Next i

    ' Close ChromeDriver
    bot.Quit
End Sub

' Helper function to wait for an element to load
Private Function WaitForElement(bot As Object, selectorType As String, selector As String, maxSeconds As Integer) As Boolean
    Dim secondsElapsed As Integer
    secondsElapsed = 0
    Do While bot.FindElement(selectorType, selector) Is Nothing
        If secondsElapsed >= maxSeconds Then
            WaitForElement = False
            Exit Function
        End If
        secondsElapsed = secondsElapsed + 1
        Sleep 1000 ' Wait 1 second
    Loop
    WaitForElement = True
End Function

' Sleep function for VB6
Private Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
 
For strongm.
Can you post the link for the download the selenium 32 bit and Chromedrive.exe in 32 bit?
Tks
 
>I ran this in an Access Module so I could F8 Step through code
>testing in Access (VBA)

Bit confused, to be honest. Firstly, why are you attempting to provide VBA solutions in the VB6 forum? The F8 thing seems strange, given that the VB6 IDE has the same capability.

Secondly, as it happens for this challenge the VBA code should run unaltered in VB6 , and it does on my machine in Excel and VB6 (which does make sal21's suggestion that his code does not work on VB6 somewhat intriguing), so I'd be interested to know why you made the changes that you did.
 
Last edited:
Status
Not open for further replies.

Part and Inventory Search

Sponsor

Back
Top