Scraping di dati dentro tag html

di il
2 risposte

Scraping di dati dentro tag html

Buongiorno a tutti,
un paio di anni fa avevo creato un codice per l'estrazione di alcuni dati, a scopo di studio, da un sito web. Il codice da qualche tempo non funziona più, nella parte in cui estraeva dei valori contenuti all'interno dei tag html. Le pagine non sono dinamiche, quindi il lavoro è più facile, ma ci sto sbattendo la testa da settimane e non so come risolvere. Ogni aiuto/consiglio è gradito.
Posto di seguito il codice + link della pagina da cui estrae i dati + il codice di errore
Ho evidenziato in grassetto la parte che non dovrebbe funzionare
<?php

function curl_request($url, $timeout = 30) {
    // Initialize curl with given url
    $ch = curl_init($url);

    // Set user-agent
    curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER["HTTP_USER_AGENT"]);
    // Write the response to a variable
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Follow redirects
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // Max seconds to execute
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // Stop on error
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    return curl_exec($ch);
}

function get_html($url) {
    return str_get_html(curl_request($url));
}

[b][i]function print_odd($odd) {
    if (array_key_exists('data-odd', $odd->attr)) {   //riga 28
        return $odd->attr['data-odd'];
    }

    return $odd->children(0)->children(0)->children(0)->children(0)->attr['data-odd'];   //riga 32
}
[/i][/b]

function get_all_links_from_page($page_url) {
    $html = get_html($page_url);

	
    $links = []; // Sintassi breve per definire un array vuoto, equivale a: $links = array();
    foreach ($html->find("td[class=h-text-center]/a") as $div) {
        array_push($links, 'url-sito-web' . $div->href);
    }

    return $links;
}

function process_page($page) {
    $html = get_html($page);

    $titles = $html->find("span[class=list-breadcrumb__item__in]"); // 1 per match
    $result = $html->find("p[id=js-score]"); // 1
    $partial = $html->find("h2[id=js-partial]"); // 1
    $best_bets = $html->find("td[class=table-main__odds colored]/span/span/span"); // 1
    $odds = $html->find("td[class=table-main__odds]"); // 2

    $c=0; $b=0; $o=0; $z=0; $h=0; $d=0; $s=0;// two counters
    foreach ($titles as $match) {
	
    	///*
		$match_status = $result[$h++];

	if (stripos($match_status, 'POSTP') !== false) {
		echo 'Error: POSTP';
		$h++;
		continue;
	}
		//*/
	
        list($num1, $num2) = explode(':', $result[$c++]->innertext); // <- explode
        $num1 = intval($num1);
        $num2 = intval($num2);
        $num3 = ($num1 + $num2);
        $risultato = ($num1 . '-' . $num2);
        $risultatounito = ($num1 . '-' . $num2);

        list($home, $away) = explode('-', $titles[$z++]->innertext); // <- explode
        list($partialht, $partialft) = explode(',', $partial[$o++]->innertext); // <- explode

        $partialht = str_replace('(', '', $partialht);
        $partialft = str_replace(')', '', $partialft);

        $rest = substr($partialht, -1);
        $firstCharacter = $partialht[0];
        $lastCharacter = $partialht[2];
        $firstCharacters = $partialft[1];
        $lastCharacters = $partialft[3];

        $firstCharacter = intval($firstCharacter);
        $lastCharacter = intval($lastCharacter);

        $firstCharacters = intval($firstCharacters);
        $lastCharacters = intval($lastCharacters);

        $somma = $firstCharacter + $lastCharacter;
        $somma = intval($somma);

        $sommatwo = $firstCharacters + $lastCharacters;
        $sommatwo = intval($sommatwo);

        list($homescoreht, $awayscoreht) = explode(':', $partialht[$d++]->innertext); // <- explode
            //$homescoreht = str_replace(' ', '', $homescoreht);
            //$awayscoreht = str_replace(' ', '', $awayscoreht);
        $homescoreht = intval($homescoreht);
        $awayscoreht = intval($awayscoreht);

        list($homescoreft, $awayscoreft) = explode(':', $partialft[$s++]->innertext); // <- explode
            //$homescoreft = str_replace(' ', '', $homescoreft);
            //$awayscoreft = str_replace(' ', '', $awayscoreft);
        $homescoreft = intval($homescoreft);
        $awayscoreft = intval($awayscoreft);

        if ($somma > 0) {
            $over05ht = "OK";
        } else {
            $over05ht = "NO";
        }

        if ($num3 == $somma) {
            $over05sh = "NO";
        } else {
            $over05sh = "OK";
        }

		
		if ($num3 > 0){

		$over05 = "OK";
	}else {

		$over05 = "NO";
		}
	if ($num3 > 1){

		$over15 = "OK";
	}else {

		$over15 = "NO";
		}
	if ($num3 > 2){

		$over25 = "OK";
	}else {

		$over25 = "NO";
		}
	if ($num3 > 3){

		$over35 = "OK";
	}else {

		$over35 = "NO";
		}
	if ($num3 > 4){

		$over45 = "OK";
	}else {

		$over45 = "NO";
		}

 if ($num1 > 0 && $num2 >0){

		$goal = "OK";
	}else {

		$goal = "NO";
		}

		if ($num1 > $num2) {
		$esito = "1";
		} else if ($num1 == $num2) {
		$esito = "X";
		}else if ($num1 < $num2) {
		$esito = "2";
		}

		
		
        $odd1 = print_odd($odds[$b++]);
        $odd2 = print_odd($odds[$b++]);
        $odd3 = print_odd($odds[$b++]);

        $home = strip_tags($home);
        $away = strip_tags($away);

        $uniquefield = $home . ' ' . $away . ' ' . $risultatounito . ' ' . $odd1;
        
        /************************************************************************/
           $campionato = "Fin-A";
        /************************************************************************/

        $output = '<tr><td class="rtitle">' .
            '<td>' . $home . '</td><td> : </td><td>' . $away . ' / ' .  // <- example use
            '<td>' . $num1 . '</td><td> : </td><td>' . $num2 . ' / ' .  // <- example use
            '<td class="first-cell">' . $partialht . '</td> ' .
            '<td class="first-cell">' . $partialft . '</td> ' .
            '<td class="first-cell">' . $firstCharacter . '</td> ' .
            '<td class="first-cell">' . $lastCharacter .' /</td> ' .
            '<td class="first-cell">' . $firstCharacters . '</td> ' .
            '<td class="first-cell">' . $lastCharacters .' /</td> ' .
            '<td class="first-cell">' . $somma . ' /</td> ' .
            '<td class="first-cell">' . $num3 . ' /</td> ' .
            '<td class="first-cell">' . $over05ht .' /</td> ' .
            '<td class="first-cell">' . $over05sh .' /</td> ' .
            "<td class='first-cell'>" . $over05 ."</td> "  .
			"<td class='first-cell'>" . $over15 ."</td> "  .
			"<td class='first-cell'>" . $over25 ."</td> "  .
			"<td class='first-cell'>" . $over35 ."</td> "  .
			"<td class='first-cell'>" . $over45 ."</td> "  .
			"<td class='first-cell'>" . $goal ."</td> "  .
			'<td class="first-cell">' . $risultato . '</td> ' .
            '<td class="first-cell">' . $risultatounito . '</td> ' .
            '<td class="odds">' . $odd1 . ';' . $odd2 . ';' . $odd3 . '</td>' .
            '<td class="first-cell">' . $campionato .'</td> ' .
            '<td class="first-cell">' . $uniquefield .'</td> ' .
            '</td></tr><br/>';


$servername = "xxxxx";
$username = "xxxxx";
$password = "xxxxx";
$dbname = "xxxxx";		

// Create connection
$conn = new mysqli($servername, $username, $password, $dbname);
// Check connection
if ($conn->connect_error) {
    die("Connection failed: " . $conn->connect_error);
} 


$sql = "INSERT INTO risultati (home, away, scorehome, scoreaway, best_bets, oddtwo, oddthree, partialht, partialft, firstCharacter, lastCharacter, firstCharacters, lastCharacters, somma, num3, over05ht, over05sh, over05, over15, over25, over35, over45, goal, risultato,  risultatounito, esito, campionato, uniquefield)
VALUES ('$home', '$away', '$num1', '$num2', '$odd1', '$odd2', '$odd3', '$partialht', '$partialft', '$firstCharacter', '$lastCharacter', '$firstCharacters', '$lastCharacters', '$somma', '$num3', '$over05ht', '$over05sh', '$over05', '$over15', '$over25', '$over35', '$over45', '$goal', '$risultato', '$risultatounito','$esito','$campionato','$uniquefield');";


if ($conn->multi_query($sql) === TRUE) {
    echo "New records created successfully";
} else {
    echo "Error: " . $sql . "<br>" . $conn->error;
}

$conn->close();

			
        
		
		echo $output;
    }
}
?>

Link pagina: *****

Errore ottenuto:
Warning: array_key_exists() expects parameter 2 to be array, null given in xxx.it/home/stc/finland/functions.php on line 28

Fatal error: Uncaught Error: Call to a member function children() on null in xxx.it/home/stc/finland/functions.php:32 Stack trace:
#0 xxx.it/home/stc/finland/functions.php(179): print_odd(NULL)
#1 xxx.it/home/stc/finland/extract-links.php(34): process_page('url-link.')
#2 {main} thrown in xxx.it/home/stc/finland/functions.php on line 32
Grazie a tutti

2 Risposte

Devi accedere o registrarti per scrivere nel forum
2 risposte