61 lines
2.2 KiB
PHP
61 lines
2.2 KiB
PHP
|
<?php
|
||
|
|
||
|
namespace App\Services\Data\News;
|
||
|
|
||
|
use GuzzleHttp\Client;
|
||
|
use Symfony\Component\DomCrawler\Crawler;
|
||
|
use App\Services\Data\Contracts\NewsProviderInterface;
|
||
|
use Illuminate\Support\Str;
|
||
|
/**
 * News provider that scrapes the IMDb movie news listing page.
 *
 * The page HTML is fetched through the injected Guzzle client and parsed
 * with Symfony's DomCrawler; each news card is converted into a plain array.
 */
class ImdbNewsProvider implements NewsProviderInterface
{
    /**
     * Listing page that is scraped for articles.
     */
    private const NEWS_URL = 'https://www.imdb.com/news/movie';

    /**
     * Browser-like User-Agent header sent with the listing request.
     */
    private const USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

    /**
     * Text that separates an article's body from the name of its source.
     */
    private const SOURCE_MARKER = 'See full article at ';

    /**
     * @var Client
     */
    private $http;

    /**
     * @param Client $http HTTP client used to download the news page.
     */
    public function __construct(Client $http)
    {
        $this->http = $http;
    }

    /**
     * Fetch the news listing page and extract its articles.
     *
     * Cards that cannot be parsed (for example because an expected element
     * is missing) are skipped instead of failing the whole request.
     *
     * @param int $limit Maximum number of articles to return; a value of
     *                   zero or less yields an empty collection.
     *
     * @return \Illuminate\Support\Collection Sequentially indexed collection of
     *         arrays shaped as
     *         ['title' => string, 'body' => string, 'meta' => ['date', 'author', 'source', 'image', 'url']].
     */
    public function getArticles(int $limit = 10)
    {
        if ($limit <= 0) {
            // Nothing was asked for, so avoid the network round trip.
            return collect();
        }

        $html = $this->http
            ->get(self::NEWS_URL, ['headers' => ['User-Agent' => self::USER_AGENT]])
            ->getBody()
            ->getContents();

        // preg_replace() returns null on a PCRE failure (e.g. backtrack limit);
        // fall back to the raw HTML rather than parsing an empty document.
        $strippedHtml = preg_replace('/<script(.*?)>(.*?)<\/script>/is', '', $html) ?? $html;

        $crawler = new Crawler($strippedHtml);

        // grab every news article on the page
        $articles = $crawler
            ->filter('.ipc-page-section .ipc-list-card')
            ->each(function (Crawler $node) {
                return $this->parseArticle($node);
            });

        // array_filter() drops the cards that failed to parse; array_slice()
        // applies the limit and re-indexes the list from zero.
        return collect(array_slice(array_filter($articles), 0, $limit));
    }

    /**
     * Fetch a single article.
     *
     * Not implemented yet: the method currently does nothing and returns null.
     *
     * @param string $slug Identifier of the requested article (currently unused).
     *
     * @return null
     */
    public function getSingleArticle(string $slug)
    {
        return null;
    }

    /**
     * Convert one news card into an article array.
     *
     * @param Crawler $node Crawler positioned on a single news card.
     *
     * @return array|null The article data, or null when the card could not be parsed.
     */
    private function parseArticle(Crawler $node): ?array
    {
        try {
            $link = $node->filter('a.ipc-link');
            $content = $node->filter('.ipc-html-content-inner-div')->text();

            // Without the marker Str::afterLast() would return the entire
            // body, so only derive a source name when the marker is present.
            $source = Str::contains($content, self::SOURCE_MARKER)
                ? Str::afterLast($content, self::SOURCE_MARKER)
                : '';

            return [
                'title' => $link->text(),
                'body' => Str::beforeLast($content, self::SOURCE_MARKER),
                'meta' => [
                    'date' => $node->filter('.ipc-inline-list .ipc-inline-list__item')->first()->text(),
                    'author' => Str::replaceFirst(
                        'by ',
                        '',
                        $node->filter('.ipc-inline-list')->children('.ipc-inline-list__item')->eq(1)->text()
                    ),
                    'source' => $source,
                    'image' => $this->fullSizeImageUrl($node->filter('img.ipc-image')->attr('src')),
                    'url' => $link->attr('href'),
                ],
            ];
        } catch (\Throwable $th) {
            // Deliberate best effort: a card missing any expected element is
            // skipped so one malformed entry cannot break the whole listing.
            return null;
        }
    }

    /**
     * Replace everything from the first "._" in an image URL with ".jpg".
     *
     * @param string|null $src Value of the image's src attribute.
     *
     * @return string The rewritten URL; the input unchanged when it contains
     *                no "._"; an empty string when the attribute was missing.
     */
    private function fullSizeImageUrl(?string $src): string
    {
        $src = (string) $src;
        $cut = strpos($src, '._');

        if ($cut === false) {
            // No "._" marker: keep the URL instead of collapsing it to ".jpg".
            return $src;
        }

        return substr($src, 0, $cut) . '.jpg';
    }
}
|