123movies-seo/app/Services/Data/News/ImdbNewsProvider.php

61 lines
2.2 KiB
PHP
Raw Normal View History

2024-08-24 23:08:42 +03:00
<?php
namespace App\Services\Data\News;
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
use App\Services\Data\Contracts\NewsProviderInterface;
use Illuminate\Support\Str;
/**
 * News provider that scrapes the IMDb movie news listing page.
 *
 * The page is fetched with the injected Guzzle client, <script> elements are
 * stripped, and each news card is turned into a plain array by CSS selectors.
 */
class ImdbNewsProvider implements NewsProviderInterface
{
    /**
     * Listing page that is scraped for articles.
     */
    private const NEWS_URL = 'https://www.imdb.com/news/movie';

    /**
     * User-Agent header sent with the listing request.
     */
    private const USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

    /**
     * Text that separates an article's body from its source name.
     */
    private const SOURCE_MARKER = 'See full article at ';

    /**
     * @var Client
     */
    private $http;

    /**
     * @param Client $http HTTP client used to download the listing page.
     */
    public function __construct(Client $http)
    {
        $this->http = $http;
    }

    /**
     * Fetch the news listing and return the parsed articles.
     *
     * Cards that lack any of the expected elements are skipped. The result is
     * re-indexed from zero and capped at $limit entries (a negative limit is
     * treated as zero).
     *
     * @param int $limit Maximum number of articles to return.
     *
     * @return \Illuminate\Support\Collection Collection of arrays with the keys
     *                                        "title", "body" and "meta"
     *                                        ("date", "author", "source",
     *                                        "image", "url").
     */
    public function getArticles(int $limit = 10)
    {
        $html = $this->http->get(self::NEWS_URL, [
            'headers' => ['User-Agent' => self::USER_AGENT],
        ])->getBody()->getContents();

        // preg_replace() returns null on a PCRE error; parsing the unstripped
        // page is better than silently parsing nothing.
        $strippedHtml = preg_replace('/<script(.*?)>(.*?)<\/script>/is', '', $html);
        if ($strippedHtml === null) {
            $strippedHtml = $html;
        }

        $crawler = new Crawler($strippedHtml);

        // Grab every news article card on the page; unparsable cards yield null.
        $articles = $crawler->filter('.ipc-page-section .ipc-list-card')->each(function (Crawler $node) {
            return $this->parseArticle($node);
        });

        // array_filter() drops the nulls but keeps the original keys;
        // array_slice() both applies the limit and re-indexes the list.
        return collect(array_slice(array_filter($articles), 0, max($limit, 0)));
    }

    /**
     * Currently a stub: it performs no lookup and always returns null.
     *
     * TODO: implement single-article retrieval.
     *
     * @param string $slug Article identifier (unused for now).
     *
     * @return null
     */
    public function getSingleArticle(string $slug)
    {
        return null;
    }

    /**
     * Convert one news card node into an article array.
     *
     * @param Crawler $node Crawler positioned on a single news card.
     *
     * @return array|null The article data, or null when the card does not
     *                    contain the expected elements.
     */
    private function parseArticle(Crawler $node): ?array
    {
        try {
            $link = $node->filter('a.ipc-link');
            $content = $node->filter('.ipc-html-content-inner-div')->text();
            $author = $node->filter('.ipc-inline-list')
                ->children('.ipc-inline-list__item')
                ->eq(1)
                ->text();

            return [
                'title' => $link->text(),
                'body' => Str::beforeLast($content, self::SOURCE_MARKER),
                'meta' => [
                    'date' => $node->filter('.ipc-inline-list .ipc-inline-list__item')->first()->text(),
                    'author' => Str::replaceFirst('by ', '', $author),
                    'source' => Str::afterLast($content, self::SOURCE_MARKER),
                    'image' => $this->normalizeImageUrl($node->filter('img.ipc-image')->attr('src')),
                    'url' => $link->attr('href'),
                ],
            ];
        } catch (\InvalidArgumentException $e) {
            // DomCrawler throws this when a selector matched nothing; such a
            // card is skipped on purpose instead of failing the whole listing.
            return null;
        }
    }

    /**
     * Strip the transformation suffix (everything from the first "._") from an
     * image URL and append ".jpg".
     *
     * A URL without the "._" marker is returned unchanged rather than being
     * truncated to an empty string.
     *
     * @param string|null $src Raw value of the image's src attribute.
     *
     * @return string|null Normalised URL, or null when no src was present.
     */
    private function normalizeImageUrl(?string $src): ?string
    {
        if ($src === null || $src === '') {
            return null;
        }

        $markerPosition = strpos($src, '._');
        if ($markerPosition === false) {
            return $src;
        }

        return substr($src, 0, $markerPosition) . '.jpg';
    }
}