mirror of
https://git.lolcat.ca/lolcat/4get.git
synced 2026-01-04 01:35:26 -05:00
fix invalid sublinks on google scraper
This commit is contained in:
parent
ce75cbda81
commit
46e6ed12e3
1 changed file with 26 additions and 16 deletions
|
|
@@ -953,23 +953,33 @@ class google{
|
|||
])
|
||||
);
|
||||
|
||||
if(count($probe) !== 0){
|
||||
$url =
|
||||
$this->unshiturl(
|
||||
$a["attributes"]["href"]
|
||||
);
|
||||
|
||||
if(
|
||||
preg_match(
|
||||
'/^http/',
|
||||
$url
|
||||
)
|
||||
){
|
||||
|
||||
$sublinks[] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$probe[0]
|
||||
)
|
||||
),
|
||||
"description" => null,
|
||||
"date" => null,
|
||||
"url" =>
|
||||
$this->unshiturl(
|
||||
$a["attributes"]["href"]
|
||||
)
|
||||
];
|
||||
if(count($probe) !== 0){
|
||||
|
||||
$sublinks[] = [
|
||||
"title" =>
|
||||
$this->titledots(
|
||||
$this->fuckhtml
|
||||
->getTextContent(
|
||||
$probe[0]
|
||||
)
|
||||
),
|
||||
"description" => null,
|
||||
"date" => null,
|
||||
"url" => $url
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue