captcha and imgur, findthatmeme, yep imagesearch

This commit is contained in:
lolcat 2023-10-16 02:30:43 -04:00
parent fa9dc4d6ef
commit 3aa0180774
26 changed files with 1710 additions and 63 deletions

148
scraper/ftm.php Normal file
View file

@ -0,0 +1,148 @@
<?php
class ftm{
public function __construct(){
include "lib/nextpage.php";
$this->nextpage = new nextpage("ftm");
}
public function getfilters($page){
return [];
}
private function get($url, $search, $offset){
$curlproc = curl_init();
curl_setopt($curlproc, CURLOPT_URL, $url);
$payload =
json_encode(
[
"search" => $search,
"offset" => $offset
]
);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"Content-Length: " . strlen($payload),
"Content-Type: application/json",
"DNT: 1",
"Connection: keep-alive",
"Origin: https://findthatmeme.com",
"Referer: https://findthatmeme.com/?search=" . urlencode($search),
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate",
"Sec-Fetch-Site: none",
"Sec-Fetch-User: ?1",
"X-Auth-Key: undefined",
"X-CSRF-Validation-Header: true"]
);
curl_setopt($curlproc, CURLOPT_POST, true);
curl_setopt($curlproc, CURLOPT_POSTFIELDS, $payload);
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$data = curl_exec($curlproc);
if(curl_errno($curlproc)){
throw new Exception(curl_error($curlproc));
}
curl_close($curlproc);
return $data;
}
public function image($get){
$search = $get["s"];
$out = [
"status" => "ok",
"npt" => null,
"image" => []
];
if($get["npt"]){
$count = (int)$this->nextpage->get($get["npt"], "images");
}else{
$count = 0;
}
try{
$json =
json_decode(
$this->get(
"https://findthatmeme.com/api/v1/search",
$search,
$count
),
true
);
}catch(Exception $error){
throw new Exception("Failed to fetch JSON");
}
if($json === null){
throw new Exception("Failed to decode JSON");
}
foreach($json as $item){
$count++;
if($item["type"] == "VIDEO"){
$thumb = "thumb/" . $item["thumbnail"];
}else{
$thumb = $item["image_path"];
}
$out["image"][] = [
"title" => date("jS \of F Y @ g:ia", strtotime($item["created_at"])),
"source" => [
[
"url" =>
"https://findthatmeme.us-southeast-1.linodeobjects.com/" .
$thumb,
"width" => null,
"height" => null
]
],
"url" => $item["source_page_url"]
];
}
if($count === 50){
$out["npt"] =
$this->nextpage->store(
$count,
"images"
);
}
return $out;
}
}

249
scraper/imgur.php Normal file
View file

@ -0,0 +1,249 @@
<?php
class imgur{
public function __construct(){
include "lib/nextpage.php";
$this->nextpage = new nextpage("imgur");
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
}
public function getfilters($page){
return [
"sort" => [ // /score/
"display" => "Sort by",
"option" => [
"score" => "Highest scoring",
"relevance" => "Most relevant",
"time" => "Newest first"
]
],
"time" => [ // /score/day/
"display" => "Time posted",
"option" => [
"all" => "All time",
"day" => "Today",
"week" => "This week",
"month" => "This month",
"year" => "This year"
]
],
"format" => [ // q_type
"display" => "Format",
"option" => [
"any" => "Any format",
"jpg" => "JPG",
"png" => "PNG",
"gif" => "GIF",
"anigif" => "Animated GIF",
"album" => "Albums"
]
],
"size" => [ // q_size_px
"display" => "Size",
"option" => [
"any" => "Any size",
"small" => "Small (500px or less)",
"med" => "Medium (500px to 2000px)",
"big" => "Big (2000px to 5000px)",
"lrg" => "Large (5000px to 10000px)",
"huge" => "Huge (10000px and above)"
]
]
];
}
private function get($url, $get = []){
$curlproc = curl_init();
if($get !== []){
$get = http_build_query($get);
$url .= "?scrolled&" . $get;
}
curl_setopt($curlproc, CURLOPT_URL, $url);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"DNT: 1",
"Referer: https://imgur.com/search/",
"Connection: keep-alive",
"Sec-Fetch-Dest: empty",
"Sec-Fetch-Mode: cors",
"Sec-Fetch-Site: same-origin",
"TE: trailers",
"X-Requested-With: XMLHttpRequest"]
);
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$data = curl_exec($curlproc);
if(curl_errno($curlproc)){
throw new Exception(curl_error($curlproc));
}
curl_close($curlproc);
return $data;
}
public function image($get){
if($get["npt"]){
$filter =
json_decode(
$this->nextpage->get(
$get["npt"],
"images"
),
true
);
$search = $filter["s"];
unset($filter["s"]);
$sort = $filter["sort"];
unset($filter["sort"]);
$time = $filter["time"];
unset($filter["time"]);
$format = $filter["format"];
unset($filter["format"]);
$size = $filter["size"];
unset($filter["size"]);
$page = $filter["page"];
unset($filter["page"]);
}else{
$search = $get["s"];
$sort = $get["sort"];
$time = $get["time"];
$format = $get["format"];
$size = $get["size"];
$page = 0;
$filter = [
"q" => $search
];
if($format != "any"){
$filter["q_type"] = $format;
}
if($size != "any"){
$filter["q_size_px"] = $size;
$filter["q_size_is_mpx"] = "off";
}
}
$out = [
"status" => "ok",
"npt" => null,
"image" => []
];
try{
$html =
$this->get(
"https://imgur.com/search/$sort/$time/page/$page",
$filter
);
}catch(Exception $error){
throw new Exception("Failed to fetch HTML");
}
$this->fuckhtml->load($html);
$posts =
$this->fuckhtml
->getElementsByClassName(
"post",
"div"
);
foreach($posts as $post){
$this->fuckhtml->load($post);
$image =
$this->fuckhtml
->getElementsByTagName("img")[0];
$image_url = "https:" . substr($this->fuckhtml->getTextContent($image["attributes"]["src"]), 0, -5);
$out["image"][] = [
"title" =>
$this->fuckhtml
->getTextContent(
$image["attributes"]["alt"]
),
"source" => [
[
"url" => $image_url . ".jpg",
"width" => null,
"height" => null
],
[
"url" => $image_url . "m.jpg",
"width" => null,
"height" => null
]
],
"url" =>
"https://imgur.com" .
$this->fuckhtml
->getTextContent(
$this->fuckhtml
->getElementsByClassName(
"image-list-link",
"a"
)
[0]
["attributes"]
["href"]
)
];
}
if(isset($out["image"][0])){
// store nextpage
$filter["s"] = $search;
$filter["sort"] = $sort;
$filter["time"] = $time;
$filter["format"] = $format;
$filter["size"] = $size;
$filter["page"] = $page + 1;
$out["npt"] =
$this->nextpage->store(
json_encode($filter),
"images"
);
}
return $out;
}
}

224
scraper/pinterest.php Normal file
View file

@ -0,0 +1,224 @@
<?php
class pinterest{
public function __construct(){
include "lib/nextpage.php";
$this->nextpage = new nextpage("pinterest");
}
public function getfilters($page){
return [];
}
private function get($url, $get = []){
$curlproc = curl_init();
if($get !== []){
$get = http_build_query($get);
$url .= "?" . $get;
}
curl_setopt($curlproc, CURLOPT_URL, $url);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"DNT: 1",
"Connection: keep-alive",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
"Sec-Fetch-Mode: navigate",
"Sec-Fetch-Site: none",
"Sec-Fetch-User: ?1"]
);
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$data = curl_exec($curlproc);
if(curl_errno($curlproc)){
throw new Exception(curl_error($curlproc));
}
curl_close($curlproc);
return $data;
}
public function image($get){
$search = $get["s"];
$out = [
"status" => "ok",
"npt" => null,
"image" => []
];
$filter = [
"source_url" => "/search/pins/?q=" . urlencode($search),
"rs" => "typed",
"data" =>
json_encode(
[
"options" => [
"article" => null,
"applied_filters" => null,
"appliedProductFilters" => "---",
"auto_correction_disabled" => false,
"corpus" => null,
"customized_rerank_type" => null,
"filters" => null,
"query" => $search,
"query_pin_sigs" => null,
"redux_normalize_feed" => true,
"rs" => "typed",
"scope" => "pins", // pins, boards, videos,
"source_id" => null
],
"context" => []
]
),
"_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
];
try{
$json =
json_decode(
$this->get(
"https://www.pinterest.ca/resource/BaseSearchResource/get/",
$filter
),
true
);
}catch(Exception $error){
throw new Exception("Failed to fetch JSON");
}
if($json === null){
throw new Exception("Failed to decode JSON");
}
//print_r($json);
foreach(
$json
["resource_response"]
["data"]
["results"]
as $item
){
switch($item["type"]){
case "pin":
/*
Handle image object
*/
$images = array_values($item["images"]);
$image = &$images[count($images) - 1]; // original
$thumb = &$images[1]; // 236x
$title = [];
if(
isset($item["grid_title"]) &&
trim($item["grid_title"]) != ""
){
$title[] = $item["grid_title"];
}
if(
isset($item["description"]) &&
trim($item["description"]) != ""
){
$title[] = $item["description"];
}
$title = implode(": ", $title);
if(
$title == "" &&
isset($item["board"]["name"]) &&
trim($item["board"]["name"]) != ""
){
$title = $item["board"]["name"];
}
if($title == ""){
$title = null;
}
$out["image"][] = [
"title" => $title,
"source" => [
[
"url" => $image["url"],
"width" => (int)$image["width"],
"height" => (int)$image["height"]
],
[
"url" => $thumb["url"],
"width" => (int)$thumb["width"],
"height" => (int)$thumb["height"]
]
],
"url" => "https://www.pinterest.com/pin/" . $item["id"]
];
break;
case "board":
if(isset($item["cover_pin"]["image_url"])){
$image = [
"url" => $item["cover_pin"]["image_url"],
"width" => (int)$item["cover_pin"]["size"][0],
"height" => (int)$item["cover_pin"]["size"][1]
];
}elseif(isset($item["image_cover_url_hd"])){
/*
$image = [
"url" =>
"width" => null,
"height" => null
];*/
}
break;
}
}
return $out;
}
private function getfullresimage($image, $has_og){
$has_og = $has_og ? "1200x" : "originals";
return
preg_replace(
'/https:\/\/i\.pinimg\.com\/[^\/]+\//',
"https://i.pinimg.com/" . $has_og . "/",
$image
);
}
}

370
scraper/yep.php Normal file
View file

@ -0,0 +1,370 @@
<?php
class yep{
public function __construct(){
include "lib/nextpage.php";
$this->nextpage = new nextpage("yep");
}
public function getfilters($page){
return [
"country" => [
"display" => "Country",
"option" => [
"all" => "All regions",
"af" => "Afghanistan",
"al" => "Albania",
"dz" => "Algeria",
"as" => "American Samoa",
"ad" => "Andorra",
"ao" => "Angola",
"ai" => "Anguilla",
"ag" => "Antigua and Barbuda",
"ar" => "Argentina",
"am" => "Armenia",
"aw" => "Aruba",
"au" => "Australia",
"at" => "Austria",
"az" => "Azerbaijan",
"bs" => "Bahamas",
"bh" => "Bahrain",
"bd" => "Bangladesh",
"bb" => "Barbados",
"by" => "Belarus",
"be" => "Belgium",
"bz" => "Belize",
"bj" => "Benin",
"bt" => "Bhutan",
"bo" => "Bolivia",
"ba" => "Bosnia and Herzegovina",
"bw" => "Botswana",
"br" => "Brazil",
"bn" => "Brunei Darussalam",
"bg" => "Bulgaria",
"bf" => "Burkina Faso",
"bi" => "Burundi",
"cv" => "Cabo Verde",
"kh" => "Cambodia",
"cm" => "Cameroon",
"ca" => "Canada",
"ky" => "Cayman Islands",
"cf" => "Central African Republic",
"td" => "Chad",
"cl" => "Chile",
"cn" => "China",
"co" => "Colombia",
"cg" => "Congo",
"cd" => "Congo, Democratic Republic",
"ck" => "Cook Islands",
"cr" => "Costa Rica",
"hr" => "Croatia",
"cu" => "Cuba",
"cy" => "Cyprus",
"cz" => "Czechia",
"ci" => "Côte d'Ivoire",
"dk" => "Denmark",
"dj" => "Djibouti",
"dm" => "Dominica",
"do" => "Dominican Republic",
"ec" => "Ecuador",
"eg" => "Egypt",
"sv" => "El Salvador",
"gq" => "Equatorial Guinea",
"ee" => "Estonia",
"et" => "Ethiopia",
"fo" => "Faroe Islands",
"fj" => "Fiji",
"fi" => "Finland",
"fr" => "France",
"gf" => "French Guiana",
"pf" => "French Polynesia",
"ga" => "Gabon",
"gm" => "Gambia",
"ge" => "Georgia",
"de" => "Germany",
"gh" => "Ghana",
"gi" => "Gibraltar",
"gr" => "Greece",
"gl" => "Greenland",
"gd" => "Grenada",
"gp" => "Guadeloupe",
"gu" => "Guam",
"gt" => "Guatemala",
"gg" => "Guernsey",
"gn" => "Guinea",
"gy" => "Guyana",
"ht" => "Haiti",
"hn" => "Honduras",
"hk" => "Hong Kong",
"hu" => "Hungary",
"is" => "Iceland",
"in" => "India",
"id" => "Indonesia",
"iq" => "Iraq",
"ie" => "Ireland",
"im" => "Isle of Man",
"il" => "Israel",
"it" => "Italy",
"jm" => "Jamaica",
"jp" => "Japan",
"je" => "Jersey",
"jo" => "Jordan",
"kz" => "Kazakhstan",
"ke" => "Kenya",
"ki" => "Kiribati",
"kw" => "Kuwait",
"kg" => "Kyrgyzstan",
"la" => "Lao People's Democratic Republic",
"lv" => "Latvia",
"lb" => "Lebanon",
"ls" => "Lesotho",
"ly" => "Libya",
"li" => "Liechtenstein",
"lt" => "Lithuania",
"lu" => "Luxembourg",
"mk" => "Macedonia",
"mg" => "Madagascar",
"mw" => "Malawi",
"my" => "Malaysia",
"mv" => "Maldives",
"ml" => "Mali",
"mt" => "Malta",
"mq" => "Martinique",
"mr" => "Mauritania",
"mu" => "Mauritius",
"yt" => "Mayotte",
"mx" => "Mexico",
"fm" => "Micronesia, Federated States of",
"md" => "Moldova",
"mc" => "Monaco",
"mn" => "Mongolia",
"me" => "Montenegro",
"ms" => "Montserrat",
"ma" => "Morocco",
"mz" => "Mozambique",
"mm" => "Myanmar",
"na" => "Namibia",
"nr" => "Nauru",
"np" => "Nepal",
"nl" => "Netherlands",
"nc" => "New Caledonia",
"nz" => "New Zealand",
"ni" => "Nicaragua",
"ne" => "Niger",
"ng" => "Nigeria",
"nu" => "Niue",
"no" => "Norway",
"om" => "Oman",
"pk" => "Pakistan",
"ps" => "Palestine, State of",
"pa" => "Panama",
"pg" => "Papua New Guinea",
"py" => "Paraguay",
"pe" => "Peru",
"ph" => "Philippines",
"pn" => "Pitcairn",
"pl" => "Poland",
"pt" => "Portugal",
"pr" => "Puerto Rico",
"qa" => "Qatar",
"ro" => "Romania",
"ru" => "Russian Federation",
"rw" => "Rwanda",
"re" => "Réunion",
"sh" => "Saint Helena",
"kn" => "Saint Kitts and Nevis",
"lc" => "Saint Lucia",
"vc" => "Saint Vincent and the Grenadines",
"ws" => "Samoa",
"sm" => "San Marino",
"st" => "Sao Tome and Principe",
"sa" => "Saudi Arabia",
"sn" => "Senegal",
"rs" => "Serbia",
"sc" => "Seychelles",
"sl" => "Sierra Leone",
"sg" => "Singapore",
"sk" => "Slovakia",
"si" => "Slovenia",
"sb" => "Solomon Islands",
"so" => "Somalia",
"kr" => "Sourth Korea",
"za" => "South Africa",
"es" => "Spain",
"lk" => "Sri Lanka",
"sr" => "Suriname",
"se" => "Sweden",
"ch" => "Switzerland",
"tw" => "Taiwan",
"tj" => "Tajikistan",
"tz" => "Tanzania",
"th" => "Thailand",
"tl" => "Timor-Leste",
"tg" => "Togo",
"tk" => "Tokelau",
"to" => "Tonga",
"tt" => "Trinidad and Tobago",
"tn" => "Tunisia",
"tr" => "Turkey",
"tm" => "Turkmenistan",
"ug" => "Uganda",
"ua" => "Ukraine",
"ae" => "United Arab Emirates",
"gb" => "United Kingdom",
"us" => "United States",
"uy" => "Uruguay",
"uz" => "Uzbekistan",
"vu" => "Vanuatu",
"ve" => "Venezuela",
"vn" => "Vietnam",
"vg" => "Virgin Islands, British",
"vi" => "Virgin Islands, U.S.",
"ye" => "Yemen",
"zm" => "Zambia",
"zw" => "Zimbabwe"
]
],
"nsfw" => [
"display" => "NSFW",
"option" => [
"yes" => "Yes",
"maybe" => "Maybe",
"no" => "No"
]
]
];
}
private function get($url, $get = []){
$curlproc = curl_init();
if($get !== []){
$get = http_build_query($get);
$url .= "?" . $get;
}
curl_setopt($curlproc, CURLOPT_URL, $url);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
"Accept: */*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"DNT: 1",
"Origin: https://yep.com",
"Referer: https://yep.com/",
"Connection: keep-alive",
"Sec-Fetch-Dest: empty",
"Sec-Fetch-Mode: cors",
"Sec-Fetch-Site: same-site"]
);
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
$data = curl_exec($curlproc);
if(curl_errno($curlproc)){
throw new Exception(curl_error($curlproc));
}
curl_close($curlproc);
return $data;
}
public function image($get){
$search = $get["s"];
$country = $get["country"];
$nsfw = $get["nsfw"];
switch($nsfw){
case "yes": $nsfw = "off"; break;
case "maybe": $nsfw = "moderate"; break;
case "no": $nsfw = "strict"; break;
}
$out = [
"status" => "ok",
"npt" => null,
"image" => []
];
try{
$json =
json_decode(
$this->get(
"https://api.yep.com/fs/2/search",
[
"client" => "web",
"gl" => $country == "all" ? $country : strtoupper($country),
"no_correct" => "false",
"q" => $search,
"safeSearch" => $nsfw,
"type" => "images"
]
),
true
);
}catch(Exception $error){
throw new Exception("Failed to fetch JSON");
}
if($json === null){
throw new Exception("Failed to decode JSON");
}
foreach($json[1]["results"] as $item){
if(
$item["width"] !== 0 &&
$item["height"] !== 0
){
$thumb_width = $item["width"] >= 260 ? 260 : $item["width"];
$thumb_height = ceil($item["height"] * ($thumb_width / $item["width"]));
$width = $item["width"];
$height = $item["height"];
}else{
$thumb_width = null;
$thumb_height = null;
$width = null;
$height = null;
}
$out["image"][] = [
"title" => $item["title"],
"source" => [
[
"url" => $item["image_id"],
"width" => $width,
"height" => $height
],
[
"url" => $item["src"],
"width" => $thumb_width,
"height" => $thumb_height
]
],
"url" => $item["host_page"]
];
}
return $out;
}
}