mirror of
https://git.bakhai.co.in/FbIN/4Get.git
synced 2025-11-04 20:11:32 +05:30
commit
c6e404d2af
132 changed files with 34951 additions and 0 deletions
1860
scraper/brave.php
Normal file
1860
scraper/brave.php
Normal file
File diff suppressed because it is too large
Load diff
145
scraper/crowdview.php
Normal file
145
scraper/crowdview.php
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
<?php
|
||||
|
||||
class crowdview{
|
||||
|
||||
/**
 * Load the helper libraries and create the two collaborators used by this
 * scraper: a `backend` instance created with the name "crowdview" and a
 * `fuckhtml` instance.
 */
public function __construct(){
	
	// include_once instead of include: if the library was already loaded
	// earlier in this request, a second plain include would abort with a
	// "cannot redeclare class" fatal error
	include_once "lib/backend.php";
	$this->backend = new backend("crowdview");
	
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
/**
 * Report the filters available for a given page.
 *
 * @param mixed $page Page identifier (not inspected)
 * @return array Always an empty list
 */
public function getfilters($page){
	
	// no filter is exposed, whatever page is requested
	$filters = [];
	
	return $filters;
}
|
||||
|
||||
/**
 * Perform an HTTP GET request with browser-like headers and return the body.
 *
 * @param mixed  $proxy Value handed to backend->assign_proxy() for this handle
 * @param string $url   Target URL without query string
 * @param array  $get   Query parameters; appended to $url when not empty
 * @return string|bool  Whatever curl_exec() returned
 * @throws Exception    Carrying the cURL error message when the transfer fails
 */
private function get($proxy, $url, $get = []){
	
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	$curlproc = curl_init();
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt(
		$curlproc,
		CURLOPT_HTTPHEADER,
		[
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		]
	);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$data = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		// read the message first, then release the handle: the original
		// threw without closing it
		$message = curl_error($curlproc);
		curl_close($curlproc);
		
		throw new Exception($message);
	}
	
	curl_close($curlproc);
	return $data;
}
|
||||
|
||||
/**
 * Query the CrowdView search API and map its hits onto the scraper's
 * result structure.
 *
 * @param array $get Request parameters; "s" holds the search term
 * @return array Structure with "status", "spelling", "npt", "answer", "web",
 *               "image", "video", "news" and "related" keys; only "web" is
 *               ever filled here
 * @throws Exception When the search term is empty, the HTTP request fails,
 *                   or the response body is not valid JSON
 */
public function web($get){
	
	$search = $get["s"];
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$proxy = $this->backend->get_ip();
	
	try{
		$json = $this->get(
			$proxy,
			"https://crowdview-next-js.onrender.com/api/search-v3",
			[
				"query" => $search
			]
		);
	}catch(Exception $error){
		
		// keep the underlying cURL error reachable via getPrevious()
		throw new Exception("Failed to fetch JSON", 0, $error);
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	$json = json_decode($json, true);
	
	if($json === NULL){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if(
		!is_array($json) ||
		!isset($json["results"]) ||
		!is_array($json["results"])
	){
		
		// valid JSON without a result list: report zero hits instead of
		// iterating over a missing key
		return $out;
	}
	
	foreach($json["results"] as $item){
		
		// the code treats the text before the first "<b>" as a date and
		// the remainder as the description
		$parts = explode("<b>", $item["snippet"], 2);
		
		if(count($parts) === 2){
			
			$date = strtotime($parts[0]);
			
			if($date === false){
				
				// unparsable prefix: report "no date" rather than false
				$date = null;
			}
			
			$description = $parts[1];
		}else{
			
			// no "<b>" marker at all: the whole snippet is the description
			$date = null;
			$description = $parts[0];
		}
		
		$out["web"][] = [
			"title" => $this->sanitize($item["title"]),
			"description" => $this->sanitize($description),
			"url" => $item["link"],
			"date" => $date,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	return $out;
}
|
||||
|
||||
/**
 * Turn an HTML fragment into plain text.
 *
 * Entities are decoded, the result is passed through
 * fuckhtml->getTextContent(), and leading/trailing dots and spaces are
 * trimmed off.
 *
 * @param string $html HTML fragment
 * @return string Text with surrounding "." and " " characters removed
 */
private function sanitize($html){
	
	$decoded = html_entity_decode($html);
	$text = $this->fuckhtml->getTextContent($decoded);
	
	return trim($text, ". ");
}
|
||||
}
|
||||
309
scraper/curlie.php
Normal file
309
scraper/curlie.php
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
<?php
|
||||
|
||||
class curlie{
|
||||
|
||||
/**
 * Load the helper libraries and create the two collaborators used by this
 * scraper: a `backend` instance created with the name "curlie" and a
 * `fuckhtml` instance.
 */
public function __construct(){
	
	// include_once instead of include: if the library was already loaded
	// earlier in this request, a second plain include would abort with a
	// "cannot redeclare class" fatal error
	include_once "lib/backend.php";
	$this->backend = new backend("curlie");
	
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
/**
 * Report the filters available for a given page.
 *
 * @param string $page Page identifier
 * @return array A single "lang" filter for the "web" page, an empty list
 *               for every other page
 */
public function getfilters($page){
	
	if($page == "web"){
		
		// language code => label shown to the user
		$languages = [
			"any" => "Any language",
			"en" => "English",
			"de" => "German",
			"fr" => "French",
			"ja" => "Japanese",
			"it" => "Italian",
			"es" => "Spanish",
			"ru" => "Russian",
			"nl" => "Dutch",
			"pl" => "Polish",
			"tr" => "Turkish",
			"da" => "Danish",
			"sv" => "Swedish",
			"no" => "Norwegian",
			"is" => "Icelandic",
			"fo" => "Faroese",
			"fi" => "Finnish",
			"et" => "Estonian",
			"lt" => "Lithuanian",
			"lv" => "Latvian",
			"cy" => "Welsh",
			"ga" => "Irish",
			"gd" => "Scottish Gaelic",
			"br" => "Breton",
			"fy" => "Frisian",
			"frr" => "North Frisian",
			"gem" => "Saterland Frisian",
			"lb" => "Luxembourgish",
			"rm" => "Romansh",
			"pt" => "Portuguese",
			"ca" => "Catalan",
			"gl" => "Galician",
			"eu" => "Basque",
			"ast" => "Asturian",
			"an" => "Aragonese",
			"fur" => "Friulan",
			"sc" => "Sardinian",
			"scn" => "Sicilian",
			"oc" => "Occitan",
			"be" => "Belarusian",
			"cs" => "Czech",
			"hu" => "Hungarian",
			"sk" => "Slovak",
			"uk" => "Ukrainian",
			"csb" => "Kashubian",
			"tt" => "Tatar",
			"ba" => "Bashkir",
			"os" => "Ossetian",
			"sl" => "Slovene",
			"sr" => "Serbian",
			"hr" => "Croatian",
			"bs" => "Bosnian",
			"bg" => "Bulgarian",
			"sq" => "Albanian",
			"ro" => "Romanian",
			"mk" => "Macedonian",
			"el" => "Greek",
			"iw" => "Hebrew",
			"fa" => "Persian",
			"ar" => "Arabic",
			"ku" => "Kurdish",
			"az" => "Azerbaijani",
			"hy" => "Armenian",
			"af" => "Afrikaans",
			"sw" => "Kiswahili",
			"uz" => "Uzbek",
			"kk" => "Kazakh",
			"ky" => "Kyrgyz",
			"tg" => "Tajik",
			"tk" => "Turkmen",
			"ug" => "Uyghurche",
			"hi" => "Hindi",
			"si" => "Sinhalese",
			"gu" => "Gujarati",
			"ur" => "Urdu",
			"mr" => "Marathi",
			"pa" => "Punjabi",
			"bn" => "Bengali",
			"ta" => "Tamil",
			"te" => "Telugu",
			"kn" => "Kannada",
			"zh_CN" => "Chinese Simplified",
			"zh_TW" => "Chinese Traditional",
			"ko" => "Korean",
			"cfr" => "Taiwanese",
			"th" => "Thai",
			"vi" => "Vietnamese",
			"in" => "Indonesian",
			"ms" => "Malay",
			"tl" => "Tagalog",
			"eo" => "Esperanto",
			"ia" => "Interlingua",
			"la" => "Latin"
		];
		
		return [
			"lang" => [
				"display" => "Language",
				"option" => $languages
			]
		];
	}
	
	// every other page has no filters
	return [];
}
|
||||
|
||||
/**
 * Perform an HTTP GET request with browser-like headers and return the body.
 *
 * @param mixed  $proxy Value handed to backend->assign_proxy() for this handle
 * @param string $url   Target URL without query string
 * @param array  $get   Query parameters; appended to $url when not empty
 * @return string|bool  Whatever curl_exec() returned
 * @throws Exception    Carrying the cURL error message when the transfer fails
 */
private function get($proxy, $url, $get = []){
	
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	$curlproc = curl_init();
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt(
		$curlproc,
		CURLOPT_HTTPHEADER,
		[
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		]
	);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$data = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		// read the message first, then release the handle: the original
		// threw without closing it
		$message = curl_error($curlproc);
		curl_close($curlproc);
		
		throw new Exception($message);
	}
	
	curl_close($curlproc);
	return $data;
}
|
||||
|
||||
/**
 * Search curlie.org and map the result list onto the scraper's result
 * structure.
 *
 * With a "npt" token, the stored path and proxy are read back through
 * backend->get() and that page is fetched. Otherwise a fresh search is made
 * from "s" and "lang".
 *
 * @param array $get Request parameters ("s", "lang", "npt")
 * @return array Structure with "status", "spelling", "npt", "answer", "web",
 *               "image", "video", "news" and "related" keys; "npt" holds the
 *               token returned by backend->store() when a next-page link
 *               exists, null otherwise
 * @throws Exception When the search page cannot be fetched
 */
public function web($get){
	
	if($get["npt"]){
		
		// continuation: the stored value is appended to the site root as-is
		[$path, $proxy] = $this->backend->get($get["npt"], "web");
		
		$url = "https://curlie.org/" . $path;
		$params = [];
	}else{
		
		$proxy = $this->backend->get_ip();
		
		$url = "https://curlie.org/search";
		$params = [
			"q" => $get["s"],
			"start" => 0,
			"stime" => 92452189 // ?
		];
		
		if($get["lang"] !== "any"){
			
			$params["lang"] = $get["lang"];
		}
	}
	
	try{
		$html = $this->get($proxy, $url, $params);
	}catch(Exception $error){
		
		// keep the underlying cURL error reachable via getPrevious()
		throw new Exception("Failed to fetch search page", 0, $error);
	}
	
	$this->fuckhtml->load($html);
	
	$nextpage =
		$this->fuckhtml
		->getElementsByClassName(
			"next-page",
			"a"
		);
	
	if(count($nextpage) !== 0){
		
		$nextpage =
			$this->backend->store(
				$nextpage[0]["attributes"]["href"],
				"web",
				$proxy
			);
	}else{
		
		$nextpage = null;
	}
	
	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => $nextpage,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];
	
	$items =
		$this->fuckhtml
		->getElementsByClassName(
			"site-item",
			"div"
		);
	
	foreach($items as $item){
		
		// scope the following lookups to this result entry
		$this->fuckhtml->load($item);
		
		$anchors =
			$this->fuckhtml
			->getElementsByAttributeValue(
				"target",
				"_blank",
				"a"
			);
		
		if(!isset($anchors[0]["attributes"]["href"])){
			
			// entry without an outbound link: skip it instead of emitting
			// a result with null title and url
			continue;
		}
		
		$a = $anchors[0];
		
		$description =
			$this->fuckhtml
			->getElementsByClassName("site-descr");
		
		if(count($description) !== 0){
			
			$description =
				$this->fuckhtml
				->getTextContent(
					$description[0]
				);
		}else{
			
			$description = null;
		}
		
		$out["web"][] = [
			"title" =>
				$this->fuckhtml
				->getTextContent(
					$a
				),
			"description" => $description,
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$a["attributes"]["href"]
				),
			"date" => null,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	return $out;
}
|
||||
}
|
||||
1967
scraper/ddg.php
Normal file
1967
scraper/ddg.php
Normal file
File diff suppressed because it is too large
Load diff
820
scraper/facebook.php
Normal file
820
scraper/facebook.php
Normal file
|
|
@ -0,0 +1,820 @@
|
|||
<?php
|
||||
|
||||
class facebook{
|
||||
|
||||
const get = 0;
|
||||
const post = 1;
|
||||
|
||||
/**
 * Load the helper libraries and create the two collaborators used by this
 * scraper: a `nextpage` instance created with "fb" and a `proxy_pool`
 * instance created with "facebook".
 */
public function __construct(){
	
	// include_once instead of include: if the library was already loaded
	// earlier in this request, a second plain include would abort with a
	// "cannot redeclare class" fatal error
	include_once "lib/nextpage.php";
	$this->nextpage = new nextpage("fb");
	
	include_once "lib/proxy_pool.php";
	$this->proxy = new proxy_pool("facebook");
}
|
||||
|
||||
/**
 * Report the filters available for a search.
 *
 * @param mixed $page Page identifier (not inspected)
 * @return array "sort", "newer", "older" and "live" filter definitions
 */
public function getfilters($page){
	
	$filters = [];
	
	$filters["sort"] = [
		"display" => "Sort by",
		"option" => [
			"relevance" => "Relevance",
			"most_recent" => "Most recent"
		]
	];
	
	// both date bounds use the "_DATE" option marker instead of a list
	$filters["newer"] = [
		"display" => "Newer than",
		"option" => "_DATE"
	];
	
	$filters["older"] = [
		"display" => "Older than",
		"option" => "_DATE"
	];
	
	$filters["live"] = [
		"display" => "Livestream",
		"option" => [
			"no" => "No",
			"yes" => "Yes"
		]
	];
	
	return $filters;
}
|
||||
|
||||
/**
 * Send an HTTP request to Facebook and return the response body.
 *
 * For self::get the parameters are appended to the URL as a query string;
 * for any other $reqtype they are sent as a form-encoded POST body over
 * HTTP/2 with the GraphQL headers.
 *
 * @param string $url     Target URL
 * @param array  $get     Parameters to send
 * @param int    $reqtype self::get or self::post
 * @return string|bool    Whatever curl_exec() returned
 * @throws Exception      Carrying the cURL error message when the transfer fails
 */
private function get($url, $get = [], $reqtype = self::get){
	
	$curlproc = curl_init();
	
	$has_params = $get !== [];
	$payload = $has_params ? http_build_query($get) : "";
	
	// the header list is now chosen from the request type alone; the
	// original only defined $headers when parameters were present, so a
	// call with an empty parameter list passed an undefined variable to
	// CURLOPT_HTTPHEADER
	if($reqtype === self::get){
		
		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		];
		
		if($has_params){
			
			$url .= "?" . $payload;
		}
	}else{
		
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		$headers = [
			"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
			"Accept: */*",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip, deflate, br",
			"Content-Type: application/x-www-form-urlencoded",
			"X-FB-Friendly-Name: SearchCometResultsPaginatedResultsQuery",
			//"X-FB-LSD: AVptQC4a16c",
			//"X-ASBD-ID: 129477",
			"Content-Length: " . strlen($payload),
			"Origin: https://www.facebook.com",
			"DNT: 1",
			"Connection: keep-alive",
			"Referer: https://www.facebook.com/watch/",
			"Cookie: datr=__GMZCgwVF5BbyvAtfJojQwg; oo=v1%7C3%3A1691641171; wd=955x995",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-origin",
			"TE: trailers"
		];
		
		curl_setopt($curlproc, CURLOPT_POST, true);
		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $payload);
	}
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$this->proxy->assign_proxy($curlproc);
	
	$data = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		// read the message first, then release the handle: the original
		// threw without closing it
		$message = curl_error($curlproc);
		curl_close($curlproc);
		
		throw new Exception($message);
	}
	
	curl_close($curlproc);
	return $data;
}
|
||||
|
||||
/**
 * Search Facebook videos.
 *
 * With a "npt" token the stored GraphQL request is decoded and replayed
 * through video_nextpage(). Otherwise the first result page is parsed and,
 * when it announces a further page, the GraphQL pagination request is built
 * from values found in that page and run through video_nextpage().
 *
 * @param array $get Request parameters ("s", "npt", "sort", "live",
 *                   "older", "newer")
 * @return array The structure kept in $this->out ("status", "npt", "video",
 *               "author", "livestream", "playlist", "reel")
 * @throws Exception When the page cannot be read, or an expected JSON body
 *                   or parameter cannot be extracted from it
 */
public function video($get){
	
	$search = $get["s"];
	$npt = $get["npt"];
	
	$this->out = [
		"status" => "ok",
		"npt" => null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];
	
	if($npt){
		
		$nextpage =
			json_decode(
				$this->nextpage->get(
					$npt,
					"videos"
				),
				true
			);
		
		// parse next page
		$this->video_nextpage($nextpage);
		
		return $this->out;
	}
	
	$filter = $this->video_filters($get);
	
	$req = [
		"q" => $search
	];
	
	if(count($filter) !== 0){
		
		$req["filters"] =
			base64_encode(
				json_encode(
					$filter
				)
			);
	}
	
	// NOTE(review): the live request is disabled and the page is read from
	// a fixture on disk, so $req is currently unused. This looks like
	// work-in-progress scaffolding; confirm before enabling the request:
	//
	// $html =
	// 	$this->get(
	// 		"https://www.facebook.com/watch/search/",
	// 		$req
	// 	);
	$html = file_get_contents("scraper/facebook.html");
	
	if($html === false){
		
		// the original passed a failed fopen() result straight to fread()
		throw new Exception("Failed to read search page");
	}
	
	preg_match_all(
		'/({"__bbox":.*,"sequence_number":0}})\]\]/',
		$html,
		$json
	);
	
	// the code uses the second occurrence of the pattern
	if(!isset($json[1][1])){
		
		throw new Exception("Could not grep JSON body");
	}
	
	$json = json_decode($json[1][1], true);
	
	if(!is_array($json)){
		
		throw new Exception("Failed to decode JSON body");
	}
	
	$results =
		$json
		["__bbox"]
		["result"]
		["data"]
		["serpResponse"]
		["results"]
		?? [];
	
	foreach(($results["edges"] ?? []) as $result){
		
		$this->parse_edge($result);
	}
	
	// get nextpage data
	if(($results["page_info"]["has_next_page"] ?? false) == 1){
		
		$nextpage =
			$this->video_build_nextpage(
				$html,
				$search,
				$filter,
				$results["page_info"]["end_cursor"] ?? null
			);
		
		$this->video_nextpage($nextpage);
	}
	
	return $this->out;
}

/**
 * Translate the user's filter choices into the filter map expected by the
 * search endpoint. Each value is itself a JSON-encoded string, e.g.:
 *
 * {
 * 	"rp_creation_time:0":"{\"name\":\"creation_time\",\"args\":\"{\\\"start_year\\\":\\\"2023\\\", ...}\"}",
 * 	"videos_sort_by:0":"{\"name\":\"videos_sort_by\",\"args\":\"Most Recent\"}",
 * 	"videos_live:0":"{\"name\":\"videos_live\",\"args\":\"\"}"
 * }
 *
 * @param array $get Request parameters ("sort", "live", "older", "newer")
 * @return array Filter map, empty when no filter applies
 */
private function video_filters($get){
	
	$filter = [];
	$sort = $get["sort"];
	$live = $get["live"];
	$older = $get["older"];
	$newer = $get["newer"];
	
	if(
		$older !== false ||
		$newer !== false
	){
		
		// a missing bound falls back to "now" (upper) or the epoch (lower)
		if($older === false){
			
			$older = time();
		}
		
		if($newer === false){
			
			$newer = 0;
		}
		
		$filter["rp_creation_time:0"] =
			json_encode(
				[
					"name" => "creation_time",
					"args" =>
						json_encode(
							[
								"start_year" => date("Y", $newer),
								"start_month" => date("Y-m", $newer),
								"end_year" => date("Y", $older),
								"end_month" => date("Y-m", $older),
								"start_day" => date("Y-m-d", $newer),
								"end_day" => date("Y-m-d", $older)
							]
						)
				]
			);
	}
	
	if($sort != "relevance"){
		
		$filter["videos_sort_by:0"] =
			json_encode(
				[
					"name" => "videos_sort_by",
					"args" => "Most Recent"
				]
			);
	}
	
	if($live != "no"){
		
		$filter["videos_live:0"] =
			json_encode(
				[
					"name" => "videos_live",
					"args" => ""
				]
			);
	}
	
	return $filter;
}

/**
 * Assemble the form fields of the GraphQL pagination request
 * ([POST] https://www.facebook.com/api/graphql/ -- FORM data, not JSON!)
 * from values embedded in the first result page.
 *
 * @param string      $html   First result page
 * @param string      $search Search term
 * @param array       $filter Filter map as built by video_filters()
 * @param string|null $cursor "end_cursor" of the first result page
 * @return array Form fields; "variables" is already JSON-encoded
 * @throws Exception When the embedded data or the AJAX parameters cannot
 *                   be extracted
 */
private function video_build_nextpage($html, $search, $filter, $cursor){
	
	if(
		preg_match(
			'/handleWithCustomApplyEach\(ScheduledApplyEach,({.*})\);}\);}\);<\/script>/',
			$html,
			$nextpagedata
		) !== 1
	){
		
		// the original read $nextpagedata[1] without checking the match
		throw new Exception("Could not grep next page data");
	}
	
	$nextpagedata = json_decode($nextpagedata[1], true);
	
	if(!is_array($nextpagedata)){
		
		throw new Exception("Failed to decode next page data");
	}
	
	$nextpage = [
		"av" => "0",
		"__user" => null,
		"__a" => null,
		"__req" => "2",
		"__hs" => null,
		"dpr" => "1",
		"__ccg" => null,
		"__rev" => null,
		// another client side token
		"__s" => $this->randomstring(6) . ":" . $this->randomstring(6) . ":" . $this->randomstring(6),
		"__hsi" => null,
		// tracking fingerprint (probably generated using webgl)
		"__dyn" => "7xeUmwlE7ibwKBWo2vwAxu13w8CewSwMwNw9G2S0im3y4o0B-q1ew65xO2O1Vw8G1Qw5Mx61vw9m1YwBgao6C0Mo5W3S7Udo5q4U2zxe2Gew9O222SUbEaU2eU5O0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w",
		"__csr" => $this->randomstring(null),
		"__comet_req" => null,
		"lsd" => null,
		"jazoest" => null,
		"__spin_r" => null,
		"__spin_b" => null,
		"__spin_t" => null,
		"fb_api_caller_class" => "RelayModern",
		"fb_api_req_friendly_name" => "SearchCometResultsPaginatedResultsQuery",
		"variables" => [ // this is json
			"UFI2CommentsProvider_commentsKey" => "SearchCometResultsInitialResultsQuery",
			"allow_streaming" => false,
			"args" => [
				"callsite" => "comet:watch_search",
				"config" => [
					"exact_match" => false,
					"high_confidence_config" => null,
					"intercept_config" => null,
					"sts_disambiguation" => null,
					"watch_config" => null
				],
				"context" => [
					"bsid" => $this->video_find_bsid($nextpagedata),
					"tsid" => null
				],
				"experience" => [
					"encoded_server_defined_params" => null,
					"fbid" => null,
					"type" => "WATCH_TAB_GLOBAL"
				],
				// append filters to nextpage
				"filters" => array_values($filter),
				"text" => $search
			],
			"count" => 5,
			"cursor" => $cursor,
			"displayCommentsContextEnableComment" => false,
			"displayCommentsContextIsAdPreview" => false,
			"displayCommentsContextIsAggregatedShare" => false,
			"displayCommentsContextIsStorySet" => false,
			"displayCommentsFeedbackContext" => null,
			"feedLocation" => "SEARCH",
			"feedbackSource" => 23,
			"fetch_filters" => true,
			"focusCommentID" => null,
			"locale" => null,
			"privacySelectorRenderLocation" => "COMET_STREAM",
			"renderLocation" => "search_results_page",
			"scale" => 1,
			"stream_initial_count" => 0,
			"useDefaultActor" => false,
			"__relay_internal__pv__IsWorkUserrelayprovider" => false,
			"__relay_internal__pv__IsMergQAPollsrelayprovider" => false,
			"__relay_internal__pv__StoriesArmadilloReplyEnabledrelayprovider" => false,
			"__relay_internal__pv__StoriesRingrelayprovider" => false
		],
		"server_timestamps" => "true",
		"doc_id" => "6761275837251607" // is actually dynamic
	];
	
	// copy the session values out of the page's "define" entries; the
	// value is read from index 2 of each entry
	foreach(($nextpagedata["define"] ?? []) as $key){
		
		if(isset($key[2]["haste_session"])){
			
			$nextpage["__hs"] = $key[2]["haste_session"];
		}
		
		if(isset($key[2]["connectionClass"])){
			
			$nextpage["__ccg"] = $key[2]["connectionClass"];
		}
		
		if(isset($key[2]["hsi"])){
			
			$nextpage["__hsi"] = (string)$key[2]["hsi"];
		}
		
		if(!empty($key[2]["token"])){
			
			$nextpage["lsd"] = $key[2]["token"];
		}
		
		// __rev mirrors __spin_r (the original set __spin_r in two
		// separate, identical checks)
		if(isset($key[2]["__spin_r"])){
			
			$nextpage["__spin_r"] = (string)$key[2]["__spin_r"];
			$nextpage["__rev"] = $nextpage["__spin_r"];
		}
		
		if(isset($key[2]["__spin_b"])){
			
			$nextpage["__spin_b"] = $key[2]["__spin_b"];
		}
		
		if(isset($key[2]["__spin_t"])){
			
			$nextpage["__spin_t"] = (string)$key[2]["__spin_t"];
		}
	}
	
	preg_match(
		'/{"u":"\\\\\/ajax\\\\\/qm\\\\\/\?__a=([0-9]+)&__user=([0-9]+)&__comet_req=([0-9]+)&jazoest=([0-9]+)"/',
		$html,
		$ajaxparams
	);
	
	if(count($ajaxparams) !== 5){
		
		throw new Exception("Could not grep the AJAX parameters");
	}
	
	$nextpage["__a"] = $ajaxparams[1];
	$nextpage["__user"] = $ajaxparams[2];
	$nextpage["__comet_req"] = $ajaxparams[3];
	$nextpage["jazoest"] = $ajaxparams[4];
	
	$nextpage["variables"] = json_encode($nextpage["variables"]);
	
	return $nextpage;
}

/**
 * Find the "bsid" context value inside the page's "require" entries.
 *
 * The value is looked up four levels deep under
 * ["variables"]["args"]["context"]["bsid"]; when several entries carry
 * one, the last one wins.
 *
 * @param array $nextpagedata Decoded data embedded in the result page
 * @return mixed The bsid, or null when none is present
 */
private function video_find_bsid($nextpagedata){
	
	$bsid = null;
	
	foreach(($nextpagedata["require"] ?? []) as $entry){
		
		if(!is_array($entry)){
			
			continue;
		}
		
		foreach($entry as $level_one){
			
			if(!is_array($level_one)){
				
				continue;
			}
			
			foreach($level_one as $level_two){
				
				if(!is_array($level_two)){
					
					continue;
				}
				
				foreach($level_two as $level_three){
					
					if(
						isset(
							$level_three
							["variables"]
							["args"]
							["context"]
							["bsid"]
						)
					){
						
						$bsid =
							$level_three
							["variables"]
							["args"]
							["context"]
							["bsid"];
					}
				}
			}
		}
	}
	
	return $bsid;
}
|
||||
|
||||
/**
 * Run one GraphQL pagination request, add its results to $this->out and,
 * when a further page is announced, store the follow-up request and put the
 * returned token in $this->out["npt"].
 *
 * @param array $nextpage  Form fields of the request; "variables" is a
 *                         JSON-encoded string
 * @param bool  $getcursor Unused; kept so existing calls stay valid
 * @return void
 * @throws Exception When the request fails or the response is not valid JSON
 */
private function video_nextpage($nextpage, $getcursor = false){
	
	$json =
		json_decode(
			$this->get(
				"https://www.facebook.com/api/graphql/",
				$nextpage,
				self::post
			),
			true
		);
	
	if($json === null){
		
		throw new Exception("Failed to decode next page JSON");
	}
	
	// a response without the expected structure is treated as an empty
	// last page; the original indexed into the missing keys
	$results =
		$json
		["data"]
		["serpResponse"]
		["results"]
		?? [];
	
	foreach(($results["edges"] ?? []) as $result){
		
		$this->parse_edge($result);
	}
	
	if(($results["page_info"]["has_next_page"] ?? false) != 1){
		
		// last page: no continuation token
		return;
	}
	
	// swap the cursor inside the JSON-encoded variables
	$variables = json_decode($nextpage["variables"], true);
	$variables["cursor"] = $results["page_info"]["end_cursor"] ?? null;
	$nextpage["variables"] = json_encode($variables);
	
	//change this for second call. after, it's static.
	// TODO: csr also updates to longer string
	$nextpage["__dyn"] = "7xeUmwlEnwn8K2WnFw9-2i5U4e0yoW3q322aew9G2S0zU20xi3y4o0B-q1ew65xOfxO1Vw8G11xmfz81s8hwGwQw9m1YwBgao6C2O0B85W3S7Udo5qfK0EUjwGzE2swwwJK2W2K0zK5o4q0GpovU19pobodEGdw46wbS1LwTwNwLw8O1pwr86C16w";
	
	// TODO: change this on third and 6th call
	//$nextpage["__s"] = $this->randomstring(6) . ":" . explode(":", $nextpage["__s"], 2)[1];
	
	$this->out["npt"] = $this->nextpage->store(json_encode($nextpage), "videos");
}
|
||||
|
||||
/**
 * Convert one search result edge into a result entry and append it to
 * $this->out["livestream"] (broadcast status "live") or $this->out["video"]
 * (everything else).
 *
 * @param array $edge Result edge; the data is read from
 *                    ["relay_rendering_strategy"]["view_model"]
 * @return void
 */
private function parse_edge($edge){
	
	$edge =
		$edge
		["relay_rendering_strategy"]
		["view_model"];
	
	$meta = $edge["video_metadata_model"];
	$thumb = $edge["video_thumbnail_model"];
	
	// a missing status is treated as "" so it lands in the normal branch
	// without handing null to the string functions
	$status = (string)($meta["video_broadcast_status"] ?? "");
	
	$append = "video";
	
	if(strtolower($status) == "live"){
		
		// handle livestream
		$duration = "_LIVE";
		$append = "livestream";
		$timetext = null;
		$views = (int)$meta["relative_time_string"];
		
		$url_prefix = "https://www.facebook.com/watch/live/?v=";
		
	}elseif(stripos($status, "vod") !== false){
		
		// handle VOD format
		$timetext = null;
		$views = (int)$meta["relative_time_string"];
		
		$duration = $this->hms2int($thumb["video_duration_text"]);
		
		$url_prefix = "https://www.facebook.com/watch/live/?v=";
		
	}else{
		
		// handle normal format: "<date> · <view count>"
		$timetext =
			explode(
				" · ",
				$meta["relative_time_string"],
				2
			);
		
		if(count($timetext) === 2){
			
			$views = $this->truncatedcount2int($timetext[1]);
		}else{
			
			$views = null;
		}
		
		$timetext = strtotime($timetext[0]);
		
		if($timetext === false){
			
			// unparsable date: report "no date" like the other branches
			// instead of leaking false into the output
			$timetext = null;
		}
		
		$duration = $this->hms2int($thumb["video_duration_text"]);
		
		$url_prefix = "https://www.facebook.com/watch/?v=";
	}
	
	if(isset($meta["video_owner_profile"]["uri_token"])){
		
		$profileurl =
			"https://www.facebook.com/watch/" .
			$meta["video_owner_profile"]["uri_token"];
	}else{
		
		$profileurl = $meta["video_owner_profile"]["url"];
	}
	
	if(empty($meta["save_description"])){
		
		$description = null;
	}else{
		
		// flatten newlines BEFORE truncating, as done for the title:
		// limitstrlen() keeps only the text up to the first newline, so
		// the original order cut the description at its first line break
		// and made the replacement a no-op
		$description =
			$this->limitstrlen(
				str_replace(
					"\n",
					" ",
					$meta["save_description"]
				)
			);
	}
	
	$this->out[$append][] = [
		"title" =>
			$this->limitstrlen(
				str_replace(
					"\n",
					" ",
					$meta["title"]
				),
				100
			),
		"description" => $description,
		"author" => [
			"name" => $meta["video_owner_profile"]["name"],
			"url" => $profileurl,
			"avatar" => null
		],
		"date" => $timetext,
		"duration" => $duration,
		"views" => $views,
		"thumb" => [
			"url" => $thumb["thumbnail_image"]["uri"],
			"ratio" => "16:9"
		],
		"url" =>
			$url_prefix .
			$edge
			["video_click_model"]
			["click_metadata_model"]
			["video_id"]
	];
}
|
||||
|
||||
/**
 * Build a random token.
 *
 * With a numeric $len the token has that many characters drawn from
 * "a-z" and "1-9". With null the length is picked at random between 141
 * and 145 and the alphabet also includes "A-Z" and "-".
 *
 * @param int|null $len Token length, or null for the long variant
 * @return string|null The token; null when the length is zero
 */
private function randomstring($len){
	
	$long_variant = $len === null;
	
	if($long_variant){
		
		$alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-";
		$len = rand(141, 145);
	}else{
		
		$alphabet = "abcdefghijklmnopqrstuvwxyz123456789";
	}
	
	// highest valid index: 61 for the long alphabet, 34 for the short one
	$last = strlen($alphabet) - 1;
	
	$token = null;
	$remaining = $len;
	
	while($remaining > 0){
		
		$token .= $alphabet[rand(0, $last)];
		$remaining--;
	}
	
	return $token;
}
|
||||
|
||||
/**
 * Keep only the first line of $text after word-wrapping it at $len
 * characters, i.e. cut at a word boundary or at the first newline already
 * in the text, whichever comes first.
 *
 * @param string $text Input text
 * @param int    $len  Wrap width
 * @return string First wrapped line
 */
private function limitstrlen($text, $len = 300){
	
	$wrapped = wordwrap($text, $len, "\n");
	$cut = strpos($wrapped, "\n");
	
	if($cut === false){
		
		// single line: nothing to cut
		return $wrapped;
	}
	
	return substr($wrapped, 0, $cut);
}
|
||||
|
||||
/**
 * Convert a "h:mm:ss", "m:ss" or "s" duration string to seconds.
 *
 * @param string $time Duration text
 * @return int Number of seconds
 */
private function hms2int($time){
	
	// at most three fields; anything after the second colon stays in the
	// last field and is cast as a whole
	$fields = explode(":", $time, 3);
	
	$seconds = 0;
	
	// each field to the left is worth 60 times the one to its right
	foreach($fields as $field){
		
		$seconds = ($seconds * 60) + (int)$field;
	}
	
	return $seconds;
}
|
||||
|
||||
/**
 * Convert an abbreviated count such as "1.5K views", "12M" or "523 views"
 * to an integer.
 *
 * Only the first space-separated token is read. A trailing "k", "m" or "b"
 * (any case) multiplies the value by a thousand, a million or a billion.
 *
 * Changes from the original: the result is always an int (plain counts
 * used to come back as floats), decimals of any length are scaled
 * correctly ("1.25K" is 1250), and empty input returns 0 instead of
 * reading string offset -1.
 *
 * @param string $number Count text
 * @return int Parsed count, 0 for empty input
 */
private function truncatedcount2int($number){
	
	// keep the leading token only: "1.5K views" -> "1.5K"
	$number = explode(" ", trim((string)$number), 2)[0];
	
	if($number === ""){
		
		return 0;
	}
	
	$multipliers = [
		"k" => 1000,
		"m" => 1000000,
		"b" => 1000000000
	];
	
	$unit = strtolower($number[strlen($number) - 1]);
	$multiplier = $multipliers[$unit] ?? 1;
	
	// the float cast reads the leading numeric part and ignores the unit
	// suffix; rounding absorbs binary representation error (1.1 * 1000)
	return (int)round((float)$number * $multiplier);
}
|
||||
}
|
||||
262
scraper/fivehpx.php
Normal file
262
scraper/fivehpx.php
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
<?php
|
||||
|
||||
class fivehpx{
|
||||
|
||||
/**
 * Load the helper libraries and create the two collaborators used by this
 * scraper: a `backend` instance created with the name "fivehpx" and a
 * `fuckhtml` instance.
 */
public function __construct(){
	
	// include_once instead of include: if the library was already loaded
	// earlier in this request, a second plain include would abort with a
	// "cannot redeclare class" fatal error
	include_once "lib/backend.php";
	$this->backend = new backend("fivehpx");
	
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
/**
 * Describe the filters this scraper offers.
 *
 * @param string $page Requested page type (not used: the same filter set
 *                     is returned for every page).
 * @return array Filter definitions keyed by filter name.
 */
public function getfilters($page){

    $sort_options = [
        "relevance" => "Relevance",
        "pulse" => "Pulse",
        "newest" => "Newest"
    ];

    return [
        "sort" => [
            "display" => "Sort",
            "option" => $sort_options
        ]
    ];
}
|
||||
|
||||
/**
 * Perform an HTTP request and return the response body.
 *
 * A plain GET is sent with browser-like navigation headers. When
 * $post_data is given, the request becomes a POST carrying that string
 * with JSON/CORS style headers instead.
 *
 * @param mixed       $proxy     Proxy descriptor, forwarded untouched to
 *                               the backend helper's assign_proxy().
 * @param string      $url       Target URL.
 * @param array       $get       Query parameters appended to $url.
 * @param string|null $post_data Raw request body, or null for a GET.
 * @return string|bool Whatever curl_exec() returned.
 * @throws Exception With the curl error text when the transfer fails.
 */
private function get($proxy, $url, $get = [], $post_data = null){

    if($get !== []){

        $url .= "?" . http_build_query($get);
    }

    $is_post = $post_data !== null;

    if($is_post){

        $headers = [
            "User-Agent: " . config::USER_AGENT,
            "Accept: */*",
            "Accept-Language: en-US,en;q=0.5",
            "Accept-Encoding: gzip",
            "Referer: https://500px.com/",
            "content-type: application/json",
            //"x-csrf-token: undefined",
            "x-500px-source: Search",
            "Content-Length: " . strlen($post_data),
            "Origin: https://500px.com",
            "DNT: 1",
            "Sec-GPC: 1",
            "Connection: keep-alive",
            // "Cookie: _pin_unauth, _fbp, _sharedID, _sharedID_cst",
            "Sec-Fetch-Dest: empty",
            "Sec-Fetch-Mode: cors",
            "Sec-Fetch-Site: same-site",
            "Priority: u=4",
            "TE: trailers"
        ];
    }else{

        $headers = [
            "User-Agent: " . config::USER_AGENT,
            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language: en-US,en;q=0.5",
            "Accept-Encoding: gzip",
            "DNT: 1",
            "Sec-GPC: 1",
            "Connection: keep-alive",
            "Upgrade-Insecure-Requests: 1",
            "Sec-Fetch-Dest: document",
            "Sec-Fetch-Mode: navigate",
            "Sec-Fetch-Site: same-origin",
            "Sec-Fetch-User: ?1",
            "Priority: u=0, i",
            "TE: trailers"
        ];
    }

    $curlproc = curl_init();

    curl_setopt($curlproc, CURLOPT_URL, $url);
    curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
    curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

    if($is_post){

        // set post data
        curl_setopt($curlproc, CURLOPT_POST, true);
        curl_setopt($curlproc, CURLOPT_POSTFIELDS, $post_data);
    }

    curl_setopt_array(
        $curlproc,
        [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_CONNECTTIMEOUT => 30,
            CURLOPT_TIMEOUT => 30,
            // http2 bypass
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_2_0
        ]
    );

    $this->backend->assign_proxy($curlproc, $proxy);

    $data = curl_exec($curlproc);

    if(curl_errno($curlproc)){

        throw new Exception(curl_error($curlproc));
    }

    curl_close($curlproc);

    return $data;
}
|
||||
|
||||
/**
 * Search 500px for photos through its graphQL endpoint.
 *
 * @param array $get Request parameters. Reads "npt" (next page token),
 *                   "s" (search term) and "sort".
 * @return array Response with "status", "npt" and the "image" list.
 * @throws Exception When the search term is empty, the request fails, the
 *                   response cannot be decoded, or the API reports an
 *                   error or returns no edges.
 */
public function image($get){

    if($get["npt"]){

        // continuing a search: the stored token holds the graphQL
        // variables of the next request
        [$pagination, $proxy] =
            $this->backend->get(
                $get["npt"], "images"
            );

        $pagination = json_decode($pagination, true);
        $search = $pagination["search"];

    }else{

        $search = $get["s"];
        if(strlen($search) === 0){

            throw new Exception("Search term is empty!");
        }

        $proxy = $this->backend->get_ip();
        $pagination = [
            "sort" => strtoupper($get["sort"]),
            "search" => $search,
            "filters" => [],
            "nlp" => false,
        ];
    }

    // the $cursor variable is only declared and used when we have one
    $has_cursor = isset($pagination["cursor"]);

    $query =
        'query PhotoSearchPaginationContainerQuery(' .
        ($has_cursor ? '$cursor: String, ' : "") .
        '$sort: PhotoSort, $search: String!, $filters: [PhotoSearchFilter!], $nlp: Boolean) { ...PhotoSearchPaginationContainer_query_1vzAZD} fragment PhotoSearchPaginationContainer_query_1vzAZD on Query { photoSearch(sort: $sort, first: 100, ' .
        ($has_cursor ? 'after: $cursor, ' : "") .
        'search: $search, filters: $filters, nlp: $nlp) { edges { node { id legacyId canonicalPath name description width height images(sizes: [33, 36]) { size url id } } } totalCount pageInfo { endCursor hasNextPage } }}';

    try{

        $json =
            $this->get(
                $proxy,
                "https://api.500px.com/graphql",
                [],
                json_encode([
                    "operationName" => "PhotoSearchPaginationContainerQuery",
                    "variables" => $pagination,
                    "query" => $query
                ])
            );
    }catch(Exception $error){

        // keep the transport error reachable through getPrevious()
        throw new Exception("Failed to fetch graphQL object", 0, $error);
    }

    $json = json_decode($json, true);

    if($json === null){

        throw new Exception("Failed to decode graphQL object");
    }

    if(isset($json["errors"][0]["message"])){

        throw new Exception("500px returned an API error: " . $json["errors"][0]["message"]);
    }

    if(!isset($json["data"]["photoSearch"]["edges"])){

        throw new Exception("No edges returned by API");
    }

    $out = [
        "status" => "ok",
        "npt" => null,
        "image" => []
    ];

    foreach($json["data"]["photoSearch"]["edges"] as $image){

        $image = $image["node"];

        // both requested renditions are needed to build a result; skip
        // the node instead of emitting an entry with empty URLs
        if(
            !isset(
                $image["images"][0]["url"],
                $image["images"][1]["url"]
            )
        ){

            continue;
        }

        // "name: description", with the separator trimmed away when
        // either side is empty
        $title =
            trim(
                $this->fuckhtml
                ->getTextContent(
                    $image["name"]
                ) . ": " .
                $this->fuckhtml
                ->getTextContent(
                    $image["description"]
                )
                , " :"
            );

        $small = $this->image_ratio(600, $image["width"], $image["height"]);
        $large = $this->image_ratio(2048, $image["width"], $image["height"]);

        $out["image"][] = [
            "title" => $title,
            "source" => [
                [
                    "url" => $image["images"][1]["url"],
                    "width" => $large[0],
                    "height" => $large[1]
                ],
                [
                    "url" => $image["images"][0]["url"],
                    "width" => $small[0],
                    "height" => $small[1]
                ]
            ],
            "url" => "https://500px.com" . $image["canonicalPath"]
        ];
    }

    // get NPT token
    $page_info = $json["data"]["photoSearch"]["pageInfo"] ?? [];

    if(($page_info["hasNextPage"] ?? false) === true){

        $out["npt"] =
            $this->backend->store(
                json_encode([
                    "cursor" => $page_info["endCursor"] ?? null,
                    "search" => $search,
                    "sort" => $pagination["sort"],
                    "filters" => [],
                    "nlp" => false
                ]),
                "images",
                $proxy
            );
    }

    return $out;
}
|
||||
|
||||
/**
 * Scale an image so that its longest edge equals $longest_edge while
 * keeping the aspect ratio.
 *
 * @param int|float $longest_edge Target size of the longest edge.
 * @param int|float $width        Original width.
 * @param int|float $height       Original height.
 * @return array [width, height] as returned by floor(), or [null, null]
 *               when the original dimensions are missing or not positive.
 */
private function image_ratio($longest_edge, $width, $height){

    // without usable dimensions there is no ratio to compute, and
    // dividing by zero would abort the whole request
    if(
        !is_numeric($width) ||
        !is_numeric($height) ||
        $width <= 0 ||
        $height <= 0
    ){

        return [null, null];
    }

    // the smaller factor is the one that fits the longest edge
    $ratio =
        min(
            $longest_edge / $width,
            $longest_edge / $height
        );

    return [
        floor($width * $ratio),
        floor($height * $ratio)
    ];
}
|
||||
}
|
||||
161
scraper/ftm.php
Normal file
161
scraper/ftm.php
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
<?php
|
||||
|
||||
class ftm{
|
||||
|
||||
/**
 * Load the backend helper library and create the backend helper under
 * this scraper's identifier.
 */
public function __construct(){

    // bring the backend class into scope
    include "lib/backend.php";

    $backend = new backend("ftm");
    $this->backend = $backend;
}
|
||||
|
||||
/**
 * Describe the filters this scraper offers.
 *
 * @param string $page Requested page type (not used).
 * @return array Always empty: no filters are offered.
 */
public function getfilters($page){

    $filters = [];

    return $filters;
}
|
||||
|
||||
/**
 * POST a JSON search request and return the response body.
 *
 * @param mixed  $proxy  Proxy descriptor, forwarded untouched to the
 *                       backend helper's assign_proxy().
 * @param string $url    Endpoint URL.
 * @param string $search Search term; also placed in the Referer header.
 * @param int    $offset Result offset sent along with the search term.
 * @return string|bool Whatever curl_exec() returned.
 * @throws Exception With the curl error text when the transfer fails.
 */
private function get($proxy, $url, $search, $offset){

    // request body
    $payload =
        json_encode(
            [
                "search" => $search,
                "offset" => $offset
            ]
        );

    $headers = [
        "User-Agent: " . config::USER_AGENT,
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.5",
        "Accept-Encoding: gzip",
        "Content-Length: " . strlen($payload),
        "Content-Type: application/json",
        "DNT: 1",
        "Connection: keep-alive",
        "Origin: https://findthatmeme.com",
        "Referer: https://findthatmeme.com/?search=" . urlencode($search),
        "Upgrade-Insecure-Requests: 1",
        "Sec-Fetch-Dest: document",
        "Sec-Fetch-Mode: navigate",
        "Sec-Fetch-Site: none",
        "Sec-Fetch-User: ?1",
        "X-Auth-Key: undefined",
        "X-CSRF-Validation-Header: true"
    ];

    $curlproc = curl_init();

    curl_setopt($curlproc, CURLOPT_URL, $url);
    curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
    curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

    curl_setopt_array(
        $curlproc,
        [
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $payload,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_CONNECTTIMEOUT => 30,
            CURLOPT_TIMEOUT => 30
        ]
    );

    $this->backend->assign_proxy($curlproc, $proxy);

    $data = curl_exec($curlproc);

    if(curl_errno($curlproc)){

        throw new Exception(curl_error($curlproc));
    }

    curl_close($curlproc);

    return $data;
}
|
||||
|
||||
/**
 * Search findthatmeme.com for images.
 *
 * @param array $get Request parameters. Reads "npt" (next page token) and
 *                   "s" (search term).
 * @return array Response with "status", "npt" and the "image" list. "npt"
 *               stays null when the API returned no item.
 * @throws Exception When the search term is empty, the request fails or
 *                   the response is not a JSON list/object.
 */
public function image($get){

    $out = [
        "status" => "ok",
        "npt" => null,
        "image" => []
    ];

    if($get["npt"]){

        // continuing a search: the token stores the search term and the
        // number of items already returned
        [$data, $proxy] = $this->backend->get($get["npt"], "images");
        $data = json_decode($data, true);

        $count = $data["count"];
        $search = $data["search"];
    }else{

        $search = $get["s"];
        if(strlen($search) === 0){

            throw new Exception("Search term is empty!");
        }

        $count = 0;
        $proxy = $this->backend->get_ip();
    }

    try{

        $json =
            json_decode(
                $this->get(
                    $proxy,
                    "https://findthatmeme.com/api/v1/search",
                    $search,
                    $count
                ),
                true
            );

    }catch(Exception $error){

        // keep the transport error reachable through getPrevious()
        throw new Exception("Failed to fetch JSON", 0, $error);
    }

    // a scalar payload cannot be iterated either, treat it like a
    // decoding failure
    if(!is_array($json)){

        throw new Exception("Failed to decode JSON");
    }

    $received = 0;

    foreach($json as $item){

        $count++;
        $received++;

        // videos are represented by their thumbnail
        if($item["type"] == "VIDEO"){

            $thumb = "thumb/" . $item["thumbnail"];
        }else{

            $thumb = $item["image_path"];
        }

        $out["image"][] = [
            "title" => date("jS \of F Y @ g:ia", strtotime($item["created_at"])),
            "source" => [
                [
                    "url" =>
                        "https://s3.thehackerblog.com/findthatmeme/" .
                        $thumb,
                    "width" => null,
                    "height" => null
                ]
            ],
            "url" => $item["source_page_url"]
        ];
    }

    // an empty page means the offset would never advance: only offer a
    // next page when this one actually returned something
    if($received !== 0){

        $out["npt"] =
            $this->backend->store(
                json_encode([
                    "count" => $count,
                    "search" => $search
                ]),
                "images",
                $proxy
            );
    }

    return $out;
}
|
||||
}
|
||||
320
scraper/ghostery.php
Normal file
320
scraper/ghostery.php
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
<?php
|
||||
|
||||
class ghostery{
|
||||
|
||||
/**
 * Load the helper libraries and create the helper objects this scraper
 * works with.
 */
public function __construct(){

    // bring both helper classes into scope first
    include "lib/backend.php";
    include "lib/fuckhtml.php";

    // the backend helper is created with this scraper's identifier
    $this->backend = new backend("ghostery");
    $this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
/**
 * Describe the filters this scraper offers.
 *
 * @param string $page Requested page type.
 * @return array The country filter for the "web" page, an empty array for
 *               any other page.
 */
public function getfilters($page){

    $filters = [];

    if($page != "web"){

        return $filters;
    }

    $countries = [
        "any" => "All regions",
        "AR" => "Argentina",
        "AU" => "Australia",
        "AT" => "Austria",
        "BE" => "Belgium",
        "BR" => "Brazil",
        "CA" => "Canada",
        "CL" => "Chile",
        "DK" => "Denmark",
        "FI" => "Finland",
        "FR" => "France",
        "DE" => "Germany",
        "HK" => "Hong Kong",
        "IN" => "India",
        "ID" => "Indonesia",
        "IT" => "Italy",
        "JP" => "Japan",
        "KR" => "Korea",
        "MY" => "Malaysia",
        "MX" => "Mexico",
        "NL" => "Netherlands",
        "NZ" => "New Zealand",
        "NO" => "Norway",
        "CN" => "People's Republic of China",
        "PL" => "Poland",
        "PT" => "Portugal",
        "PH" => "Republic of the Philippines",
        "RU" => "Russia",
        "SA" => "Saudi Arabia",
        "ZA" => "South Africa",
        "ES" => "Spain",
        "SE" => "Sweden",
        "CH" => "Switzerland",
        "TW" => "Taiwan",
        "TR" => "Turkey",
        "GB" => "United Kingdom",
        "US" => "United States"
    ];

    $filters["country"] = [
        "display" => "Country",
        "option" => $countries
    ];

    return $filters;
}
|
||||
|
||||
/**
 * Perform a GET request against Ghostery search and return the body.
 *
 * @param mixed  $proxy   Proxy descriptor, forwarded untouched to the
 *                        backend helper's assign_proxy().
 * @param string $url     Target URL.
 * @param array  $get     Query parameters appended to $url.
 * @param string $country Country code sent in the "ctry" cookie; "any" is
 *                        sent as "--". Defaults to "any" so the optional
 *                        $get parameter is no longer followed by a
 *                        required one (deprecated since PHP 8.0).
 * @return string|bool Whatever curl_exec() returned.
 * @throws Exception With the curl error text when the transfer fails.
 */
private function get($proxy, $url, $get = [], $country = "any"){

    if($get !== []){

        $url .= "?" . http_build_query($get);
    }

    $ctry = $country == "any" ? "--" : $country;

    $headers = [
        "User-Agent: " . config::USER_AGENT,
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.5",
        "Accept-Encoding: gzip",
        "Referer: https://ghosterysearch.com",
        "DNT: 1",
        "Sec-GPC: 1",
        "Connection: keep-alive",
        "Cookie: ctry=" . $ctry . "; noads=true",
        "Upgrade-Insecure-Requests: 1",
        "Sec-Fetch-Dest: document",
        "Sec-Fetch-Mode: navigate",
        "Sec-Fetch-Site: same-origin",
        "Sec-Fetch-User: ?1",
        "Priority: u=0, i"
    ];

    $curlproc = curl_init();

    curl_setopt($curlproc, CURLOPT_URL, $url);
    curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
    curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

    // http2 bypass
    curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

    curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

    $this->backend->assign_proxy($curlproc, $proxy);

    $data = curl_exec($curlproc);

    if(curl_errno($curlproc)){

        throw new Exception(curl_error($curlproc));
    }

    curl_close($curlproc);

    return $data;
}
|
||||
|
||||
/**
 * Search the web through Ghostery search.
 *
 * @param array $get Request parameters. Reads "npt" (next page token),
 *                   "s" (search term) and "country".
 * @return array Response object; results are placed under "web" and the
 *               next page token, when a next page was found, under "npt".
 * @throws Exception When the search term is empty, the page cannot be
 *                   fetched or the result section is missing.
 */
public function web($get){

    if($get["npt"]){

        // continuing a search: the token stores the query string of the
        // next page plus the country ("c") the search was started with
        [$query, $proxy] = $this->backend->get($get["npt"], "web");

        parse_str($query, $query);

        // country
        $country = $query["c"] ?? "any";
        unset($query["c"]);

        $url = "https://ghosterysearch.com/search?" . http_build_query($query);
        $params = [];
    }else{

        if(strlen($get["s"]) === 0){

            throw new Exception("Search term is empty!");
        }

        $proxy = $this->backend->get_ip();
        $country = $get["country"];

        $url = "https://ghosterysearch.com/search";
        $params = [
            "q" => $get["s"]
        ];
    }

    try{

        $html =
            $this->get(
                $proxy,
                $url,
                $params,
                $country
            );
    }catch(Exception $error){

        // keep the transport error reachable through getPrevious()
        throw new Exception("Failed to fetch search page", 0, $error);
    }

    $out = [
        "status" => "ok",
        "spelling" => [
            "type" => "no_correction",
            "using" => null,
            "correction" => null
        ],
        "npt" => null,
        "answer" => [],
        "web" => [],
        "image" => [],
        "video" => [],
        "news" => [],
        "related" => []
    ];

    $this->fuckhtml->load($html);

    $results_wrapper =
        $this->fuckhtml
        ->getElementsByClassName(
            "results",
            "section"
        );

    if(count($results_wrapper) === 0){

        throw new Exception("Failed to grep result section");
    }

    $this->fuckhtml->load($results_wrapper[0]);

    // get search results
    $results =
        $this->fuckhtml
        ->getElementsByClassName(
            "result",
            "li"
        );

    if(count($results) === 0){

        return $out;
    }

    foreach($results as $result){

        $this->fuckhtml->load($result);

        $a =
            $this->fuckhtml
            ->getElementsByClassName(
                "url",
                "a"
            );

        $h2 = $this->fuckhtml->getElementsByTagName("h2");

        // a result needs a link and a title, anything else is skipped
        if(
            count($a) === 0 ||
            count($h2) === 0 ||
            !isset($a[0]["attributes"]["href"])
        ){

            continue;
        }

        $a = $a[0];

        // the description paragraph is optional
        $p = $this->fuckhtml->getElementsByTagName("p");

        $description =
            count($p) === 0 ?
            null :
            $this->titledots(
                $this->fuckhtml
                ->getTextContent(
                    $p[0]
                )
            );

        $out["web"][] = [
            "title" =>
                $this->titledots(
                    $this->fuckhtml
                    ->getTextContent(
                        $h2[0]
                    )
                ),
            "description" => $description,
            "url" =>
                $this->fuckhtml
                ->getTextContent(
                    $a
                    ["attributes"]
                    ["href"]
                ),
            "date" => null,
            "type" => "web",
            "thumb" => [
                "url" => null,
                "ratio" => null
            ],
            "sublink" => [],
            "table" => []
        ];
    }

    // back to the whole document for the pagination lookup
    $this->fuckhtml->load($html);

    // get pagination token
    $pagination_wrapper =
        $this->fuckhtml
        ->getElementsByClassName(
            "pagination",
            "div"
        );

    if(count($pagination_wrapper) !== 0){

        // found next page!
        $this->fuckhtml->load($pagination_wrapper[0]);

        $a =
            $this->fuckhtml
            ->getElementsByTagName(
                "a"
            );

        // the last link of the pagination block is taken as the next page
        $last = count($a) === 0 ? null : $a[count($a) - 1];

        if(isset($last["attributes"]["href"])){

            $q =
                parse_url(
                    $this->fuckhtml
                    ->getTextContent(
                        $last
                        ["attributes"]
                        ["href"]
                    ),
                    PHP_URL_QUERY
                );

            if(is_string($q) && $q !== ""){

                // carry over the country this search is actually using,
                // so it survives across every following page
                $out["npt"] =
                    $this->backend
                    ->store(
                        $q . "&c=" . $country,
                        "web",
                        $proxy
                    );
            }
        }
    }

    return $out;
}
|
||||
|
||||
/**
 * Strip whitespace, dots and ellipsis characters from both ends of a
 * title or description.
 *
 * trim() works on bytes: listing the multi-byte "…" there makes it strip
 * each of its three bytes on its own, which can cut other UTF-8
 * characters (curly quotes, dashes, ...) in half. The stripping is
 * therefore done on whole characters.
 *
 * @param string $title Text to clean.
 * @return string Cleaned text.
 */
private function titledots($title){

    $title = (string)$title;

    // same character set as before: space, dot, \t, \n, \r, NUL,
    // vertical tab and U+2026 (horizontal ellipsis)
    $clean =
        preg_replace(
            '/^[ .\t\n\r\0\x0B\x{2026}]+|[ .\t\n\r\0\x0B\x{2026}]+$/u',
            "",
            $title
        );

    if($clean === null){

        // not valid UTF-8: only the single-byte characters can be
        // stripped safely
        return trim($title, " .\t\n\r\0\x0B");
    }

    return $clean;
}
|
||||
}
|
||||
3448
scraper/google.php
Normal file
3448
scraper/google.php
Normal file
File diff suppressed because it is too large
Load diff
1054
scraper/google_cse.php
Normal file
1054
scraper/google_cse.php
Normal file
File diff suppressed because it is too large
Load diff
435
scraper/greppr.php
Normal file
435
scraper/greppr.php
Normal file
|
|
@ -0,0 +1,435 @@
|
|||
<?php
|
||||
|
||||
class greppr{
|
||||
|
||||
/**
 * Load the helper libraries and create the helper objects this scraper
 * works with.
 */
public function __construct(){

    // bring both helper classes into scope first
    include "lib/backend.php";
    include "lib/fuckhtml.php";

    // the backend helper is created with this scraper's identifier
    $this->backend = new backend("greppr");
    $this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
/**
 * Describe the filters this scraper offers.
 *
 * @param string $page Requested page type (not used).
 * @return array Always empty: no filters are offered.
 */
public function getfilters($page){

    $filters = [];

    return $filters;
}
|
||||
|
||||
/**
 * Perform a GET request and return both the response headers and body.
 *
 * @param mixed        $proxy  Proxy descriptor, forwarded untouched to
 *                             the backend helper's assign_proxy().
 * @param string       $url    Target URL.
 * @param array        $get    Query parameters appended to $url.
 * @param string|false $cookie PHPSESSID value to send, or false to send
 *                             no cookie.
 * @return array "headers" (lower-cased name => value, a repeated header
 *               keeps its last value) and "data" (the curl_exec() result).
 * @throws Exception With the curl error text when the transfer fails.
 */
private function get($proxy, $url, $get = [], $cookie = false){

    if($get !== []){

        $url .= "?" . http_build_query($get);
    }

    // the cookie header, when there is one, sits right after the
    // Accept-* headers
    $leading_headers = [
        "User-Agent: " . config::USER_AGENT,
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.5",
        "Accept-Encoding: gzip"
    ];

    $cookie_headers = [];

    if($cookie !== false){

        $cookie_headers[] = "Cookie: PHPSESSID=" . $cookie;
    }

    $trailing_headers = [
        "DNT: 1",
        "Connection: keep-alive",
        "Upgrade-Insecure-Requests: 1",
        "Sec-Fetch-Dest: document",
        "Sec-Fetch-Mode: navigate",
        "Sec-Fetch-Site: none",
        "Sec-Fetch-User: ?1"
    ];

    $curlproc = curl_init();

    curl_setopt($curlproc, CURLOPT_URL, $url);
    curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
    curl_setopt(
        $curlproc,
        CURLOPT_HTTPHEADER,
        array_merge($leading_headers, $cookie_headers, $trailing_headers)
    );

    curl_setopt_array(
        $curlproc,
        [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_CONNECTTIMEOUT => 30,
            CURLOPT_TIMEOUT => 30
        ]
    );

    $this->backend->assign_proxy($curlproc, $proxy);

    // collect the response headers while the transfer runs
    $headers = [];

    curl_setopt(
        $curlproc,
        CURLOPT_HEADERFUNCTION,
        function($curlproc, $line) use (&$headers){

            // curl expects the number of bytes that were handled
            $handled = strlen($line);

            $pair = explode(':', $line, 2);

            if(count($pair) >= 2){

                $headers[strtolower(trim($pair[0]))] = trim($pair[1]);
            }

            // lines without a colon are ignored as invalid headers
            return $handled;
        }
    );

    $data = curl_exec($curlproc);

    if(curl_errno($curlproc)){

        throw new Exception(curl_error($curlproc));
    }

    curl_close($curlproc);

    return [
        "headers" => $headers,
        "data" => $data
    ];
}
|
||||
|
||||
/**
 * Search the web through greppr.org.
 *
 * The site only answers searches that carry its tokens. They are read
 * from APCu, or scraped from the front page when none are cached. When
 * the search page comes back empty the tokens are considered stale and
 * the whole search is retried once with freshly scraped ones.
 *
 * @param array $get           Request parameters. Reads "npt" (next page
 *                             token) and "s" (search term).
 * @param bool  $first_attempt False on the internal retry, which forces a
 *                             token refresh.
 * @return array Response object; results are placed under "web" and the
 *               next page token, when a next page was found, under "npt".
 * @throws Exception When the search term is empty, a page cannot be
 *                   fetched or the tokens cannot be obtained.
 */
public function web($get, $first_attempt = true){

    if($get["npt"]){

        // continuing a search: the token stores the "q" and "s" values
        // of the next page link
        [$q, $proxy] = $this->backend->get($get["npt"], "web");

        $q = json_decode($q, true);

    }else{

        $search = $get["s"];
        if(strlen($search) === 0){

            throw new Exception("Search term is empty!");
        }

        $proxy = $this->backend->get_ip();
    }

    // get token
    // token[0] = static token that changes once a day
    // token[1] = dynamic token that changes on every request
    // token[2] = PHPSESSID cookie
    $tokens = apcu_fetch("greppr_token");

    if(
        $tokens === false ||
        $first_attempt === false // force token fetch
    ){

        // we haven't gotten the token yet, get it
        try{

            $response =
                $this->get(
                    $proxy,
                    "https://greppr.org",
                    []
                );
        }catch(Exception $error){

            // keep the transport error reachable through getPrevious()
            throw new Exception("Failed to fetch search tokens", 0, $error);
        }

        $tokens = $this->parse_token($response);

        if($tokens === false){

            throw new Exception("Failed to grep search tokens");
        }
    }

    // the static token is the NAME of the query parameter
    if($get["npt"]){

        $params = [
            $tokens[0] => $q["q"],
            "s" => $q["s"],
            "l" => 30,
            "n" => $tokens[1]
        ];
    }else{

        $params = [
            $tokens[0] => $search,
            "n" => $tokens[1]
        ];
    }

    try{

        $searchresults = $this->get(
            $proxy,
            "https://greppr.org/search",
            $params,
            $tokens[2]
        );
    }catch(Exception $error){

        throw new Exception("Failed to fetch search page", 0, $error);
    }

    if(strlen($searchresults["data"]) === 0){

        // redirected to main page, which means we got old token
        // generate a new one

        // ... unless we just tried to do that
        if($first_attempt === false){

            throw new Exception("Failed to get a new search token");
        }

        return $this->web($get, false);
    }

    // refresh the token with new data (this also triggers fuckhtml load)
    $this->parse_token($searchresults, $tokens[2]);

    // response object
    $out = [
        "status" => "ok",
        "spelling" => [
            "type" => "no_correction",
            "using" => null,
            "correction" => null
        ],
        "npt" => null,
        "answer" => [],
        "web" => [],
        "image" => [],
        "video" => [],
        "news" => [],
        "related" => []
    ];

    // get results for later
    $results =
        $this->fuckhtml
        ->getElementsByClassName(
            "result",
            "div"
        );

    // check for next page
    $next_elem =
        $this->fuckhtml
        ->getElementsByClassName(
            "pagination",
            "ul"
        );

    if(count($next_elem) !== 0){

        $this->fuckhtml->load($next_elem[0]);

        $as =
            $this->fuckhtml
            ->getElementsByClassName(
                "page-link",
                "a"
            );

        // the current page links to "#": the link that follows it is the
        // next page
        $current_seen = false;
        foreach($as as $a){

            $href = $a["attributes"]["href"] ?? null;

            if($current_seen === true){

                if($href !== null){

                    parse_str(
                        $this->fuckhtml
                        ->getTextContent(
                            $href
                        ),
                        $values
                    );

                    // parameter names are not stable (the first one is
                    // the static token), so go by position
                    $values = array_values($values);

                    if(count($values) >= 2){

                        $out["npt"] =
                            $this->backend->store(
                                json_encode(
                                    [
                                        "q" => $values[0],
                                        "s" => $values[1]
                                    ]
                                ),
                                "web",
                                $proxy
                            );
                    }
                }

                break;
            }

            if($href == "#"){

                $current_seen = true;
            }
        }
    }

    // scrape results
    foreach($results as $result){

        $this->fuckhtml->load($result);

        $links =
            $this->fuckhtml
            ->getElementsByTagName(
                "a"
            );

        // a result without a usable link cannot be shown
        if(
            count($links) === 0 ||
            !isset($links[0]["attributes"]["href"])
        ){

            continue;
        }

        $a = $links[0];

        $description =
            $this->fuckhtml
            ->getElementsByClassName(
                "highlightedDesc",
                "p"
            );

        if(count($description) === 0){

            $description = null;
        }else{

            $description =
                $this->limitstrlen(
                    $this->fuckhtml
                    ->getTextContent(
                        $description[0]
                    )
                );
        }

        // the date is read from the last paragraph, after its first
        // colon; it stays null when it is missing or cannot be parsed
        $date = null;

        $paragraphs =
            $this->fuckhtml
            ->getElementsByTagName(
                "p"
            );

        if(count($paragraphs) !== 0){

            $pieces =
                explode(
                    ":",
                    $this->fuckhtml
                    ->getTextContent(
                        $paragraphs[count($paragraphs) - 1]["innerHTML"]
                    )
                );

            if(isset($pieces[1])){

                $stamp = strtotime($pieces[1]);

                if($stamp !== false){

                    $date = $stamp;
                }
            }
        }

        $out["web"][] = [
            "title" =>
                $this->fuckhtml
                ->getTextContent(
                    $a["innerHTML"]
                ),
            "description" => $description,
            "url" =>
                $this->fuckhtml
                ->getTextContent(
                    $a["attributes"]["href"]
                ),
            "date" => $date,
            "type" => "web",
            "thumb" => [
                "url" => null,
                "ratio" => null
            ],
            "sublink" => [],
            "table" => []
        ];
    }

    return $out;
}
|
||||
|
||||
/**
 * Extract the search tokens from a greppr page and cache them in APCu.
 *
 * The page is loaded into the HTML helper as a side effect. The first
 * script holding a "window.location = '/search?<name>=...&n=<number>"
 * redirect provides the two page tokens.
 *
 * @param array        $response Result of get(): "data" (HTML) and
 *                               "headers".
 * @param string|false $cookie   Session cookie already in use, or false
 *                               to read it from the Set-Cookie header.
 * @return array|false [parameter name, "n" value, PHPSESSID], or false
 *                     when the tokens or the cookie cannot be found.
 */
private function parse_token($response, $cookie = false){

    $this->fuckhtml->load($response["data"]);

    $scripts =
        $this->fuckhtml
        ->getElementsByTagName("script");

    $tokens = null;

    foreach($scripts as $script){

        $matched =
            preg_match(
                '/window\.location ?= ?\'\/search\?([^=]+).*&n=([0-9]+)/',
                $script["innerHTML"],
                $match
            );

        if($matched === 1){

            $tokens = [
                $match[1],
                $match[2]
            ];
            break;
        }
    }

    if($tokens === null){

        // no script carries the redirect
        return false;
    }

    if($cookie === false){

        // no cookie was specified, so the server must have sent one

        if(!isset($response["headers"]["set-cookie"])){

            // server didn't send a cookie
            return false;
        }

        // get cookie
        preg_match(
            '/PHPSESSID=([^;]+)/',
            $response["headers"]["set-cookie"],
            $session
        );

        if(!isset($session[1])){

            // server sent an unexpected cookie
            return false;
        }

        $cookie = $session[1];
    }

    $tokens[] = $cookie;
    apcu_store("greppr_token", $tokens);

    return $tokens;
}
|
||||
|
||||
/**
 * Shorten a text to the first line produced by word-wrapping it at 300
 * characters.
 *
 * @param string $text Text to shorten.
 * @return string Everything that precedes the first line break of the
 *                wrapped text.
 */
private function limitstrlen($text){

    $wrapped = wordwrap($text, 300, "\n");
    $cut = strpos($wrapped, "\n");

    // no line break anywhere: there is nothing to cut off
    if($cut === false){

        return $wrapped;
    }

    return substr($wrapped, 0, $cut);
}
|
||||
}
|
||||
258
scraper/imgur.php
Normal file
258
scraper/imgur.php
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
<?php
|
||||
|
||||
class imgur{
|
||||
|
||||
/**
 * Load the helper libraries and create the helper objects this scraper
 * works with.
 */
public function __construct(){

    // bring both helper classes into scope first
    include "lib/fuckhtml.php";
    include "lib/backend.php";

    $this->fuckhtml = new fuckhtml();

    // the backend helper is created with this scraper's identifier
    $this->backend = new backend("imgur");
}
|
||||
|
||||
/**
 * Describe the filters this scraper offers.
 *
 * @param string $page Requested page type (not used: the same filter set
 *                     is returned for every page).
 * @return array Filter definitions keyed by filter name.
 */
public function getfilters($page){

    // /score/
    $sort = [
        "display" => "Sort by",
        "option" => [
            "score" => "Highest scoring",
            "relevance" => "Most relevant",
            "time" => "Newest first"
        ]
    ];

    // /score/day/
    $time = [
        "display" => "Time posted",
        "option" => [
            "all" => "All time",
            "day" => "Today",
            "week" => "This week",
            "month" => "This month",
            "year" => "This year"
        ]
    ];

    // q_type
    $format = [
        "display" => "Format",
        "option" => [
            "any" => "Any format",
            "jpg" => "JPG",
            "png" => "PNG",
            "gif" => "GIF",
            "anigif" => "Animated GIF",
            "album" => "Albums"
        ]
    ];

    // q_size_px
    $size = [
        "display" => "Size",
        "option" => [
            "any" => "Any size",
            "small" => "Small (500px or less)",
            "med" => "Medium (500px to 2000px)",
            "big" => "Big (2000px to 5000px)",
            "lrg" => "Large (5000px to 10000px)",
            "huge" => "Huge (10000px and above)"
        ]
    ];

    return [
        "sort" => $sort,
        "time" => $time,
        "format" => $format,
        "size" => $size
    ];
}
|
||||
|
||||
/**
 * Perform a GET request against imgur, sent with XMLHttpRequest-style
 * headers, and return the response body.
 *
 * @param mixed  $proxy Proxy descriptor, forwarded untouched to the
 *                      backend helper's assign_proxy().
 * @param string $url   Target URL.
 * @param array  $get   Query parameters; when not empty they are appended
 *                      to $url after a leading "scrolled" flag.
 * @return string|bool Whatever curl_exec() returned.
 * @throws Exception With the curl error text when the transfer fails.
 */
private function get($proxy, $url, $get = []){

    if($get !== []){

        $url .= "?scrolled&" . http_build_query($get);
    }

    $headers = [
        "User-Agent: " . config::USER_AGENT,
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.5",
        "Accept-Encoding: gzip",
        "DNT: 1",
        "Referer: https://imgur.com/search/",
        "Connection: keep-alive",
        "Sec-Fetch-Dest: empty",
        "Sec-Fetch-Mode: cors",
        "Sec-Fetch-Site: same-origin",
        "TE: trailers",
        "X-Requested-With: XMLHttpRequest"
    ];

    $curlproc = curl_init();

    curl_setopt($curlproc, CURLOPT_URL, $url);
    curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
    curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

    curl_setopt_array(
        $curlproc,
        [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_SSL_VERIFYHOST => 2,
            CURLOPT_SSL_VERIFYPEER => true,
            CURLOPT_CONNECTTIMEOUT => 30,
            CURLOPT_TIMEOUT => 30
        ]
    );

    $this->backend->assign_proxy($curlproc, $proxy);

    $data = curl_exec($curlproc);

    if(curl_errno($curlproc)){

        throw new Exception(curl_error($curlproc));
    }

    curl_close($curlproc);

    return $data;
}
|
||||
|
||||
/**
 * Run an image search and return one page of results.
 *
 * A fresh search reads "s", "sort", "time", "format" and "size" from $get.
 * A continuation ($get["npt"] set) restores those values, the page number
 * and the proxy from the backend store instead.
 *
 * @param array $get Request parameters (see above)
 * @return array     ["status" => "ok", "npt" => next-page token or null,
 *                   "image" => list of results]
 * @throws Exception When the search term is empty or the page cannot be fetched
 */
public function image($get){

    if($get["npt"]){

        // continuation: the stored blob is the JSON written at the bottom
        // of this method, paired with the proxy used for the first page
        [$filter, $proxy] =
            $this->backend->get(
                $get["npt"],
                "images"
            );

        $filter = json_decode($filter, true);

        $search = $filter["s"];
        $sort = $filter["sort"];
        $time = $filter["time"];
        $format = $filter["format"];
        $size = $filter["size"];
        $page = $filter["page"];

        // whatever remains in $filter is sent as the query string
        unset(
            $filter["s"],
            $filter["sort"],
            $filter["time"],
            $filter["format"],
            $filter["size"],
            $filter["page"]
        );

    }else{

        $search = $get["s"];
        if(strlen($search) === 0){

            throw new Exception("Search term is empty!");
        }

        $proxy = $this->backend->get_ip();
        $sort = $get["sort"];
        $time = $get["time"];
        $format = $get["format"];
        $size = $get["size"];
        $page = 0;

        $filter = [
            "q" => $search
        ];

        if($format != "any"){

            $filter["q_type"] = $format;
        }

        if($size != "any"){

            $filter["q_size_px"] = $size;
            $filter["q_size_is_mpx"] = "off";
        }
    }

    $out = [
        "status" => "ok",
        "npt" => null,
        "image" => []
    ];

    try{
        $html =
            $this->get(
                $proxy,
                "https://imgur.com/search/$sort/$time/page/$page",
                $filter
            );

    }catch(Exception $error){

        throw new Exception("Failed to fetch HTML");
    }

    $this->fuckhtml->load($html);

    $posts =
        $this->fuckhtml
        ->getElementsByClassName(
            "post",
            "div"
        );

    foreach($posts as $post){

        $this->fuckhtml->load($post);

        $images =
            $this->fuckhtml
            ->getElementsByTagName("img");

        $links =
            $this->fuckhtml
            ->getElementsByClassName(
                "image-list-link",
                "a"
            );

        if(
            !isset($images[0]["attributes"]["src"]) ||
            !isset($links[0]["attributes"]["href"])
        ){

            // post without a thumbnail or permalink: skip it rather than
            // emit a broken "https:.jpg" entry
            continue;
        }

        $image = $images[0];

        // drop the last 5 characters of the thumbnail URL (presumably the
        // size letter plus ".jpg" -- confirm against live markup) so the
        // suffixes below can be appended to the bare image id
        $image_url = "https:" . substr($this->fuckhtml->getTextContent($image["attributes"]["src"]), 0, -5);

        $out["image"][] = [
            "title" =>
                $this->fuckhtml
                ->getTextContent(
                    $image["attributes"]["alt"]
                ),
            "source" => [
                [
                    "url" => $image_url . ".jpg",
                    "width" => null,
                    "height" => null
                ],
                [
                    "url" => $image_url . "m.jpg",
                    "width" => null,
                    "height" => null
                ]
            ],
            "url" =>
                "https://imgur.com" .
                $this->fuckhtml
                ->getTextContent(
                    $links[0]["attributes"]["href"]
                )
        ];
    }

    if(isset($out["image"][0])){

        // store nextpage: query parameters plus everything needed to
        // rebuild the URL, with the page counter advanced by one
        $filter["s"] = $search;
        $filter["sort"] = $sort;
        $filter["time"] = $time;
        $filter["format"] = $format;
        $filter["size"] = $size;
        $filter["page"] = $page + 1;

        $out["npt"] =
            $this->backend->store(
                json_encode($filter),
                "images",
                $proxy
            );
    }

    return $out;
}
|
||||
}
|
||||
476
scraper/marginalia.php
Normal file
476
scraper/marginalia.php
Normal file
|
|
@ -0,0 +1,476 @@
|
|||
<?php
|
||||
|
||||
class marginalia{
|
||||
/**
 * Load the HTML parser and the "marginalia" backend helper.
 */
public function __construct(){

    // pull in both libraries first, then instantiate them
    include "lib/fuckhtml.php";
    include "lib/backend.php";

    $this->fuckhtml = new fuckhtml();
    $this->backend = new backend("marginalia");
}
|
||||
|
||||
/**
 * Describe the filters available for this scraper.
 *
 * The three yes/no toggles (adtech, recent, intitle) are only offered when
 * no API key is configured; the remaining filters are always returned.
 *
 * @param string $page Unused here
 * @return array       Filter name => ["display" => label, "option" => [value => label]]
 */
public function getfilters($page){

    $yes_no = ["no" => "No", "yes" => "Yes"];

    $base = [];

    if(config::MARGINALIA_API_KEY === null){

        $base["adtech"] = [
            "display" => "Reduce adtech",
            "option" => $yes_no
        ];
        $base["recent"] = [
            "display" => "Recent results",
            "option" => $yes_no
        ];
        $base["intitle"] = [
            "display" => "Search in title",
            "option" => $yes_no
        ];
    }

    $common = [
        "format" => [
            "display" => "Format",
            "option" => [
                "any" => "Any format",
                "html5" => "html5",
                "xhtml" => "xhtml",
                "html123" => "html123"
            ]
        ],
        "file" => [
            "display" => "Filetype",
            "option" => [
                "any" => "Any filetype",
                "nomedia" => "Deny media",
                "media" => "Contains media",
                "audio" => "Contains audio",
                "video" => "Contains video",
                "archive" => "Contains archive",
                "document" => "Contains document"
            ]
        ],
        "javascript" => [
            "display" => "Javascript",
            "option" => [
                "any" => "Allow JS",
                "deny" => "Deny JS",
                "require" => "Require JS"
            ]
        ],
        "trackers" => [
            "display" => "Trackers",
            "option" => [
                "any" => "Allow trackers",
                "deny" => "Deny trackers",
                "require" => "Require trackers"
            ]
        ],
        "cookies" => [
            "display" => "Cookies",
            "option" => [
                "any" => "Allow cookies",
                "deny" => "Deny cookies",
                "require" => "Require cookies"
            ]
        ],
        "affiliate" => [
            "display" => "Affiliate links in body",
            "option" => [
                "any" => "Allow affiliate links",
                "deny" => "Deny affiliate links",
                "require" => "Require affiliate links"
            ]
        ]
    ];

    return array_merge($base, $common);
}
|
||||
|
||||
/**
 * Issue a browser-like GET request and return the response body.
 *
 * @param mixed  $proxy Proxy descriptor, passed untouched to backend->assign_proxy()
 * @param string $url   Target URL
 * @param array  $get   Query parameters, appended as "?..." when non-empty
 * @return mixed        Whatever curl_exec() returned
 * @throws Exception    Carrying the cURL error text when the transfer fails
 */
private function get($proxy, $url, $get = []){

    $headers = [
        "User-Agent: " . config::USER_AGENT,
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.5",
        "Accept-Encoding: gzip",
        "DNT: 1",
        "Connection: keep-alive",
        "Upgrade-Insecure-Requests: 1",
        "Sec-Fetch-Dest: document",
        "Sec-Fetch-Mode: navigate",
        "Sec-Fetch-Site: none",
        "Sec-Fetch-User: ?1"
    ];

    if($get !== []){

        $url .= "?" . http_build_query($get);
    }

    $curlproc = curl_init();

    curl_setopt($curlproc, CURLOPT_URL, $url);

    curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
    curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

    curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

    $this->backend->assign_proxy($curlproc, $proxy);

    $data = curl_exec($curlproc);

    if(curl_errno($curlproc)){

        // read the message first, then release the handle before throwing
        $message = curl_error($curlproc);
        curl_close($curlproc);

        throw new Exception($message);
    }

    curl_close($curlproc);
    return $data;
}
|
||||
|
||||
/**
 * Run a web search.
 *
 * When config::MARGINALIA_API_KEY is set the JSON API is queried (single
 * page, no next-page token). Otherwise the old-search HTML frontend is
 * scraped and a next-page token is stored when pagination is present.
 *
 * @param array $get Request parameters: "s", "format", "file", "javascript",
 *                   "trackers", "cookies", "affiliate", "npt" and, for the
 *                   HTML path, "adtech", "recent", "intitle"
 * @return array     Result structure with "web" entries and optional "npt"
 * @throws Exception On an empty search term, a failed request or an API rate limit
 */
public function web($get){

    if(strlen($get["s"]) === 0){

        throw new Exception("Search term is empty!");
    }

    // the query is built as a list of terms and joined with spaces below
    $search = [$get["s"]];

    $format = $get["format"];
    $file = $get["file"];

    // tri-state filters: "any" adds nothing, "deny" adds the operator
    // prefixed with "-", anything else adds the bare operator
    $operators = [
        "javascript" => "js:true",
        "trackers" => "special:tracking",
        "cookies" => "special:cookies",
        "affiliate" => "special:affiliate"
    ];

    foreach($operators as $key => $operator){

        $value = $get[$key];

        if($value == "any"){ continue; }

        $search[] = $value == "deny" ? "-" . $operator : $operator;
    }

    if($format != "any"){

        $search[] = "format:$format";
    }

    switch($file){

        case "any": break;
        case "nomedia": $search[] = "-special:media"; break;
        case "media": $search[] = "special:media"; break;

        default:
            $search[] = "file:$file";
    }

    $search = implode(" ", $search);

    $out = [
        "status" => "ok",
        "spelling" => [
            "type" => "no_correction",
            "using" => null,
            "correction" => null
        ],
        "npt" => null,
        "answer" => [],
        "web" => [],
        "image" => [],
        "video" => [],
        "news" => [],
        "related" => []
    ];

    // API scraper
    if(config::MARGINALIA_API_KEY !== null){

        try{
            $json =
                $this->get(
                    $this->backend->get_ip(), // no nextpage
                    "https://api.marginalia-search.com/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search),
                    [
                        "count" => 20
                    ]
                );
        }catch(Exception $error){

            throw new Exception("Failed to get JSON");
        }

        if($json == "Slow down"){

            throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
        }

        $json = json_decode($json, true);

        foreach($json["results"] as $result){

            $out["web"][] = [
                "title" => $result["title"],
                "description" => str_replace("\n", " ", $result["description"]),
                "url" => $result["url"],
                "date" => null,
                "type" => "web",
                "thumb" => [
                    "url" => null,
                    "ratio" => null
                ],
                "sublink" => [],
                "table" => []
            ];
        }

        return $out;
    }

    // HTML parser
    $proxy = $this->backend->get_ip();

    if($get["npt"]){

        // continuation: the stored value is the query string of the
        // next-page link, paired with the proxy used for the first page
        [$params, $proxy] =
            $this->backend->get(
                $get["npt"],
                "web"
            );

        try{
            $html =
                $this->get(
                    $proxy,
                    "https://old-search.marginalia.nu/search?" . $params
                );
        }catch(Exception $error){

            throw new Exception("Failed to get HTML");
        }

    }else{

        $params = [
            "query" => $search
        ];

        // yes/no toggles: filter name => [request parameter, value]
        // ("intitle" was previously a second, unreachable "adtech" case, so
        // the title-only filter was never sent)
        $toggles = [
            "adtech" => ["adtech", "reduce"],
            "recent" => ["recent", "recent"],
            "intitle" => ["searchTitle", "title"]
        ];

        foreach($toggles as $name => [$param, $value]){

            if($get[$name] == "yes"){

                $params[$param] = $value;
            }
        }

        try{
            $html =
                $this->get(
                    $proxy,
                    "https://old-search.marginalia.nu/search",
                    $params
                );
        }catch(Exception $error){

            throw new Exception("Failed to get HTML");
        }
    }

    $this->fuckhtml->load($html);

    $sections =
        $this->fuckhtml
        ->getElementsByClassName(
            "card search-result",
            "section"
        );

    foreach($sections as $section){

        $this->fuckhtml->load($section);

        $titles =
            $this->fuckhtml
            ->getElementsByClassName(
                "title",
                "a"
            );

        if(!isset($titles[0]["attributes"]["href"])){

            // a card without a title link cannot yield a usable result
            continue;
        }

        $title = $titles[0];

        $description =
            $this->fuckhtml
            ->getElementsByClassName(
                "description",
                "p"
            );

        if(count($description) !== 0){

            $description =
                $this->fuckhtml
                ->getTextContent(
                    $description[0]
                );
        }else{

            $description = null;
        }

        $sublinks = [];
        $sublink_html =
            $this->fuckhtml
            ->getElementsByClassName("additional-results");

        if(count($sublink_html) !== 0){

            // narrow the parser to the sublink container; $title and
            // $description were already extracted above
            $this->fuckhtml->load($sublink_html[0]);

            $links =
                $this->fuckhtml
                ->getElementsByTagName("a");

            foreach($links as $link){

                $sublinks[] = [
                    "title" =>
                        $this->fuckhtml
                        ->getTextContent(
                            $link
                        ),
                    "date" => null,
                    "description" => null,
                    "url" =>
                        $this->fuckhtml
                        ->getTextContent(
                            $link["attributes"]["href"]
                        )
                ];
            }
        }

        $out["web"][] = [
            "title" =>
                $this->fuckhtml
                ->getTextContent(
                    $title
                ),
            "description" => $description,
            "url" =>
                $this->fuckhtml
                ->getTextContent(
                    $title["attributes"]["href"]
                ),
            "date" => null,
            "type" => "web",
            "thumb" => [
                "url" => null,
                "ratio" => null
            ],
            "sublink" => $sublinks,
            "table" => []
        ];
    }

    // get next page
    $this->fuckhtml->load($html);

    $pagination =
        $this->fuckhtml
        ->getElementsByAttributeValue(
            "aria-label",
            "pagination",
            "nav"
        );

    if(count($pagination) === 0){

        // no pagination
        return $out;
    }

    $this->fuckhtml->load($pagination[0]);

    $pages =
        $this->fuckhtml
        ->getElementsByClassName(
            "page-link",
            "a"
        );

    $found_current_page = false;

    foreach($pages as $page){

        if(
            stripos(
                $page["attributes"]["class"],
                "active"
            ) !== false
        ){

            $found_current_page = true;
            continue;
        }

        if($found_current_page){

            // we found current page index, and we iterated over
            // the next page <a>: remember its query string
            $out["npt"] =
                $this->backend->store(
                    parse_url(
                        $page["attributes"]["href"],
                        PHP_URL_QUERY
                    ),
                    "web",
                    $proxy
                );
            break;
        }
    }

    return $out;
}
|
||||
}
|
||||
|
||||
1174
scraper/mojeek.php
Normal file
1174
scraper/mojeek.php
Normal file
File diff suppressed because it is too large
Load diff
236
scraper/mwmbl.php
Normal file
236
scraper/mwmbl.php
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
<?php
|
||||
|
||||
class mwmbl{
|
||||
|
||||
/**
 * Load the "mwmbl" backend helper and the HTML parser.
 */
public function __construct(){

    // pull in both libraries first, then instantiate them
    include "lib/backend.php";
    include "lib/fuckhtml.php";

    $this->backend = new backend("mwmbl");
    $this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
/**
 * This scraper exposes no filters.
 *
 * @param string $page Unused
 * @return array       Always an empty list
 */
public function getfilters($page){

    $filters = [];

    return $filters;
}
|
||||
|
||||
/**
 * Issue a browser-like GET request over HTTP/2 and return the response body.
 *
 * @param mixed  $proxy Proxy descriptor, passed untouched to backend->assign_proxy()
 * @param string $url   Target URL
 * @param array  $get   Query parameters, appended as "?..." when non-empty
 * @return mixed        Whatever curl_exec() returned
 * @throws Exception    Carrying the cURL error text when the transfer fails
 */
private function get($proxy, $url, $get = []){

    if($get !== []){

        $url .= "?" . http_build_query($get);
    }

    $curlproc = curl_init();

    curl_setopt($curlproc, CURLOPT_URL, $url);

    // use http2
    curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

    curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
    curl_setopt($curlproc, CURLOPT_HTTPHEADER,
        ["User-Agent: " . config::USER_AGENT,
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.5",
        "Accept-Encoding: gzip",
        "Referer: https://beta.mwmbl.org/",
        "DNT: 1",
        "Sec-GPC: 1",
        "Connection: keep-alive",
        "Upgrade-Insecure-Requests: 1",
        "Sec-Fetch-Dest: document",
        "Sec-Fetch-Mode: navigate",
        "Sec-Fetch-Site: same-origin",
        "Priority: u=0, i",
        "Sec-Fetch-User: ?1"]
    );

    curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

    $this->backend->assign_proxy($curlproc, $proxy);

    $data = curl_exec($curlproc);

    if(curl_errno($curlproc)){

        // read the message first, then release the handle before throwing
        $message = curl_error($curlproc);
        curl_close($curlproc);

        throw new Exception($message);
    }

    curl_close($curlproc);
    return $data;
}
|
||||
|
||||
/**
 * Run a web search and return the single page of results.
 *
 * @param array $get Request parameters; only "s" (the query) is read
 * @return array     Result structure with "web" entries; "npt" is always null
 * @throws Exception When the search term is empty or the page cannot be fetched
 */
public function web($get){

    $search = $get["s"];
    if(strlen($search) === 0){

        throw new Exception("Search term is empty!");
    }

    try{
        $html = $this->get(
            $this->backend->get_ip(), // no next page!
            "https://beta.mwmbl.org/",
            ["q" => $search]
        );
    }catch(Exception $error){

        throw new Exception("Failed to fetch HTML. If you're getting a timeout, make sure you have curl-impersonate setup.");
    }

    $out = [
        "status" => "ok",
        "spelling" => [
            "type" => "no_correction",
            "using" => null,
            "correction" => null
        ],
        "npt" => null,
        "answer" => [],
        "web" => [],
        "image" => [],
        "video" => [],
        "news" => [],
        "related" => []
    ];

    $parser = $this->fuckhtml;

    $parser->load($html);

    foreach($parser->getElementsByClassName("result", "li") as $result){

        $parser->load($result);

        // paragraphs of this result; title and extract are picked out of
        // this collection once the sublinks have been handled
        $p = $parser->getElementsByTagName("p");

        $sublinks = [];

        $mores = $parser->getElementsByClassName("result-link-more", "div");

        foreach($mores as $more){

            $parser->load($more);

            $as = $parser->getElementsByClassName("more", "a");

            if(count($as) === 0){

                // ?? invalid
                continue;
            }

            $more_title =
                $parser->getTextContent(
                    $parser->getElementsByClassName("more-title", "span")[0]
                );

            $more_extract =
                $parser->getTextContent(
                    $parser->getElementsByClassName("more-extract", "span")[0]
                );

            $more_url =
                $parser->getTextContent(
                    $as[0]["attributes"]["href"]
                );

            $sublinks[] = [
                "title" => $this->titledots($more_title),
                "description" => $this->titledots($more_extract),
                "url" => $more_url
            ];
        }

        // reset: point the parser back at the whole result
        $parser->load($result);

        $title =
            $parser->getTextContent(
                $parser->getElementsByClassName("title", $p)[0]
            );

        $extract =
            $parser->getTextContent(
                $parser->getElementsByClassName("extract", $p)[0]
            );

        $url =
            $parser->getTextContent(
                $parser->getElementsByTagName("a")[0]["attributes"]["href"]
            );

        $out["web"][] = [
            "title" => $this->titledots($title),
            "description" => $this->titledots($extract),
            "url" => $url,
            "date" => null,
            "type" => "web",
            "thumb" => [
                "url" => null,
                "ratio" => null
            ],
            "sublink" => $sublinks,
            "table" => []
        ];
    }

    return $out;
}
|
||||
|
||||
/**
 * Strip trailing "…" (U+2026) characters from a string.
 *
 * rtrim() cannot be used here: its character list is interpreted byte by
 * byte, so passing the three-byte ellipsis would also strip the final byte
 * of unrelated UTF-8 characters (e.g. "¦" or "Ā") and leave broken text.
 * Whole ellipsis sequences are removed instead.
 *
 * @param string $title Text that may end in one or more ellipses
 * @return string       The text without trailing ellipses
 */
private function titledots($title){

    $title = (string)$title;

    $dots = "…";
    $length = strlen($dots);

    // compare complete byte sequences so multi-byte characters stay intact
    while(substr($title, -$length) === $dots){

        $title = substr($title, 0, -$length);
    }

    return $title;
}
|
||||
}
|
||||
439
scraper/pinterest.php
Normal file
439
scraper/pinterest.php
Normal file
|
|
@ -0,0 +1,439 @@
|
|||
<?php
|
||||
|
||||
class pinterest{
|
||||
|
||||
/**
 * Load the backend library and bind a "pinterest" backend helper.
 */
public function __construct(){

    include "lib/backend.php";

    $backend = new backend("pinterest");
    $this->backend = $backend;
}
|
||||
|
||||
/**
 * This scraper exposes no filters.
 *
 * @param string $page Unused
 * @return array       Always an empty list
 */
public function getfilters($page){

    $filters = [];

    return $filters;
}
|
||||
|
||||
/**
 * Perform a request against the Pinterest resource API.
 *
 * With $header_data_post === null a GET request is sent, Set-Cookie headers
 * are collected and $cookies is overwritten with
 * ["csrf" => csrftoken value, "cookie" => "name=value; ..." string].
 * Otherwise $get is sent as a form-encoded POST body using the CSRF token
 * and cookie string already held in $cookies; $header_data_post is the
 * search term embedded in the X-Pinterest-Source-Url header.
 *
 * $cookies now carries a default so that the optional $get no longer
 * precedes a required parameter (deprecated since PHP 8.0); existing
 * callers pass every argument and are unaffected.
 *
 * @param mixed       $proxy            Proxy descriptor for backend->assign_proxy()
 * @param string      $url              Endpoint URL
 * @param array       $get              Query (GET) or body (POST) parameters
 * @param array       $cookies          In/out cookie state, see above
 * @param string|null $header_data_post Search term for POST, null for GET
 * @return mixed                        Whatever curl_exec() returned
 * @throws Exception                    On a cURL error or when no csrftoken cookie is received
 */
private function get($proxy, $url, $get = [], &$cookies = [], $header_data_post = null){

    $curlproc = curl_init();

    if($header_data_post === null){

        // handling GET

        // extract cookies
        $cookies_tmp = [];
        curl_setopt($curlproc, CURLOPT_HEADERFUNCTION, function($curlproc, $header) use (&$cookies_tmp){

            // cURL expects the number of bytes consumed as return value
            $length = strlen($header);

            $header = explode(":", $header, 2);

            if(
                isset($header[1]) &&
                trim(strtolower($header[0])) == "set-cookie"
            ){

                $cookie_tmp = explode("=", trim($header[1]), 2);

                // keep only the value, dropping attributes after ";";
                // a cookie without "=" is stored with an empty value
                $cookies_tmp[trim($cookie_tmp[0])] =
                    explode(";", $cookie_tmp[1] ?? "", 2)[0];
            }

            return $length;
        });

        curl_setopt($curlproc, CURLOPT_HTTPHEADER,
            ["User-Agent: " . config::USER_AGENT,
            "Accept: application/json, text/javascript, */*, q=0.01",
            "Accept-Language: en-US,en;q=0.5",
            "Accept-Encoding: gzip",
            "Referer: https://ca.pinterest.com/",
            "X-Requested-With: XMLHttpRequest",
            "X-APP-VERSION: 78f8764",
            "X-Pinterest-AppState: active",
            "X-Pinterest-Source-Url: /",
            "X-Pinterest-PWS-Handler: www/index.js",
            "screen-dpr: 1",
            "is-preload-enabled: 1",
            "DNT: 1",
            "Sec-GPC: 1",
            "Sec-Fetch-Dest: empty",
            "Sec-Fetch-Mode: cors",
            "Sec-Fetch-Site: same-origin",
            "Connection: keep-alive",
            "Alt-Used: ca.pinterest.com",
            "Priority: u=0",
            "TE: trailers"]
        );

        if($get !== []){

            $url .= "?" . http_build_query($get);
        }
    }else{

        // handling POST (pagination)
        $get = http_build_query($get);

        curl_setopt($curlproc, CURLOPT_HTTPHEADER,
            ["User-Agent: " . config::USER_AGENT,
            "Accept: application/json, text/javascript, */*, q=0.01",
            "Accept-Language: en-US,en;q=0.5",
            "Accept-Encoding: gzip",
            "Content-Type: application/x-www-form-urlencoded",
            "Content-Length: " . strlen($get),
            "Referer: https://ca.pinterest.com/",
            "X-Requested-With: XMLHttpRequest",
            "X-APP-VERSION: 78f8764",
            "X-CSRFToken: " . $cookies["csrf"],
            "X-Pinterest-AppState: active",
            "X-Pinterest-Source-Url: /search/pins/?rs=ac&len=2&q=" . urlencode($header_data_post) . "&eq=" . urlencode($header_data_post),
            "X-Pinterest-PWS-Handler: www/search/[scope].js",
            "screen-dpr: 1",
            "is-preload-enabled: 1",
            "Origin: https://ca.pinterest.com",
            "DNT: 1",
            "Sec-GPC: 1",
            "Sec-Fetch-Dest: empty",
            "Sec-Fetch-Mode: cors",
            "Sec-Fetch-Site: same-origin",
            "Connection: keep-alive",
            "Alt-Used: ca.pinterest.com",
            "Cookie: " . $cookies["cookie"],
            "TE: trailers"]
        );

        curl_setopt($curlproc, CURLOPT_POST, true);
        curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
    }

    curl_setopt($curlproc, CURLOPT_URL, $url);

    curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding

    // http2 bypass
    curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

    curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
    curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

    $this->backend->assign_proxy($curlproc, $proxy);

    $data = curl_exec($curlproc);

    if(curl_errno($curlproc)){

        // read the message first, then release the handle before throwing
        $message = curl_error($curlproc);
        curl_close($curlproc);

        throw new Exception($message);
    }

    curl_close($curlproc);

    if($header_data_post === null){

        if(!isset($cookies_tmp["csrftoken"])){

            throw new Exception("Failed to grep CSRF token");
        }

        // flatten every received cookie into a single Cookie header value
        $cookie_pairs = [];

        foreach($cookies_tmp as $cookie_name => $cookie_value){

            $cookie_pairs[] = $cookie_name . "=" . $cookie_value;
        }

        $cookies = [
            "csrf" => $cookies_tmp["csrftoken"],
            "cookie" => rtrim(implode("; ", $cookie_pairs), " ;")
        ];
    }

    return $data;
}
|
||||
|
||||
/**
 * Run a Pinterest pin search and return one page of results.
 *
 * A fresh search issues a GET request, which also captures the session
 * cookies and CSRF token. A continuation ($get["npt"] set) restores the
 * query, bookmark, cookies and proxy from the backend store and issues a
 * POST request carrying the bookmark.
 *
 * @param array $get Request parameters: "s" (query) and "npt" (token)
 * @return array     ["status" => "ok", "npt" => next-page token or null,
 *                   "image" => list of results]
 * @throws Exception On an empty query, a failed request, undecodable JSON,
 *                   a non-success API status or a sensitivity notice
 */
public function image($get){

    if($get["npt"]){

        [$data, $proxy] =
            $this->backend->get(
                $get["npt"], "images"
            );

        $data = json_decode($data, true);

        $search = $data["q"];
        $cookies = $data["cookies"];

        $source_url = "/search/pins/?q=" . urlencode($search) . "&rs=typed";

        try{
            $json =
                $this->get(
                    $proxy,
                    "https://ca.pinterest.com/resource/BaseSearchResource/get/",
                    [
                        "source_url" => $source_url,
                        "data" => json_encode(
                            [
                                "options" => [
                                    "applied_unified_filters" => null,
                                    "appliedProductFilters" => "---",
                                    "article" => null,
                                    "auto_correction_disabled" => false,
                                    "corpus" => null,
                                    "customized_rerank_type" => null,
                                    "domains" => null,
                                    "dynamicPageSizeExpGroup" => null,
                                    "filters" => null,
                                    "journey_depth" => null,
                                    "page_size" => null,
                                    "price_max" => null,
                                    "price_min" => null,
                                    "query_pin_sigs" => null,
                                    "query" => $search,
                                    "redux_normalize_feed" => true,
                                    "request_params" => null,
                                    "rs" => "typed",
                                    "scope" => "pins",
                                    "selected_one_bar_modules" => null,
                                    "source_id" => null,
                                    "source_module_id" => null,
                                    "source_url" => $source_url,
                                    "top_pin_id" => null,
                                    "top_pin_ids" => null,
                                    "bookmarks" => [
                                        $data["bookmark"]
                                    ]
                                ],
                                "context" => []
                            ],
                            JSON_UNESCAPED_SLASHES
                        )
                    ],
                    $cookies,
                    $search
                );

        }catch(Exception $error){

            throw new Exception("Failed to fetch JSON");
        }

    }else{

        $search = $get["s"];
        if(strlen($search) === 0){

            throw new Exception("Search term is empty!");
        }

        // The request mirrors what the web client sends to
        // /resource/BaseSearchResource/get/ : three query parameters,
        //   source_url = the search page path
        //   data       = JSON {"options": {...}, "context": {...}}
        //   _          = a millisecond-style timestamp
        // NOTE(review): "rs" carries the search term here but "typed" in
        // the pagination request -- confirm which one is intended.
        $source_url = "/search/pins/?q=" . urlencode($search) . "&rs=" . urlencode($search);

        $filter = [
            "source_url" => $source_url,
            "rs" => "typed",
            "data" =>
                json_encode(
                    [
                        "options" => [
                            "applied_unified_filters" => null,
                            "appliedProductFilters" => "---",
                            "article" => null,
                            "corpus" => null,
                            "customized_rerank_type" => null,
                            "domains" => null,
                            "dynamicPageSizeExpGroup" => null,
                            "filters" => null,
                            "journey_depth" => null,
                            "page_size" => null,
                            "price_max" => null,
                            "price_min" => null,
                            "query_pin_sigs" => null,
                            "query" => $search,
                            "redux_normalize_feed" => true,
                            "request_params" => null,
                            "rs" => "ac",
                            "scope" => "pins", // pins, boards, videos,
                            "selected_one_bar_modules" => null,
                            "source_id" => null,
                            "source_module_id" => null,
                            "source_url" => $source_url,
                            "top_pin_id" => null,
                            "top_pin_ids" => null
                        ],
                        "context" => []
                    ]
                ),
            "_" => substr(str_replace(".", "", (string)microtime(true)), 0, -1)
        ];

        $proxy = $this->backend->get_ip();
        $cookies = [];

        try{
            // $cookies is filled in by get() from the response headers
            $json =
                $this->get(
                    $proxy,
                    "https://ca.pinterest.com/resource/BaseSearchResource/get/",
                    $filter,
                    $cookies,
                    null
                );

        }catch(Exception $error){

            throw new Exception("Failed to fetch JSON");
        }
    }

    $json = json_decode($json, true);

    if($json === null){

        throw new Exception("Failed to decode JSON");
    }

    $out = [
        "status" => "ok",
        "npt" => null,
        "image" => []
    ];

    if(!isset($json["resource_response"]["status"])){

        throw new Exception("Unknown API failure");
    }

    if($json["resource_response"]["status"] != "success"){

        $status = "Got non-OK response: " . $json["resource_response"]["status"];

        if(isset($json["resource_response"]["message"])){

            $status .= " - " . $json["resource_response"]["message"];
        }

        throw new Exception($status);
    }

    if(
        isset(
            $json["resource_response"]["sensitivity"]
            ["notices"][0]["description"]["text"]
        )
    ){

        throw new Exception(
            "Pinterest returned a notice: " .
            $json["resource_response"]["sensitivity"]["notices"][0]["description"]["text"]
        );
    }

    // get NPT
    if(isset($json["resource_response"]["bookmark"])){

        $out["npt"] =
            $this->backend->store(
                json_encode([
                    "q" => $search,
                    "bookmark" => $json["resource_response"]["bookmark"],
                    "cookies" => $cookies
                ]),
                "images",
                $proxy
            );
    }

    // a successful response without a result list yields no images
    $results = $json["resource_response"]["data"]["results"] ?? [];

    foreach($results as $item){

        $type = $item["type"] ?? null;

        if($type != "pin" && $type != "board"){

            // only pins and boards carry an image object
            continue;
        }

        if(
            !isset($item["images"]) ||
            !is_array($item["images"]) ||
            $item["images"] === []
        ){

            // nothing to show for this entry
            continue;
        }

        /*
            Handle image object
        */
        $images = array_values($item["images"]);
        $image = $images[count($images) - 1]; // original

        // 236x; fall back to the original when only one rendition exists
        $thumb = $images[1] ?? $image;

        $title = [];

        if(
            isset($item["grid_title"]) &&
            trim($item["grid_title"]) != ""
        ){

            $title[] = $item["grid_title"];
        }

        if(
            isset($item["description"]) &&
            trim($item["description"]) != ""
        ){

            $title[] = $item["description"];
        }

        $title = implode(": ", $title);

        if(
            $title == "" &&
            isset($item["board"]["name"]) &&
            trim($item["board"]["name"]) != ""
        ){

            $title = $item["board"]["name"];
        }

        if($title == ""){

            $title = null;
        }

        // link back to the pin itself when no external link is attached
        $link = $item["link"] ?? null;

        $out["image"][] = [
            "title" => $title,
            "source" => [
                [
                    "url" => $image["url"],
                    "width" => (int)$image["width"],
                    "height" => (int)$image["height"]
                ],
                [
                    "url" => $thumb["url"],
                    "width" => (int)$thumb["width"],
                    "height" => (int)$thumb["height"]
                ]
            ],
            "url" =>
                $link === null ?
                "https://ca.pinterest.com/pin/" . $item["id"] :
                $link
        ];
    }

    return $out;
}
|
||||
}
|
||||
937
scraper/qwant.php
Normal file
937
scraper/qwant.php
Normal file
|
|
@ -0,0 +1,937 @@
|
|||
<?php
|
||||
|
||||
/**
 * Scraper for the Qwant search API (web, image, video and news endpoints).
 *
 * Every public search method takes the request parameter array ($get) and
 * returns an associative array whose "status" is "ok"; failures are reported
 * by throwing Exception.
 */
class qwant{
	
	/**
	 * Helper created in the constructor. This class uses it to obtain a proxy
	 * (get_ip), attach it to a cURL handle (assign_proxy) and to save/restore
	 * next-page state (store/get).
	 *
	 * Declared explicitly because creating properties dynamically is
	 * deprecated as of PHP 8.2; left public, which is the visibility the
	 * implicit property had.
	 */
	public $backend;
	
	public function __construct(){
		
		// include_once rather than include: other scrapers load this same
		// file, and loading it twice in one request would presumably
		// redeclare the backend class and abort.
		include_once "lib/backend.php";
		$this->backend = new backend("qwant");
	}
	
	/**
	 * Describe the filters available for a search tab.
	 *
	 * @param string $page "web", "images", "videos" or "news". Any other
	 *                     value yields only the nsfw and country filters.
	 * @return array filter name => ["display" => label, "option" => [value => label]]
	 */
	public function getfilters($page){
		
		// "Time posted" choices shared by the web and images tabs
		$time = [
			"display" => "Time posted",
			"option" => [
				"any" => "Any time",
				"day" => "Past 24 hours",
				"week" => "Past week",
				"month" => "Past month"
			]
		];
		
		$base = [
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"maybe" => "Maybe",
					"no" => "No"
				]
			],
			"country" => [
				"display" => "Country",
				"option" => [
					"en_US" => "United States",
					"fr_FR" => "France",
					"en_GB" => "Great Britain",
					"de_DE" => "Germany",
					"it_IT" => "Italy",
					"es_AR" => "Argentina",
					"en_AU" => "Australia",
					"es_ES" => "Spain (es)",
					"ca_ES" => "Spain (ca)",
					"cs_CZ" => "Czech Republic",
					"ro_RO" => "Romania",
					"el_GR" => "Greece",
					"zh_CN" => "China",
					"zh_HK" => "Hong Kong",
					"en_NZ" => "New Zealand",
					"th_TH" => "Thailand",
					"ko_KR" => "South Korea",
					"sv_SE" => "Sweden",
					"nb_NO" => "Norway",
					"da_DK" => "Denmark",
					"hu_HU" => "Hungary",
					"et_EE" => "Estonia",
					"es_MX" => "Mexico",
					"es_CL" => "Chile",
					"en_CA" => "Canada (en)",
					"fr_CA" => "Canada (fr)",
					"en_MY" => "Malaysia",
					"bg_BG" => "Bulgaria",
					"fi_FI" => "Finland",
					"pl_PL" => "Poland",
					"nl_NL" => "Netherlands",
					"pt_PT" => "Portugal",
					"de_CH" => "Switzerland (de)",
					"fr_CH" => "Switzerland (fr)",
					"it_CH" => "Switzerland (it)",
					"de_AT" => "Austria",
					"fr_BE" => "Belgium (fr)",
					"nl_BE" => "Belgium (nl)",
					"en_IE" => "Ireland",
					"he_IL" => "Israel"
				]
			]
		];
		
		$extra = [];
		
		switch($page){
			
			case "web":
				$extra = [
					"time" => $time,
					"extendedsearch" => [
						// no display, wont show in interface
						"option" => [
							"yes" => "Yes",
							"no" => "No"
						]
					]
				];
				break;
			
			case "images":
				$extra = [
					"time" => $time,
					"size" => [
						"display" => "Size",
						"option" => [
							"any" => "Any size",
							"large" => "Large",
							"medium" => "Medium",
							"small" => "Small"
						]
					],
					"color" => [
						"display" => "Color",
						"option" => [
							"any" => "Any color",
							"coloronly" => "Color only",
							"monochrome" => "Monochrome",
							"black" => "Black",
							"brown" => "Brown",
							"gray" => "Gray",
							"white" => "White",
							"yellow" => "Yellow",
							"orange" => "Orange",
							"red" => "Red",
							"pink" => "Pink",
							"purple" => "Purple",
							"blue" => "Blue",
							"teal" => "Teal",
							"green" => "Green"
						]
					],
					"imagetype" => [
						"display" => "Type",
						"option" => [
							"any" => "Any type",
							"animatedgif" => "Animated GIF",
							"photo" => "Photograph",
							"transparent" => "Transparent"
						]
					],
					"license" => [
						"display" => "License",
						"option" => [
							"any" => "Any license",
							"share" => "Non-commercial reproduction and sharing",
							"sharecommercially" => "Reproduction and sharing",
							"modify" => "Non-commercial reproduction, sharing and modification",
							"modifycommercially" => "Reproduction, sharing and modification",
							"public" => "Public domain"
						]
					]
				];
				break;
			
			case "videos":
				$extra = [
					"order" => [
						"display" => "Order by",
						"option" => [
							"relevance" => "Relevance",
							"views" => "Views",
							"date" => "Most recent"
						]
					],
					"source" => [
						"display" => "Source",
						"option" => [
							"any" => "Any source",
							"youtube" => "YouTube",
							"dailymotion" => "Dailymotion"
						]
					]
				];
				break;
			
			case "news":
				$extra = [
					"time" => [
						"display" => "Time posted",
						"option" => [
							"any" => "Any time",
							"hour" => "Less than 1 hour ago",
							"day" => "Past 24 hours",
							"week" => "Past week",
							"month" => "Past month"
						]
					],
					"order" => [
						"display" => "Order by",
						"option" => [
							"relevance" => "Relevance",
							"date" => "Most recent"
						]
					]
				];
				break;
		}
		
		return array_merge($base, $extra);
	}
	
	/**
	 * Perform a GET request through the given proxy and return the raw body.
	 *
	 * @param mixed  $proxy proxy descriptor handed to backend::assign_proxy()
	 * @param string $url   endpoint URL without query string
	 * @param array  $get   query parameters; appended when non-empty
	 * @return string|bool  whatever curl_exec() returned
	 * @throws Exception carrying the cURL error text when the transfer fails
	 */
	private function get($proxy, $url, $get = []){
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: application/json, text/plain, */*",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Origin: https://www.qwant.com",
			"Referer: https://www.qwant.com/",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"TE: trailers"
		];
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		$curlproc = curl_init();
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
		
		// Bypass HTTP/2 check
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message first, then release the handle, so the
			// failure path does not leave it open
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		
		return $data;
	}
	
	/**
	 * Web search.
	 *
	 * @param array $get reads "npt" (next-page token) or, for a fresh search,
	 *                   "s", "time", "country" and "nsfw"; "extendedsearch"
	 *                   is read in both cases.
	 * @return array with keys status, spelling, npt, answer, web, image,
	 *               video, news and related
	 * @throws Exception on empty/oversized query, transport failure,
	 *                   undecodable JSON or an error reported by the server
	 */
	public function web($get){
		
		if($get["npt"]){
			
			// continuing a search: restore the stored request parameters
			// together with the proxy saved alongside them
			[$params, $proxy] = $this->backend->get($get["npt"], "web");
			
			$params = json_decode($params, true);
			
		}else{
			
			$search = $get["s"];
			
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			if(strlen($search) > 2048){
				
				throw new Exception("Search term is too long!");
			}
			
			$proxy = $this->backend->get_ip();
			
			$params = [
				"q" => $search,
				"freshness" => $get["time"],
				"count" => 10,
				"locale" => $get["country"],
				"offset" => 0,
				"device" => "desktop",
				"tgp" => 3,
				// unrecognised nsfw values fall back to 0
				"safesearch" => $this->safesearch($get["nsfw"]) ?? 0,
				"displayed" => "true"
			];
		}
		
		try{
			$json =
				$this->get(
					$proxy,
					"https://fdn.qwant.com/v3/search/web",
					$params
				);
			
		}catch(Exception $error){
			
			// keep the transport error reachable via getPrevious()
			throw new Exception("Could not fetch JSON", 0, $error);
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		if(isset($json["data"]["message"][0])){
			
			throw new Exception("Server returned an error:\n" . $json["data"]["message"][0]);
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		if(
			($json["status"] ?? null) != "success" &&
			($json["data"]["error_code"] ?? null) === 5
		){
			
			// no results
			return $out;
		}
		
		$this->detect_errors($json);
		
		if(!isset($json["data"]["result"]["items"]["mainline"])){
			
			throw new Exception("Server did not return a result object");
		}
		
		// data is OK, parse
		
		// get instant answer (costs a second request, so only on demand)
		$endpoint = $json["data"]["result"]["items"]["sidebar"][0]["endpoint"] ?? null;
		
		if(
			$get["extendedsearch"] == "yes" &&
			$endpoint !== null
		){
			
			$answer = $this->instant_answer($proxy, $endpoint);
			
			if($answer !== null){
				
				$out["answer"][] = $answer;
			}
		}
		
		// get word correction
		$altered = $json["data"]["query"]["queryContext"]["alteredQuery"] ?? null;
		
		if($altered !== null){
			
			$out["spelling"] = [
				"type" => "including",
				"using" => $altered,
				"correction" => $json["data"]["query"]["queryContext"]["alterationOverrideQuery"] ?? null
			];
		}
		
		// check for next page
		if(($json["data"]["result"]["lastPage"] ?? null) === false){
			
			$params["offset"] += 10;
			
			$out["npt"] =
				$this->backend->store(
					json_encode($params),
					"web",
					$proxy
				);
		}
		
		// parse results
		foreach($json["data"]["result"]["items"]["mainline"] as $item){
			
			switch($item["type"]){ // ignores ads
				
				case "web":
					
					$first_iteration = true;
					
					foreach($item["items"] as $result){
						
						// detect gibberish results
						if(
							$first_iteration &&
							!isset($result["urlPingSuffix"])
						){
							
							throw new Exception("Qwant returned gibberish results");
						}
						
						$first_iteration = false;
						
						if(isset($result["thumbnailUrl"])){
							
							$thumb = [
								"url" => $this->unshitimage($result["thumbnailUrl"]),
								"ratio" => "16:9"
							];
						}else{
							
							$thumb = [
								"url" => null,
								"ratio" => null
							];
						}
						
						$sublinks = [];
						
						foreach($result["links"] ?? [] as $link){
							
							$sublinks[] = [
								"title" => $this->trimdots($link["title"]),
								"date" => null,
								"description" => isset($link["desc"]) ? $this->trimdots($link["desc"]) : null,
								"url" => $link["url"]
							];
						}
						
						$out["web"][] = [
							"title" => $this->trimdots($result["title"]),
							"description" => $this->trimdots($result["desc"] ?? null),
							"url" => $result["url"],
							"date" => null,
							"type" => "web",
							"thumb" => $thumb,
							"sublink" => $sublinks,
							"table" => []
						];
					}
					break;
				
				case "images":
					foreach($item["items"] as $image){
						
						$out["image"][] = [
							"title" => $image["title"],
							"source" => [
								[
									"url" => $image["media"],
									"width" => (int)$image["width"],
									"height" => (int)$image["height"]
								],
								[
									"url" => $this->unshitimage($image["thumbnail"]),
									"width" => $image["thumb_width"],
									"height" => $image["thumb_height"]
								]
							],
							"url" => $image["url"]
						];
					}
					break;
				
				case "videos":
					foreach($item["items"] as $video){
						
						$duration = $video["duration"] ?? null;
						$thumbnail = $video["thumbnail"] ?? null;
						
						$out["video"][] = [
							"title" => $video["title"],
							"description" => null,
							"date" => (int)$video["date"],
							// the value is divided by 1000 before being returned
							"duration" => $duration === null ? null : $duration / 1000,
							"views" => null,
							"thumb" =>
								$thumbnail === null ?
								[
									"url" => null,
									"ratio" => null
								] :
								[
									"url" => $this->unshitimage($thumbnail),
									"ratio" => "16:9"
								],
							"url" => $video["url"]
						];
					}
					break;
				
				case "related_searches":
					foreach($item["items"] as $related){
						
						$out["related"][] = $related["text"];
					}
					break;
			}
		}
		
		return $out;
	}
	
	/**
	 * Image search.
	 *
	 * @param array $get reads "npt" or, for a fresh search, "s", "country",
	 *                   "time", "size", "color", "imagetype", "license" and
	 *                   "nsfw"
	 * @return array with keys status, npt and image
	 * @throws Exception on empty query, transport failure, undecodable JSON
	 *                   or an error reported by the server
	 */
	public function image($get){
		
		if($get["npt"]){
			
			[$params, $proxy] =
				$this->backend->get(
					$get["npt"],
					"images"
				);
			
			$params = json_decode($params, true);
			
		}else{
			
			$search = $get["s"];
			
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
			
			$params = [
				"t" => "images",
				"q" => $search,
				"count" => 125,
				"locale" => $get["country"],
				"offset" => 0, // increment by 125
				"device" => "desktop",
				"tgp" => 3
			];
			
			if($get["time"] != "any"){
				
				$params["freshness"] = $get["time"];
			}
			
			// these filters are forwarded under their own name unless "any"
			foreach(["size", "color", "imagetype", "license"] as $filter){
				
				if($get[$filter] != "any"){
					
					$params[$filter] = $get[$filter];
				}
			}
			
			$level = $this->safesearch($get["nsfw"]);
			
			if($level !== null){
				
				$params["safesearch"] = $level;
			}
		}
		
		try{
			$json = $this->get(
				$proxy,
				"https://api.qwant.com/v3/search/images",
				$params
			);
			
		}catch(Exception $err){
			
			throw new Exception("Failed to get JSON", 0, $err);
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Failed to decode JSON");
		}
		
		$this->detect_errors($json);
		
		// a "mainline" key is the web-search response shape; treat it as
		// a bogus answer for this endpoint
		if(isset($json["data"]["result"]["items"]["mainline"])){
			
			throw new Exception("Qwant returned gibberish results");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		if(($json["data"]["result"]["lastPage"] ?? null) === false){
			
			$params["offset"] += 125;
			
			$out["npt"] = $this->backend->store(
				json_encode($params),
				"images",
				$proxy
			);
		}
		
		foreach($json["data"]["result"]["items"] ?? [] as $image){
			
			$out["image"][] = [
				"title" => $this->trimdots($image["title"]),
				"source" => [
					[
						"url" => $image["media"],
						"width" => $image["width"],
						"height" => $image["height"]
					],
					[
						"url" => $this->unshitimage($image["thumbnail"]),
						"width" => $image["thumb_width"],
						"height" => $image["thumb_height"]
					]
				],
				"url" => $image["url"]
			];
		}
		
		return $out;
	}
	
	/**
	 * Video search (first page only; "npt" is always null).
	 *
	 * NOTE(review): getfilters("videos") offers "order" and "source", but
	 * neither is read here. The matching request parameters need to be
	 * confirmed against the Qwant API before wiring them in.
	 *
	 * @param array $get reads "s", "country" and "nsfw"
	 * @return array with keys status, npt, video, author, livestream,
	 *               playlist and reel
	 * @throws Exception on empty query, transport failure, undecodable JSON
	 *                   or an error reported by the server
	 */
	public function video($get){
		
		$search = $get["s"];
		
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$params = [
			"t" => "videos",
			"q" => $search,
			"count" => 50,
			"locale" => $get["country"],
			"offset" => 0, // dont implement pagination
			"device" => "desktop",
			"tgp" => 3
		];
		
		$level = $this->safesearch($get["nsfw"]);
		
		if($level !== null){
			
			$params["safesearch"] = $level;
		}
		
		try{
			$json =
				$this->get(
					$this->backend->get_ip(),
					"https://api.qwant.com/v3/search/videos",
					$params
				);
			
		}catch(Exception $error){
			
			throw new Exception("Could not fetch JSON", 0, $error);
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Could not parse JSON");
		}
		
		$this->detect_errors($json);
		
		if(isset($json["data"]["result"]["items"]["mainline"])){
			
			throw new Exception("Qwant returned gibberish results");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"video" => [],
			"author" => [],
			"livestream" => [],
			"playlist" => [],
			"reel" => []
		];
		
		foreach($json["data"]["result"]["items"] ?? [] as $video){
			
			if(empty($video["thumbnail"])){
				
				$thumb = [
					"url" => null,
					"ratio" => null
				];
			}else{
				
				$thumb = [
					"url" => $this->unshitimage($video["thumbnail"], false),
					"ratio" => "16:9"
				];
			}
			
			$duration = (int)($video["duration"] ?? 0);
			
			$out["video"][] = [
				"title" => $video["title"],
				"description" => $this->limitstrlen($video["desc"] ?? null),
				"author" => [
					"name" => $video["channel"] ?? null,
					"url" => null,
					"avatar" => null
				],
				"date" => (int)$video["date"],
				"duration" => $duration === 0 ? null : $duration,
				"views" => null,
				"thumb" => $thumb,
				// drop the ?syndication=... suffix from the link
				"url" => preg_replace("/\?syndication=.+/", "", $video["url"])
			];
		}
		
		return $out;
	}
	
	/**
	 * News search (first page only; "npt" is always null).
	 *
	 * NOTE(review): getfilters("news") offers "time" and "order", but
	 * neither is read here. The matching request parameters need to be
	 * confirmed against the Qwant API before wiring them in.
	 *
	 * @param array $get reads "s", "country" and "nsfw"
	 * @return array with keys status, npt and news
	 * @throws Exception on empty query, transport failure, undecodable JSON
	 *                   or an error reported by the server
	 */
	public function news($get){
		
		$search = $get["s"];
		
		if(strlen($search) === 0){
			
			throw new Exception("Search term is empty!");
		}
		
		$params = [
			"t" => "news",
			"q" => $search,
			"count" => 50,
			"locale" => $get["country"],
			"offset" => 0, // dont implement pagination
			"device" => "desktop",
			"tgp" => 3
		];
		
		$level = $this->safesearch($get["nsfw"]);
		
		if($level !== null){
			
			$params["safesearch"] = $level;
		}
		
		try{
			$json =
				$this->get(
					$this->backend->get_ip(),
					"https://api.qwant.com/v3/search/news",
					$params
				);
			
		}catch(Exception $error){
			
			throw new Exception("Could not fetch JSON", 0, $error);
		}
		
		$json = json_decode($json, true);
		
		if($json === null){
			
			throw new Exception("Could not parse JSON");
		}
		
		$this->detect_errors($json);
		
		if(isset($json["data"]["result"]["items"]["mainline"])){
			
			throw new Exception("Qwant returned gibberish results");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"news" => []
		];
		
		foreach($json["data"]["result"]["items"] ?? [] as $news){
			
			if(empty($news["media"][0]["pict_big"]["url"])){
				
				$thumb = [
					"url" => null,
					"ratio" => null
				];
			}else{
				
				$thumb = [
					"url" => $this->unshitimage($news["media"][0]["pict_big"]["url"], false),
					"ratio" => "16:9"
				];
			}
			
			$out["news"][] = [
				"title" => $news["title"],
				"author" => $news["press_name"] ?? null,
				"description" => $this->trimdots($news["desc"] ?? null),
				"date" => (int)$news["date"],
				"thumb" => $thumb,
				"url" => $news["url"]
			];
		}
		
		return $out;
	}
	
	/**
	 * Fetch and convert the sidebar "instant answer" of a web search.
	 *
	 * Best effort: any transport or format problem yields null so the
	 * surrounding search still succeeds.
	 *
	 * @param mixed  $proxy    proxy to issue the request through
	 * @param string $endpoint path appended to https://api.qwant.com/v3
	 * @return array|null answer entry, or null when unavailable
	 */
	private function instant_answer($proxy, $endpoint){
		
		try{
			$answer =
				json_decode(
					$this->get(
						$proxy,
						"https://api.qwant.com/v3" . $endpoint,
						[]
					),
					true
				);
			
		}catch(Exception $error){
			
			return null;
		}
		
		if(
			!is_array($answer) ||
			($answer["status"] ?? null) != "success" ||
			($answer["data"]["result"] ?? null) === null
		){
			
			return null;
		}
		
		$result = $answer["data"]["result"];
		$landscape = $result["thumbnail"]["landscape"] ?? null;
		
		return [
			"title" => $result["title"] ?? null,
			"description" => [
				[
					"type" => "text",
					"value" => $this->trimdots($result["description"] ?? null)
				]
			],
			"url" => $result["url"] ?? null,
			"thumb" =>
				$landscape == null ?
				null :
				$this->unshitimage($landscape, false),
			"table" => [],
			"sublink" => []
		];
	}
	
	/**
	 * Translate the "nsfw" filter value into the safesearch request value.
	 *
	 * @param mixed $nsfw "yes", "maybe" or "no"
	 * @return int|null 0, 1 or 2 respectively; null for any other value
	 */
	private function safesearch($nsfw){
		
		switch($nsfw){
			
			case "yes": return 0;
			case "maybe": return 1;
			case "no": return 2;
		}
		
		return null;
	}
	
	/**
	 * Throw when a decoded response has status "error".
	 *
	 * @param mixed $json decoded response
	 * @throws Exception naming a captcha or the error code when present
	 */
	private function detect_errors($json){
		
		if(
			!isset($json["status"]) ||
			$json["status"] != "error"
		){
			
			return;
		}
		
		if(isset($json["data"]["error_data"]["captchaUrl"])){
			
			throw new Exception("Qwant returned a captcha");
		}
		
		// web() reads the code from data.error_code, so look there as well
		// as under data.error_data
		$code =
			$json["data"]["error_data"]["error_code"] ??
			$json["data"]["error_code"] ??
			null;
		
		if($code !== null){
			
			throw new Exception(
				"Qwant returned an API error: " .
				$code
			);
		}
		
		throw new Exception("Qwant returned an API error");
	}
	
	/**
	 * Return the text up to its first line break after wrapping at 300
	 * columns. null is treated as an empty string.
	 */
	private function limitstrlen($text){
		
		return explode("\n", wordwrap($text ?? "", 300, "\n"))[0];
	}
	
	/**
	 * Strip leading and trailing dots and spaces. null is treated as an
	 * empty string.
	 */
	private function trimdots($text){
		
		return trim($text ?? "", ". ");
	}
	
	/**
	 * Recover the underlying image URL from a Qwant thumbnail proxy URL.
	 *
	 * The proxy URL carries the real address in its "u" query parameter, e.g.
	 * https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0
	 *
	 * @param string $url     thumbnail URL as returned by the API
	 * @param bool   $is_bing when true, "u" is reduced to
	 *                        https://<host>/th?id=<id>, dropping its other
	 *                        query parameters
	 * @return string the unwrapped URL; $url itself when it carries no "u"
	 *                parameter, and the plain "u" value when the reduction
	 *                is not possible
	 */
	private function unshitimage($url, $is_bing = true){
		
		$query = parse_url((string)$url, PHP_URL_QUERY);
		
		if(!is_string($query)){
			
			// not a proxy URL: nothing to unwrap
			return $url;
		}
		
		parse_str($query, $parts);
		
		if(
			!isset($parts["u"]) ||
			!is_string($parts["u"])
		){
			
			return $url;
		}
		
		$inner = $parts["u"];
		
		if(!$is_bing){
			
			return $inner;
		}
		
		$parse = parse_url($inner);
		
		if(
			!is_array($parse) ||
			!isset($parse["host"], $parse["query"])
		){
			
			return $inner;
		}
		
		parse_str($parse["query"], $inner_parts);
		
		if(
			!isset($inner_parts["id"]) ||
			!is_string($inner_parts["id"])
		){
			
			return $inner;
		}
		
		return "https://" . $parse["host"] . "/th?id=" . urlencode($inner_parts["id"]);
	}
}
|
||||
512
scraper/sc.php
Normal file
512
scraper/sc.php
Normal file
|
|
@ -0,0 +1,512 @@
|
|||
<?php
|
||||
|
||||
class sc{
|
||||
|
||||
/**
 * Load the helper libraries and create the backend ("sc") and HTML parser
 * instances used by this scraper.
 */
public function __construct(){
	
	// include_once rather than include: other scrapers load these same
	// files, and loading one twice in a single request would presumably
	// redeclare its class and abort.
	include_once "lib/backend.php";
	$this->backend = new backend("sc");
	
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
/**
 * Describe the filters this scraper offers.
 *
 * @param string $page accepted for interface parity; the value is not used
 * @return array a single "type" filter with its selectable values
 */
public function getfilters($page){
	
	// selectable result kinds, value => label
	$kinds = [];
	$kinds["any"] = "Any type";
	$kinds["track"] = "Tracks";
	$kinds["author"] = "People";
	$kinds["album"] = "Albums";
	$kinds["playlist"] = "Playlists";
	$kinds["goplus"] = "Go+ Tracks";
	
	$type_filter = [
		"display" => "Type",
		"option" => $kinds
	];
	
	return ["type" => $type_filter];
}
|
||||
|
||||
/**
 * Perform a GET request through the given proxy and return the raw body.
 *
 * @param mixed  $proxy   proxy descriptor handed to backend::assign_proxy()
 * @param string $url     endpoint URL without query string
 * @param array  $get     query parameters; appended when non-empty
 * @param bool   $web_req false sends the header set carrying
 *                        Referer/Origin soundcloud.com and "cors" fetch
 *                        metadata; any other value sends the
 *                        document-navigation header set
 * @return string|bool    whatever curl_exec() returned
 * @throws Exception carrying the cURL error text when the transfer fails
 */
private function get($proxy, $url, $get = [], $web_req = false){
	
	if($get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	if($web_req === false){
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Referer: https://soundcloud.com/",
			"Origin: https://soundcloud.com",
			"DNT: 1",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"Priority: u=1"
		];
	}else{
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: cross-site",
			"Priority: u=1",
			"TE: trailers"
		];
	}
	
	$curlproc = curl_init();
	
	curl_setopt($curlproc, CURLOPT_URL, $url);
	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	
	// use http2
	curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
	
	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
	
	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$data = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		// read the message first, then release the handle, so the
		// failure path does not leave it open
		$message = curl_error($curlproc);
		curl_close($curlproc);
		
		throw new Exception($message);
	}
	
	curl_close($curlproc);
	
	return $data;
}
|
||||
|
||||
public function music($get, $last_attempt = false){
|
||||
|
||||
if($get["npt"]){
|
||||
|
||||
[$params, $proxy] = $this->backend->get($get["npt"], "music");
|
||||
$params = json_decode($params, true);
|
||||
|
||||
$url = $params["url"];
|
||||
unset($params["url"]);
|
||||
|
||||
}else{
|
||||
|
||||
// normal search:
|
||||
// https://api-v2.soundcloud.com/search?q=freddie%20dredd&variant_ids=&facet=model&user_id=351062-302234-707916-795081&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
|
||||
|
||||
// soundcloud go+ search:
|
||||
// https://api-v2.soundcloud.com/search/tracks?q=freddie%20dredd&variant_ids=&filter.content_tier=SUB_HIGH_TIER&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
|
||||
|
||||
// tracks search:
|
||||
// https://api-v2.soundcloud.com/search/tracks?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
|
||||
|
||||
// users search:
|
||||
// https://api-v2.soundcloud.com/search/users?q=freddie%20dredd&variant_ids=&facet=place&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
|
||||
|
||||
// albums search:
|
||||
// https://api-v2.soundcloud.com/search/albums?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
|
||||
|
||||
// playlists search:
|
||||
// https://api-v2.soundcloud.com/search/playlists_without_albums?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
|
||||
|
||||
$search = $get["s"];
|
||||
if(strlen($search) === 0){
|
||||
|
||||
throw new Exception("Search term is empty!");
|
||||
}
|
||||
|
||||
$type = $get["type"];
|
||||
$proxy = $this->backend->get_ip();
|
||||
$token = $this->get_token($proxy);
|
||||
|
||||
switch($type){
|
||||
|
||||
case "any":
|
||||
$url = "https://api-v2.soundcloud.com/search";
|
||||
$params = [
|
||||
"q" => $search,
|
||||
"variant_ids" => "",
|
||||
"facet" => "model",
|
||||
"client_id" => $token,
|
||||
"limit" => 20,
|
||||
"offset" => 0,
|
||||
"linked_partitioning" => 1,
|
||||
"app_version" => 1713542117,
|
||||
"app_locale" => "en"
|
||||
];
|
||||
break;
|
||||
|
||||
case "track":
|
||||
$url = "https://api-v2.soundcloud.com/search/tracks";
|
||||
$params = [
|
||||
"q" => $search,
|
||||
"variant_ids" => "",
|
||||
"facet_genre" => "",
|
||||
"client_id" => $token,
|
||||
"limit" => 20,
|
||||
"offset" => 0,
|
||||
"linked_partitioning" => 1,
|
||||
"app_version" => 1713542117,
|
||||
"app_locale" => "en"
|
||||
];
|
||||
break;
|
||||
|
||||
case "author":
|
||||
$url = "https://api-v2.soundcloud.com/search/users";
|
||||
$params = [
|
||||
"q" => $search,
|
||||
"variant_ids" => "",
|
||||
"facet" => "place",
|
||||
"client_id" => $token,
|
||||
"limit" => 20,
|
||||
"offset" => 0,
|
||||
"linked_partitioning" => 1,
|
||||
"app_version" => 1713542117,
|
||||
"app_locale" => "en"
|
||||
];
|
||||
break;
|
||||
|
||||
case "album":
|
||||
$url = "https://api-v2.soundcloud.com/search/albums";
|
||||
$params = [
|
||||
"q" => $search,
|
||||
"variant_ids" => "",
|
||||
"facet" => "genre",
|
||||
"client_id" => $token,
|
||||
"limit" => 20,
|
||||
"offset" => 0,
|
||||
"linked_partitioning" => 1,
|
||||
"app_version" => 1713542117,
|
||||
"app_locale" => "en"
|
||||
];
|
||||
break;
|
||||
|
||||
case "playlist":
|
||||
$url = "https://api-v2.soundcloud.com/search/playlists_without_albums";
|
||||
$params = [
|
||||
"q" => $search,
|
||||
"variant_ids" => "",
|
||||
"facet" => "genre",
|
||||
"client_id" => $token,
|
||||
"limit" => 20,
|
||||
"offset" => 0,
|
||||
"linked_partitioning" => 1,
|
||||
"app_version" => 1713542117,
|
||||
"app_locale" => "en"
|
||||
];
|
||||
break;
|
||||
|
||||
case "goplus":
|
||||
$url = "https://api-v2.soundcloud.com/search/tracks";
|
||||
$params = [
|
||||
"q" => $search,
|
||||
"variant_ids" => "",
|
||||
"filter.content_tier" => "SUB_HIGH_TIER",
|
||||
"facet" => "genre",
|
||||
"client_id" => $token,
|
||||
"limit" => 20,
|
||||
"offset" => 0,
|
||||
"linked_partitioning" => 1,
|
||||
"app_version" => 1713542117,
|
||||
"app_locale" => "en"
|
||||
];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
try{
|
||||
|
||||
$json = $this->get($proxy, $url, $params);
|
||||
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to fetch JSON");
|
||||
}
|
||||
|
||||
/*
|
||||
$handle = fopen("scraper/soundcloud.json", "r");
|
||||
$json = fread($handle, filesize("scraper/soundcloud.json"));
|
||||
fclose($handle);
|
||||
*/
|
||||
|
||||
$json = json_decode($json, true);
|
||||
|
||||
if($json === null){
|
||||
|
||||
if($last_attempt === true){
|
||||
|
||||
throw new Exception("Fetched an invalid token (please report!!)");
|
||||
}
|
||||
|
||||
// token might've expired, get a new one and re-try search
|
||||
$this->get_token($proxy);
|
||||
return $this->music($get, true);
|
||||
}
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
"song" => [],
|
||||
"playlist" => [],
|
||||
"album" => [],
|
||||
"podcast" => [],
|
||||
"author" => [],
|
||||
"user" => []
|
||||
];
|
||||
|
||||
/*
|
||||
Get next page
|
||||
*/
|
||||
if(isset($json["next_href"])){
|
||||
|
||||
$params["query_urn"] = $json["query_urn"];
|
||||
$params["offset"] = $params["offset"] + 20;
|
||||
$params["url"] = $url; // we will remove this later
|
||||
|
||||
$out["npt"] =
|
||||
$this->backend->store(
|
||||
json_encode($params),
|
||||
"music",
|
||||
$proxy
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
Scrape items
|
||||
*/
|
||||
foreach($json["collection"] as $item){
|
||||
|
||||
switch($item["kind"]){
|
||||
|
||||
case "user":
|
||||
// parse author
|
||||
$out["author"][] = [
|
||||
"title" => $item["username"],
|
||||
"followers" => $item["followers_count"],
|
||||
"description" => trim($item["track_count"] . " songs. " . $this->limitstrlen($item["description"])),
|
||||
"thumb" => [
|
||||
"url" => $item["avatar_url"],
|
||||
"ratio" => "1:1"
|
||||
],
|
||||
"url" => $item["permalink_url"]
|
||||
];
|
||||
break;
|
||||
|
||||
case "playlist":
|
||||
// parse playlist
|
||||
$description = [];
|
||||
$count = 0;
|
||||
|
||||
foreach($item["tracks"] as $song){
|
||||
|
||||
$count++;
|
||||
|
||||
if(!isset($song["title"])){
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
$description[] = $song["title"];
|
||||
}
|
||||
|
||||
if(count($description) !== 0){
|
||||
|
||||
$description = trim($count . " songs. " . implode(", ", $description));
|
||||
}else{
|
||||
|
||||
$description = "";
|
||||
}
|
||||
|
||||
if(
|
||||
isset($item["artwork_url"]) &&
|
||||
!empty($item["artwork_url"])
|
||||
){
|
||||
|
||||
$thumb = [
|
||||
"ratio" => "1:1",
|
||||
"url" => $item["artwork_url"]
|
||||
];
|
||||
|
||||
}elseif(
|
||||
isset($item["tracks"][0]["artwork_url"]) &&
|
||||
!empty($item["tracks"][0]["artwork_url"])
|
||||
){
|
||||
|
||||
$thumb = [
|
||||
"ratio" => "1:1",
|
||||
"url" => $item["tracks"][0]["artwork_url"]
|
||||
];
|
||||
}else{
|
||||
|
||||
$thumb = [
|
||||
"ratio" => null,
|
||||
"url" => null
|
||||
];
|
||||
}
|
||||
|
||||
$out["playlist"][] = [
|
||||
"title" => $item["title"],
|
||||
"description" => $this->limitstrlen($description),
|
||||
"author" => [
|
||||
"name" => $item["user"]["username"],
|
||||
"url" => $item["user"]["permalink_url"],
|
||||
"avatar" => $item["user"]["avatar_url"]
|
||||
],
|
||||
"thumb" => $thumb,
|
||||
"date" => strtotime($item["created_at"]),
|
||||
"duration" => $item["duration"] / 1000,
|
||||
"url" => $item["permalink_url"]
|
||||
];
|
||||
break;
|
||||
|
||||
case "track":
|
||||
if(stripos($item["monetization_model"], "TIER") === false){
|
||||
|
||||
$stream = [
|
||||
"endpoint" => "sc",
|
||||
"url" =>
|
||||
$item["media"]["transcodings"][0]["url"] .
|
||||
"?client_id=" . $token .
|
||||
"&track_authorization=" .
|
||||
$item["track_authorization"]
|
||||
];
|
||||
}else{
|
||||
|
||||
$stream = [
|
||||
"endpoint" => null,
|
||||
"url" => null
|
||||
];
|
||||
}
|
||||
|
||||
// parse track
|
||||
$out["song"][] = [
|
||||
"title" => $item["title"],
|
||||
"description" => $item["description"] == "" ? null : $this->limitstrlen($item["description"]),
|
||||
"url" => $item["permalink_url"],
|
||||
"views" => $item["playback_count"],
|
||||
"author" => [
|
||||
"name" => $item["user"]["username"],
|
||||
"url" => $item["user"]["permalink_url"],
|
||||
"avatar" => $item["user"]["avatar_url"]
|
||||
],
|
||||
"thumb" => [
|
||||
"ratio" => "1:1",
|
||||
"url" => $item["artwork_url"]
|
||||
],
|
||||
"date" => strtotime($item["created_at"]),
|
||||
"duration" => (int)$item["full_duration"] / 1000,
|
||||
"stream" => $stream
|
||||
];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
 * Return the SoundCloud client id used to authenticate search requests.
 *
 * A value stored in APCu under "sc_token" is returned as-is. Otherwise the
 * front page is fetched and every script hosted on sndcdn.com is downloaded
 * in document order until one contains a "client_id=" assignment; that
 * value is stored in APCu and returned.
 *
 * NOTE(review): a cached value is returned unconditionally, so a caller
 * that asks for a replacement after a rejected token receives the same
 * value for as long as the cache entry exists -- confirm whether the entry
 * should be deleted before retrying.
 *
 * @param mixed $proxy Proxy handle forwarded to get().
 * @return string The client id.
 * @throws Exception When the front page or a script cannot be fetched, or
 *                   when no script contains a client id.
 */
public function get_token($proxy){
	
	$cached = apcu_fetch("sc_token");
	
	if($cached !== false){
		
		return $cached;
	}
	
	// search through all javascript components on the main page
	try{
		
		$html = $this->get($proxy, "https://soundcloud.com", [], true);
		
	}catch(Exception $error){
		
		throw new Exception("Failed to fetch front page");
	}
	
	$this->fuckhtml->load($html);
	
	foreach($this->fuckhtml->getElementsByTagName("script") as $script){
		
		// only externally hosted bundles on the SoundCloud CDN are candidates
		$is_cdn_bundle =
			isset($script["attributes"]["src"]) &&
			strpos($script["attributes"]["src"], "sndcdn.com") !== false;
		
		if(!$is_cdn_bundle){
			
			continue;
		}
		
		try{
			
			$js = $this->get($proxy, $script["attributes"]["src"], []);
			
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search token");
		}
		
		preg_match('/client_id=([^"]+)/', $js, $match);
		
		if(!isset($match[1])){
			
			// this bundle does not mention a client id, try the next one
			continue;
		}
		
		apcu_store("sc_token", $match[1]);
		return $match[1];
	}
	
	throw new Exception("Did not find a Soundcloud token in the Javascript blobs");
}
|
||||
|
||||
/**
 * Flatten a text to one line and keep only its first ~300 bytes.
 *
 * Line breaks are replaced by single spaces, the result is word-wrapped at
 * a width of 300 and only the first wrapped line is returned. Wrapping
 * happens on spaces only, so a single word longer than 300 bytes is
 * returned whole rather than cut.
 *
 * @param string|null $text Text to shorten. null (a missing description)
 *                          is treated as an empty string instead of being
 *                          handed to str_replace().
 * @return string The first wrapped line, "" for empty or null input.
 */
private function limitstrlen($text){
	
	// a missing description arrives as null; normalise it before the
	// string functions see it
	$flat =
		str_replace(
			["\n\r", "\r\n", "\n", "\r"],
			" ",
			$text ?? ""
		);
	
	$wrapped = wordwrap($flat, 300, "\n");
	
	// everything after the first wrap point is discarded
	return explode("\n", $wrapped, 2)[0];
}
|
||||
}
|
||||
668
scraper/solofield.php
Normal file
668
scraper/solofield.php
Normal file
|
|
@ -0,0 +1,668 @@
|
|||
<?php
|
||||
|
||||
class solofield{
	
	/** @var object Helper built in the constructor; used here for get_ip(), assign_proxy(), store() and get(). */
	public $backend;
	
	/** @var object HTML parser built in the constructor; re-loaded with each fragment that is inspected. */
	public $fuckhtml;
	
	/**
	 * Include the helper libraries and create the helper objects.
	 */
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("solofield");
		
		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Describe the filters offered by this scraper.
	 *
	 * @param mixed $page Requested page; not used, every page gets the same filters.
	 * @return array Filter definitions keyed by request parameter name.
	 */
	public function getfilters($page){
		
		return [
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"no" => "No",
				]
			]
		];
	}
	
	/**
	 * Perform a GET request with browser-like headers.
	 *
	 * @param mixed  $proxy Proxy handle passed to backend::assign_proxy().
	 * @param string $url   Target URL.
	 * @param array  $get   Query parameters appended to $url when not empty.
	 * @return string|bool  Response body as returned by curl_exec().
	 * @throws Exception    With the curl error message when the transfer fails.
	 */
	private function get($proxy, $url, $get = []){
		
		$curlproc = curl_init();
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		curl_setopt($curlproc, CURLOPT_URL, $url);
		
		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Referer: https://solofield.net",
			"DNT: 1",
			"Connection: keep-alive",
			"Cookie: cross-site-cookie=name; lno=35842050",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: same-origin",
			"Sec-Fetch-User: ?1"]
		);
		
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
		
		$this->backend->assign_proxy($curlproc, $proxy);
		
		$data = curl_exec($curlproc);
		
		if(curl_errno($curlproc)){
			
			// read the message first so the handle can be released before throwing
			$message = curl_error($curlproc);
			curl_close($curlproc);
			
			throw new Exception($message);
		}
		
		curl_close($curlproc);
		return $data;
	}
	
	/**
	 * Run a web search, or continue one from a next-page token.
	 *
	 * @param array $get Request parameters; reads "npt", "s" and "nsfw".
	 * @return array Result set; "web" holds the parsed entries and "npt" the
	 *               token for the following page when one exists.
	 * @throws Exception When the page cannot be fetched or parsed.
	 */
	public function web($get){
		
		if($get["npt"]){
			
			// continuation: the stored value is the query string of the next page
			[$query, $proxy] = $this->backend->get($get["npt"], "web");
			
			try{
				
				$html =
					$this->get(
						$proxy,
						"https://solofield.net/search?" . $query,
						[]
					);
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch search page");
			}
		}else{
			
			$proxy = $this->backend->get_ip();
			
			try{
				
				$html =
					$this->get(
						$proxy,
						"https://solofield.net/search",
						[
							"q" => $get["s"],
							"ie" => "UTF-8",
							"oe" => "UTF-8",
							"hl" => "ja", // changing this doesnt do anything
							"lr" => "lang_ja", // same here
							//"ls" => "", // ??
							"f" => ($get["nsfw"] == "yes" ? "off" : "on")
						]
					);
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch search page");
			}
		}
		
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		// check for errors and load the result div
		if($this->error_and_load($html)){
			
			return $out;
		}
		
		$items =
			$this->fuckhtml
			->getElementsByClassName(
				"g0",
				"li"
			);
		
		foreach($items as $item){
			
			$this->fuckhtml->load($item);
			
			$title_tag =
				$this->fuckhtml
				->getElementsByClassName(
					"r",
					"h3"
				);
			
			if(count($title_tag) === 0){
				
				continue;
			}
			
			// the result link lives inside the title heading
			$this->fuckhtml->load($title_tag[0]);
			
			$link =
				$this->fuckhtml
				->getTextContent(
					$this->fuckhtml
					->getElementsByTagName(
						"a"
					)[0]
					["attributes"]
					["href"]
				);
			
			// back to the whole entry for the screenshot and the snippet
			$this->fuckhtml->load($item);
			
			$shots =
				$this->fuckhtml
				->getElementsByClassName(
					"webshot",
					"img"
				);
			
			$thumb = [
				"ratio" => null,
				"url" => null
			];
			
			if(count($shots) !== 0){
				
				$uri =
					$this->fuckhtml
					->getTextContent(
						$shots[0]
						["attributes"]
						["src"]
					);
				
				// "now_printing" images are treated as "no thumbnail"
				if(stripos($uri, "now_printing") === false){
					
					$thumb = [
						"ratio" => "1:1",
						"url" => "https://solofield.net" . $uri
					];
				}
			}
			
			$out["web"][] = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$title_tag[0]
					),
				"description" =>
					$this->fuckhtml
					->getTextContent(
						$this->fuckhtml
						->getElementsByClassName(
							"s",
							"div"
						)[0]
					),
				"url" => $link,
				"date" => null,
				"type" => "web",
				"thumb" => $thumb,
				"sublink" => [],
				"table" => []
			];
		}
		
		// get next page
		$this->get_npt($html, $proxy, $out, "web");
		
		return $out;
	}
	
	/**
	 * Run an image search. The site offers no pagination for images.
	 *
	 * @param array $get Request parameters; reads "s" and "nsfw".
	 * @return array Result set with the parsed entries under "image".
	 * @throws Exception When the page cannot be fetched or parsed.
	 */
	public function image($get){
		
		// no pagination
		try{
			
			$html =
				$this->get(
					$this->backend->get_ip(),
					"https://solofield.net/isearch",
					[
						"q" => $get["s"],
						"ie" => "UTF-8",
						"oe" => "UTF-8",
						"hl" => "ja", // changing this doesnt do anything
						//"lr" => "lang_ja", // same here
						"ls" => "", // ??
						"f" => ($get["nsfw"] == "yes" ? "off" : "on")
					]
				);
		}catch(Exception $error){
			
			// same message as web() and video() instead of a raw curl error
			throw new Exception("Failed to fetch search page");
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		// check for errors and load the result div
		if($this->error_and_load($html)){
			
			return $out;
		}
		
		$images =
			$this->fuckhtml
			->getElementsByTagName(
				"li"
			);
		
		foreach($images as $image){
			
			$this->fuckhtml->load($image);
			
			$img =
				$this->fuckhtml
				->getElementsByTagName(
					"img"
				);
			
			if(count($img) === 0){
				
				// ?? invalid
				continue;
			}
			
			$img = $img[0];
			
			// the entry text is read as "<width>x<height>"
			$size =
				explode(
					"x",
					$this->fuckhtml
					->getTextContent(
						$image
					),
					2
				);
			
			$width = (int)trim($size[0]);
			
			// no "x" in the text: report 0 instead of reading a missing offset
			$height = isset($size[1]) ? (int)trim($size[1]) : 0;
			
			$out["image"][] = [
				"title" => null,
				"source" => [
					[
						"url" =>
							"https://solofield.net/" .
							$this->fuckhtml
							->getTextContent(
								$img["attributes"]["src"]
							),
						"width" => $width,
						"height" => $height
					]
				],
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$this->fuckhtml
						->getElementsByTagName(
							"a"
						)[0]
						["attributes"]
						["href"]
					)
			];
		}
		
		return $out;
	}
	
	/**
	 * Run a video search, or continue one from a next-page token.
	 *
	 * @param array $get Request parameters; reads "npt", "s" and "nsfw".
	 * @return array Result set; "video" holds the parsed entries and "npt"
	 *               the token for the following page when one exists.
	 * @throws Exception When the page cannot be fetched or parsed.
	 */
	public function video($get){
		
		if($get["npt"]){
			
			[$query, $proxy] = $this->backend->get($get["npt"], "videos");
			
			try{
				
				$html =
					$this->get(
						$proxy,
						"https://solofield.net/vsearch?" . $query,
						[]
					);
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch search page");
			}
		}else{
			
			$proxy = $this->backend->get_ip();
			
			try{
				
				$html =
					$this->get(
						$proxy,
						"https://solofield.net/vsearch",
						[
							"q" => $get["s"],
							"ie" => "UTF-8",
							"oe" => "UTF-8",
							"hl" => "ja", // changing this doesnt do anything
							//"lr" => "lang_ja", // same here
							"ls" => "", // ??
							"f" => ($get["nsfw"] == "yes" ? "off" : "on")
						]
					);
			}catch(Exception $error){
				
				throw new Exception("Failed to fetch search page");
			}
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"video" => [],
			"author" => [],
			"livestream" => [],
			"playlist" => [],
			"reel" => []
		];
		
		// check for errors and load the result div
		if($this->error_and_load($html)){
			
			return $out;
		}
		
		$items =
			$this->fuckhtml
			->getElementsByTagName(
				"li"
			);
		
		foreach($items as $item){
			
			$this->fuckhtml->load($item);
			
			$as =
				$this->fuckhtml
				->getElementsByTagName(
					"a"
				);
			
			if(count($as) === 0){
				
				continue;
			}
			
			$thumb =
				$this->fuckhtml
				->getElementsByTagName(
					"img"
				);
			
			if(count($thumb) !== 0){
				
				$thumb = [
					"ratio" => "16:9",
					"url" =>
						"https://solofield.net/" .
						$thumb[0]
						["attributes"]
						["src"]
				];
			}else{
				
				$thumb = [
					"ratio" => null,
					"url" => null
				];
			}
			
			$date =
				$this->fuckhtml
				->getElementsByAttributeValue(
					"style",
					"font-size: 10px;",
					"span"
				);
			
			if(count($date) !== 0){
				
				$date =
					$this->unfuckdate(
						$this->fuckhtml
						->getTextContent(
							$date[0]
						)
					);
			}else{
				
				$date = null;
			}
			
			$center_td =
				$this->fuckhtml
				->getElementsByAttributeValue(
					"align",
					"center",
					"td"
				);
			
			if(count($center_td) === 2){
				
				// extract the cell text first, then convert it to seconds;
				// hms2int() needs a string, not the element itself
				$duration =
					$this->hms2int(
						$this->fuckhtml
						->getTextContent(
							$center_td[0]
						)
					);
			}else{
				
				$duration = null;
			}
			
			$out["video"][] = [
				"title" =>
					$this->fuckhtml
					->getTextContent(
						$as[1]
					),
				"description" => null,
				"author" => [
					"name" => null,
					"url" => null,
					"avatar" => null
				],
				"date" => $date,
				"duration" => $duration,
				"views" => null,
				"thumb" => $thumb,
				"url" =>
					$this->fuckhtml
					->getTextContent(
						$as[0]
						["attributes"]
						["href"]
					)
			];
		}
		
		// get next page
		$this->get_npt($html, $proxy, $out, "videos");
		
		return $out;
	}
	
	/**
	 * Store a next-page token in $out["npt"] when the page has a "Next" link.
	 *
	 * @param string $html  Full page markup.
	 * @param mixed  $proxy Proxy handle saved together with the token.
	 * @param array  $out   Result set, modified in place.
	 * @param string $type  Page type label passed to backend::store().
	 */
	private function get_npt($html, $proxy, &$out, $type){
		
		// get next page
		$this->fuckhtml->load($html);
		
		$pjs =
			$this->fuckhtml
			->getElementById(
				"pjs"
			);
		
		if($pjs){
			
			$alnk =
				$this->fuckhtml
				->getElementsByClassName(
					"alnk",
					"span"
				);
			
			foreach($alnk as $lnk){
				
				if(
					stripos(
						$this->fuckhtml
						->getTextContent(
							$lnk
						),
						"Next"
					) !== false
				){
					
					$this->fuckhtml->load($lnk);
					
					// only the query string is kept; web()/video() prepend the endpoint
					$out["npt"] =
						$this->backend->store(
							parse_url(
								$this->fuckhtml
								->getElementsByTagName(
									"a"
								)[0]
								["attributes"]
								["href"],
								PHP_URL_QUERY
							),
							$type,
							$proxy
						);
				}
			}
		}
	}
	
	/**
	 * Validate a response and load its result list into the parser.
	 *
	 * @param string $html Full page markup.
	 * @return bool true when the page has a "nosearch" div instead of a
	 *              result list (nothing to parse), false once the "list"
	 *              div has been loaded.
	 * @throws Exception When the body is empty or neither div is present.
	 */
	private function error_and_load($html){
		
		if(strlen($html) === 0){
			
			throw new Exception("Solofield blocked the request IP");
		}
		
		$this->fuckhtml->load($html);
		
		$list =
			$this->fuckhtml
			->getElementById(
				"list",
				"div"
			);
		
		if($list === false){
			
			$nosearch =
				$this->fuckhtml
				->getElementById(
					"nosearch",
					"div"
				);
			
			if($nosearch){
				
				return true;
			}
			
			throw new Exception("Failed to grep search list");
		}
		
		$this->fuckhtml->load($list);
		return false;
	}
	
	/**
	 * Convert a "<label>:<date>" string to a unix timestamp.
	 *
	 * Everything after the first colon is reduced to its digit groups joined
	 * by "-" and handed to strtotime(). Separators on BOTH ends are
	 * stripped: a leading "-" (for instance from a space after the colon)
	 * would otherwise be read by strtotime() as a negative year.
	 * NOTE(review): the exact text the site emits is not visible here --
	 * confirm against a live page.
	 *
	 * @param string $date Text containing a colon followed by a date.
	 * @return int|false Timestamp, or false when nothing parseable follows
	 *                   the colon (or there is no colon).
	 */
	private function unfuckdate($date){
		
		$parts = explode(":", $date, 2);
		
		$digits =
			preg_replace(
				'/[^0-9]+/',
				"-",
				$parts[1] ?? ""
			);
		
		return strtotime(trim($digits, "-"));
	}
	
	/**
	 * Convert "H:M:S", "M:S" or "S" to a number of seconds.
	 *
	 * @param string $time Colon separated duration.
	 * @return int Duration in seconds.
	 */
	private function hms2int($time){
		
		$parts = explode(":", $time, 3);
		$time = 0;
		
		if(count($parts) === 3){
			
			// hours
			$time = $time + ((int)$parts[0] * 3600);
			array_shift($parts);
		}
		
		if(count($parts) === 2){
			
			// minutes
			$time = $time + ((int)$parts[0] * 60);
			array_shift($parts);
		}
		
		// seconds
		$time = $time + (int)$parts[0];
		
		return $time;
	}
}
|
||||
726
scraper/spotify.php
Normal file
726
scraper/spotify.php
Normal file
|
|
@ -0,0 +1,726 @@
|
|||
<?php
|
||||
|
||||
class spotify{
|
||||
|
||||
private const req_web = 0;
|
||||
private const req_api = 1;
|
||||
private const req_clientid = 2;
|
||||
|
||||
/**
 * Include the helper libraries, then create the HTML parser and the
 * backend helper registered under the name "spotify".
 */
public function __construct(){
	
	// bring both helper classes into scope before instantiating either
	include "lib/backend.php";
	include "lib/fuckhtml.php";
	
	$this->fuckhtml = new fuckhtml();
	$this->backend = new backend("spotify");
}
|
||||
|
||||
/**
 * Describe the filters offered by this scraper.
 *
 * @param mixed $page Requested page; not used, the same filter is always returned.
 * @return array A single "category" filter with its selectable options.
 */
public function getfilters($page){
	
	// option value => label shown to the user
	$categories = [
		"any" => "All (no pagination)",
		"audiobooks" => "Audiobooks",
		"tracks" => "Songs",
		"artists" => "Artists",
		"playlists" => "Playlists",
		"albums" => "Albums",
		"podcastAndEpisodes" => "Podcasts & Shows (no pagination)",
		"episodes" => "Episodes",
		"users" => "Profiles"
	];
	
	$filters = [];
	
	$filters["category"] = [
		"display" => "Category",
		"option" => $categories
	];
	
	return $filters;
}
|
||||
|
||||
/**
 * Perform an HTTP request using the header set selected by $reqtype.
 *
 * req_web and req_api send $get as a query string; req_clientid sends it
 * as a JSON-encoded POST body instead.
 *
 * @param mixed       $proxy   Proxy handle passed to backend::assign_proxy().
 * @param string      $url     Target URL.
 * @param array       $get     Request parameters.
 * @param int         $reqtype One of self::req_web, self::req_api, self::req_clientid.
 * @param string|null $bearer  Value for the "authorization: Bearer" header (req_api only).
 * @param string|null $token   Value for the "client-token" header (req_api only).
 * @return string|bool Response body as returned by curl_exec().
 * @throws Exception   With the curl error message when the transfer fails.
 */
private function get($proxy, $url, $get = [], $reqtype = self::req_web, $bearer = null, $token = null){
	
	$curlproc = curl_init();
	
	if($reqtype == self::req_api){
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: application/json",
			"Accept-Language: en",
			"app-platform: WebPlayer",
			"authorization: Bearer {$bearer}",
			"client-token: {$token}",
			"content-type: application/json;charset=UTF-8",
			"Origin: https://open.spotify.com",
			"Referer: https://open.spotify.com/",
			"DNT: 1",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"spotify-app-version: 1.2.27.93.g7aee53d4",
			"TE: trailers"
		];
		
	}elseif($reqtype == self::req_web){
		
		$headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"DNT: 1",
			"Sec-GPC: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: cross-site"
		];
		
	}elseif($reqtype == self::req_clientid){
		
		// the parameters travel as a JSON body, so encode them before the
		// Content-Length header is computed
		$get = json_encode($get);
		
		curl_setopt($curlproc, CURLOPT_POST, true);
		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
		
		$headers = [
			"User-Agent:" . config::USER_AGENT,
			"Accept: application/json",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip, deflate, br",
			"Referer: https://open.spotify.com/",
			"content-type: application/json",
			"Content-Length: " . strlen($get),
			"Origin: https://open.spotify.com",
			"DNT: 1",
			"Sec-GPC: 1",
			"Connection: keep-alive",
			"Sec-Fetch-Dest: empty",
			"Sec-Fetch-Mode: cors",
			"Sec-Fetch-Site: same-site",
			"TE: trailers"
		];
	}
	
	// every request type except the JSON POST carries its parameters in the URL
	if($reqtype !== self::req_clientid && $get !== []){
		
		$url .= "?" . http_build_query($get);
	}
	
	curl_setopt_array(
		$curlproc,
		[
			CURLOPT_URL => $url,
			CURLOPT_ENCODING => "", // default encoding
			CURLOPT_HTTPHEADER => $headers,
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_SSL_VERIFYHOST => 2,
			CURLOPT_SSL_VERIFYPEER => true,
			CURLOPT_CONNECTTIMEOUT => 30,
			CURLOPT_TIMEOUT => 30
		]
	);
	
	$this->backend->assign_proxy($curlproc, $proxy);
	
	$response = curl_exec($curlproc);
	
	if(curl_errno($curlproc)){
		
		throw new Exception(curl_error($curlproc));
	}
	
	curl_close($curlproc);
	
	return $response;
}
|
||||
|
||||
public function music($get){
|
||||
|
||||
$search = $get["s"];
|
||||
$ip = $this->backend->get_ip();
|
||||
$category = $get["category"];
|
||||
|
||||
/*
|
||||
audiobooks first and second page decoded
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAudiobooks&variables={"searchTerm":"freddie+dredd","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"8758e540afdba5afa3c5246817f6bd31d86a15b3f5666c363dd017030f35d785"}}
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAudiobooks&variables={"searchTerm":"freddie+dredd","offset":30,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"8758e540afdba5afa3c5246817f6bd31d86a15b3f5666c363dd017030f35d785"}}
|
||||
*/
|
||||
|
||||
/*
|
||||
songs
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchTracks&variables={"searchTerm":"asmr","offset":0,"limit":100,"numberOfTopResults":20,"includeAudiobooks":false}&extensions={"persistedQuery":{"version":1,"sha256Hash":"16c02d6304f5f721fc2eb39dacf2361a4543815112506a9c05c9e0bc9733a679"}}
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchTracks&variables={"searchTerm":"asmr","offset":100,"limit":100,"numberOfTopResults":20,"includeAudiobooks":false}&extensions={"persistedQuery":{"version":1,"sha256Hash":"16c02d6304f5f721fc2eb39dacf2361a4543815112506a9c05c9e0bc9733a679"}}
|
||||
*/
|
||||
|
||||
/*
|
||||
artists
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":30,"limit":23,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":53,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
|
||||
*/
|
||||
|
||||
/*
|
||||
playlists
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchPlaylists&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"19b4143a0500ccec189ca0f4a0316bc2c615ecb51ce993ba4d7d08afd1d87aa4"}}
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchPlaylists&variables={"searchTerm":"asmr","offset":30,"limit":3,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"19b4143a0500ccec189ca0f4a0316bc2c615ecb51ce993ba4d7d08afd1d87aa4"}}
|
||||
*/
|
||||
|
||||
/*
|
||||
albums
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAlbums&variables={"searchTerm":"asmr","offset":33,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e93b13cda461482da2940467eb2beed9368e9bb2fff37df3fb6633fc61271a27"}}
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAlbums&variables={"searchTerm":"asmr","offset":33,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e93b13cda461482da2940467eb2beed9368e9bb2fff37df3fb6633fc61271a27"}}
|
||||
*/
|
||||
|
||||
/*
|
||||
podcasts & shows (contains authors, no pagination)
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchFullEpisodes&variables={"searchTerm":"asmr","offset":0,"limit":30}&extensions={"persistedQuery":{"version":1,"sha256Hash":"9f996251c9781fabce63f1a9980b5287ea33bc5e8c8953d0c4689b09936067a1"}}
|
||||
*/
|
||||
|
||||
/*
|
||||
episodes
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchDesktop&variables={"searchTerm":"asmr","offset":0,"limit":10,"numberOfTopResults":5,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"da03293d92a2cfc5e24597dcdc652c0ad135e1c64a78fddbf1478a7e096bea44"}}
|
||||
??? https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchFullEpisodes&variables={"searchTerm":"asmr","offset":60,"limit":30}&extensions={"persistedQuery":{"version":1,"sha256Hash":"9f996251c9781fabce63f1a9980b5287ea33bc5e8c8953d0c4689b09936067a1"}}
|
||||
*/
|
||||
|
||||
/*
|
||||
profiles
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchUsers&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"02026f48ab5001894e598904079b620ebc64f2d53b55ca20c3858abd3a46c5fb"}}
|
||||
https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchUsers&variables={"searchTerm":"asmr","offset":30,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"02026f48ab5001894e598904079b620ebc64f2d53b55ca20c3858abd3a46c5fb"}}
|
||||
*/
|
||||
|
||||
// get HTML
|
||||
try{
|
||||
|
||||
$html =
|
||||
$this->get(
|
||||
$ip,
|
||||
"https://open.spotify.com/search/" .
|
||||
rawurlencode($search) .
|
||||
($category != "any" ? "/" . $category : ""),
|
||||
[]
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to get initial search page");
|
||||
}
|
||||
|
||||
// grep bearer and client ID
|
||||
$this->fuckhtml->load($html);
|
||||
|
||||
$script =
|
||||
$this->fuckhtml
|
||||
->getElementById(
|
||||
"session",
|
||||
"script"
|
||||
);
|
||||
|
||||
if($script === null){
|
||||
|
||||
throw new Exception("Failed to grep bearer token");
|
||||
}
|
||||
|
||||
$script =
|
||||
json_decode(
|
||||
$script["innerHTML"],
|
||||
true
|
||||
);
|
||||
|
||||
$bearer = $script["accessToken"];
|
||||
$client_id = $script["clientId"];
|
||||
|
||||
// hit client ID endpoint
|
||||
try{
|
||||
|
||||
$token =
|
||||
json_decode(
|
||||
$this->get(
|
||||
$ip,
|
||||
"https://clienttoken.spotify.com/v1/clienttoken",
|
||||
[ // !! that shit must be sent as json data
|
||||
"client_data" => [
|
||||
"client_id" => $client_id,
|
||||
"client_version" => "1.2.27.93.g7aee53d4",
|
||||
"js_sdk_data" => [
|
||||
"device_brand" => "unknown",
|
||||
"device_id" => "4c7ca20117ca12288ea8fc7118a9118c",
|
||||
"device_model" => "unknown",
|
||||
"device_name" => "computer",
|
||||
"os" => "windows",
|
||||
"os_version" => "NT 10.0"
|
||||
]
|
||||
]
|
||||
],
|
||||
self::req_clientid
|
||||
),
|
||||
true
|
||||
);
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to fetch token");
|
||||
}
|
||||
|
||||
if($token === null){
|
||||
|
||||
throw new Exception("Failed to decode token");
|
||||
}
|
||||
|
||||
$token = $token["granted_token"]["token"];
|
||||
|
||||
try{
|
||||
|
||||
switch($get["option"]){
|
||||
|
||||
case "any":
|
||||
$variables = [
|
||||
"searchTerm" => $search,
|
||||
"offset" => 0,
|
||||
"limit" => 10,
|
||||
"numberOfTopResults" => 5,
|
||||
"includeAudiobooks" => true
|
||||
];
|
||||
break;
|
||||
|
||||
case "audiobooks":
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
$payload =
|
||||
$this->get(
|
||||
$ip,
|
||||
"https://api-partner.spotify.com/pathfinder/v1/query",
|
||||
[
|
||||
"operationName" => "searchDesktop",
|
||||
"variables" =>
|
||||
json_encode(
|
||||
[
|
||||
"searchTerm" => $search,
|
||||
"offset" => 0,
|
||||
"limit" => 10,
|
||||
"numberOfTopResults" => 5,
|
||||
"includeAudiobooks" => true
|
||||
]
|
||||
),
|
||||
"extensions" =>
|
||||
json_encode(
|
||||
[
|
||||
"persistedQuery" => [
|
||||
"version" => 1,
|
||||
"sha256Hash" => "21969b655b795601fb2d2204a4243188e75fdc6d3520e7b9cd3f4db2aff9591e" // ?
|
||||
]
|
||||
]
|
||||
)
|
||||
],
|
||||
self::req_api,
|
||||
$bearer,
|
||||
$token
|
||||
);
|
||||
|
||||
}catch(Exception $error){
|
||||
|
||||
throw new Exception("Failed to fetch JSON results");
|
||||
}
|
||||
|
||||
if($payload == "Token expired"){
|
||||
|
||||
throw new Exception("Grepped spotify token has expired");
|
||||
}
|
||||
|
||||
$payload = json_decode($payload, true);
|
||||
|
||||
if($payload === null){
|
||||
|
||||
throw new Exception("Failed to decode JSON results");
|
||||
}
|
||||
|
||||
//$payload = json_decode(file_get_contents("scraper/spotify.json"), true);
|
||||
|
||||
$out = [
|
||||
"status" => "ok",
|
||||
"npt" => null,
|
||||
"song" => [],
|
||||
"playlist" => [],
|
||||
"album" => [],
|
||||
"podcast" => [],
|
||||
"author" => [],
|
||||
"user" => []
|
||||
];
|
||||
|
||||
// get songs
|
||||
foreach($payload["data"]["searchV2"]["tracksV2"]["items"] as $result){
|
||||
|
||||
if(isset($result["item"])){
|
||||
|
||||
$result = $result["item"];
|
||||
}
|
||||
|
||||
if(isset($result["data"])){
|
||||
|
||||
$result = $result["data"];
|
||||
}
|
||||
|
||||
[$artist, $artist_link] = $this->get_artists($result["artists"]);
|
||||
|
||||
$out["song"][] = [
|
||||
"title" => $result["name"],
|
||||
"description" => null,
|
||||
"url" => "https://open.spotify.com/track/" . $result["id"],
|
||||
"views" => null,
|
||||
"author" => [
|
||||
"name" => $artist,
|
||||
"url" => $artist_link,
|
||||
"avatar" => null
|
||||
],
|
||||
"thumb" => $this->get_thumb($result["albumOfTrack"]["coverArt"]),
|
||||
"date" => null,
|
||||
"duration" => $result["duration"]["totalMilliseconds"] / 1000,
|
||||
"stream" => [
|
||||
"endpoint" => "spotify",
|
||||
"url" => "track." . $result["id"]
|
||||
]
|
||||
];
|
||||
}
|
||||
|
||||
// get playlists
|
||||
foreach($payload["data"]["searchV2"]["playlists"]["items"] as $playlist){
|
||||
|
||||
if(isset($playlist["data"])){
|
||||
|
||||
$playlist = $playlist["data"];
|
||||
}
|
||||
|
||||
$avatar = $this->get_thumb($playlist["ownerV2"]["data"]["avatar"]);
|
||||
|
||||
$out["playlist"][] = [
|
||||
"title" => $playlist["name"],
|
||||
"description" => null,
|
||||
"author" => [
|
||||
"name" => $playlist["ownerV2"]["data"]["name"],
|
||||
"url" =>
|
||||
"https://open.spotify.com/user/" .
|
||||
explode(
|
||||
":",
|
||||
$playlist["ownerV2"]["data"]["uri"],
|
||||
3
|
||||
)[2],
|
||||
"avatar" => $avatar["url"]
|
||||
],
|
||||
"thumb" => $this->get_thumb($playlist["images"]["items"][0]),
|
||||
"date" => null,
|
||||
"duration" => null,
|
||||
"url" =>
|
||||
"https://open.spotify.com/playlist/" .
|
||||
explode(
|
||||
":",
|
||||
$playlist["uri"],
|
||||
3
|
||||
)[2]
|
||||
];
|
||||
}
|
||||
|
||||
// get albums
|
||||
foreach($payload["data"]["searchV2"]["albums"]["items"] as $album){
|
||||
|
||||
if(isset($album["data"])){
|
||||
|
||||
$album = $album["data"];
|
||||
}
|
||||
|
||||
[$artist, $artist_link] = $this->get_artists($album["artists"]);
|
||||
|
||||
$out["album"][] = [
|
||||
"title" => $album["name"],
|
||||
"description" => null,
|
||||
"author" => [
|
||||
"name" => $artist,
|
||||
"url" => $artist_link,
|
||||
"avatar" => null
|
||||
],
|
||||
"thumb" => $this->get_thumb($album["coverArt"]),
|
||||
"date" => mktime(0, 0, 0, 0, 32, $album["date"]["year"]),
|
||||
"duration" => null,
|
||||
"url" =>
|
||||
"https://open.spotify.com/album/" .
|
||||
explode(
|
||||
":",
|
||||
$album["uri"],
|
||||
3
|
||||
)[2]
|
||||
];
|
||||
}
|
||||
|
||||
// get podcasts
|
||||
foreach($payload["data"]["searchV2"]["podcasts"]["items"] as $podcast){
|
||||
|
||||
if(isset($podcast["data"])){
|
||||
|
||||
$podcast = $podcast["data"];
|
||||
}
|
||||
|
||||
$description = [];
|
||||
foreach($podcast["topics"]["items"] as $subject){
|
||||
|
||||
$description[] = $subject["title"];
|
||||
}
|
||||
|
||||
$description = implode(", ", $description);
|
||||
|
||||
if($description == ""){
|
||||
|
||||
$description = null;
|
||||
}
|
||||
|
||||
$out["podcast"][] = [
|
||||
"title" => $podcast["name"],
|
||||
"description" => $description,
|
||||
"author" => [
|
||||
"name" => $podcast["publisher"]["name"],
|
||||
"url" => null,
|
||||
"avatar" => null
|
||||
],
|
||||
"thumb" => $this->get_thumb($podcast["coverArt"]),
|
||||
"date" => null,
|
||||
"duration" => null,
|
||||
"url" =>
|
||||
"https://open.spotify.com/show/" .
|
||||
explode(
|
||||
":",
|
||||
$podcast["uri"],
|
||||
3
|
||||
)[2],
|
||||
"stream" => [
|
||||
"endpoint" => null,
|
||||
"url" => null
|
||||
]
|
||||
];
|
||||
}
|
||||
|
||||
// get audio books (put in podcasts)
|
||||
foreach($payload["data"]["searchV2"]["audiobooks"]["items"] as $podcast){
|
||||
|
||||
if(isset($podcast["data"])){
|
||||
|
||||
$podcast = $podcast["data"];
|
||||
}
|
||||
|
||||
$description = [];
|
||||
foreach($podcast["topics"]["items"] as $subject){
|
||||
|
||||
$description[] = $subject["title"];
|
||||
}
|
||||
|
||||
$description = implode(", ", $description);
|
||||
|
||||
if($description == ""){
|
||||
|
||||
$description = null;
|
||||
}
|
||||
|
||||
$authors = [];
|
||||
foreach($podcast["authors"] as $author){
|
||||
|
||||
$authors[] = $author["name"];
|
||||
}
|
||||
|
||||
$authors = implode(", ", $authors);
|
||||
|
||||
if($authors == ""){
|
||||
|
||||
$authors = null;
|
||||
}
|
||||
|
||||
$uri =
|
||||
explode(
|
||||
":",
|
||||
$podcast["uri"],
|
||||
3
|
||||
)[2];
|
||||
|
||||
$out["podcast"][] = [
|
||||
"title" => $podcast["name"],
|
||||
"description" => $description,
|
||||
"author" => [
|
||||
"name" => $authors,
|
||||
"url" => null,
|
||||
"avatar" => null
|
||||
],
|
||||
"thumb" => $this->get_thumb($podcast["coverArt"]),
|
||||
"date" => strtotime($podcast["publishDate"]["isoString"]),
|
||||
"duration" => null,
|
||||
"url" => "https://open.spotify.com/show/" . $uri,
|
||||
"stream" => [
|
||||
"endpoint" => "spotify",
|
||||
"url" => "episode." . $uri
|
||||
]
|
||||
];
|
||||
}
|
||||
|
||||
// get episodes (and place them in podcasts)
|
||||
foreach($payload["data"]["searchV2"]["episodes"]["items"] as $podcast){
|
||||
|
||||
if(isset($podcast["data"])){
|
||||
|
||||
$podcast = $podcast["data"];
|
||||
}
|
||||
|
||||
$out["podcast"][] = [
|
||||
"title" => $podcast["name"],
|
||||
"description" => $this->limitstrlen($podcast["description"]),
|
||||
"author" => [
|
||||
"name" =>
|
||||
isset(
|
||||
$podcast["podcastV2"]["data"]["publisher"]["name"]
|
||||
) ?
|
||||
$podcast["podcastV2"]["data"]["publisher"]["name"]
|
||||
: null,
|
||||
"url" => null,
|
||||
"avatar" => null
|
||||
],
|
||||
"thumb" => $this->get_thumb($podcast["coverArt"]),
|
||||
"date" => strtotime($podcast["releaseDate"]["isoString"]),
|
||||
"duration" => $podcast["duration"]["totalMilliseconds"] / 1000,
|
||||
"url" =>
|
||||
"https://open.spotify.com/show/" .
|
||||
explode(
|
||||
":",
|
||||
$podcast["uri"],
|
||||
3
|
||||
)[2],
|
||||
"stream" => [
|
||||
"endpoint" => null,
|
||||
"url" => null
|
||||
]
|
||||
];
|
||||
}
|
||||
|
||||
// get authors
|
||||
foreach($payload["data"]["searchV2"]["artists"]["items"] as $user){
|
||||
|
||||
if(isset($user["data"])){
|
||||
|
||||
$user = $user["data"];
|
||||
}
|
||||
|
||||
$avatar = $this->get_thumb($user["visuals"]["avatarImage"]);
|
||||
|
||||
$out["author"][] = [
|
||||
"title" =>
|
||||
(
|
||||
$user["profile"]["verified"] === true ?
|
||||
"✓ " : ""
|
||||
) .
|
||||
$user["profile"]["name"],
|
||||
"followers" => null,
|
||||
"description" => null,
|
||||
"thumb" => $avatar,
|
||||
"url" =>
|
||||
"https://open.spotify.com/artist/" .
|
||||
explode(
|
||||
":",
|
||||
$user["uri"],
|
||||
3
|
||||
)[2]
|
||||
];
|
||||
}
|
||||
|
||||
// get users
|
||||
foreach($payload["data"]["searchV2"]["users"]["items"] as $user){
|
||||
|
||||
if(isset($user["data"])){
|
||||
|
||||
$user = $user["data"];
|
||||
}
|
||||
|
||||
$avatar = $this->get_thumb($user["avatar"]);
|
||||
|
||||
$out["user"][] = [
|
||||
"title" => $user["displayName"] . " (@{$user["id"]})",
|
||||
"followers" => null,
|
||||
"description" => null,
|
||||
"thumb" => $avatar,
|
||||
"url" => "https://open.spotify.com/user/" . $user["id"]
|
||||
];
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
 * Collapse a Spotify "artists" object into a display name and a link.
 *
 * @param array $artists Object holding an "items" list. Each item is read
 *                       for ["profile"]["name"]; the first item is also
 *                       read for ["uri"].
 * @return array [names, link]. "names" is every artist name joined with
 *               ", ". "link" is the open.spotify.com artist URL built from
 *               the third ":"-separated segment of the first item's uri,
 *               or null when that uri is missing or malformed. Returns
 *               [null, null] when no name could be collected.
 */
private function get_artists($artists){

	$names = [];

	foreach($artists["items"] ?? [] as $artist){

		// skip entries without a name instead of emitting empty pieces
		if(isset($artist["profile"]["name"])){

			$names[] = $artist["profile"]["name"];
		}
	}

	$artist_out = implode(", ", $names);

	if($artist_out == ""){

		return [null, null];
	}

	// The link always points at the first listed artist. The previous code
	// tested the leftover foreach variable here, which never guarded
	// anything; check the uri that is actually used instead.
	$uri_parts =
		isset($artists["items"][0]["uri"]) ?
		explode(":", $artists["items"][0]["uri"]) :
		[];

	$artist_link =
		isset($uri_parts[2]) ?
		"https://open.spotify.com/artist/" . $uri_parts[2] :
		null;

	return [$artist_out, $artist_link];
}
|
||||
|
||||
/**
 * Pick the widest entry of a cover object and describe it as a thumbnail.
 *
 * @param array|null $cover Object with a "sources" list whose entries carry
 *                          "url" and "width"; null means "no cover".
 * @return array ["url" => ..., "ratio" => "1:1"] for the widest source, or
 *               ["url" => null, "ratio" => null] when there is none.
 */
private function get_thumb($cover){

	$widest = null;

	// a null cover contributes no candidates at all
	$sources = $cover !== null ? $cover["sources"] : [];

	foreach($sources as $candidate){

		$nothing_picked = $widest === null;

		// the first candidate always wins; later ones must be strictly wider
		if($nothing_picked || (int)$candidate["width"] > $widest["width"]){

			$widest = $candidate;
		}
	}

	return
		$widest === null ?
		[
			"url" => null,
			"ratio" => null
		] :
		[
			"url" => $widest["url"],
			"ratio" => "1:1"
		];
}
|
||||
|
||||
/**
 * Flatten a text to one line and cut it at a word boundary near 300 bytes.
 *
 * @param string $text Text that may contain line breaks.
 * @return string The text up to the first wrap point produced by
 *                wordwrap($flattened, 300, "\n").
 */
private function limitstrlen($text){

	// every flavour of line break becomes a single space, so the only "\n"
	// left afterwards are the ones wordwrap() inserts
	$flattened =
		str_replace(
			["\n\r", "\r\n", "\n", "\r"],
			" ",
			$text
		);

	$wrapped = wordwrap($flattened, 300, "\n");

	// keep only what precedes the first inserted break
	[$first_line] = explode("\n", $wrapped, 2);

	return $first_line;
}
|
||||
}
|
||||
1579
scraper/startpage.php
Normal file
1579
scraper/startpage.php
Normal file
File diff suppressed because it is too large
Load diff
257
scraper/vsco.php
Normal file
257
scraper/vsco.php
Normal file
|
|
@ -0,0 +1,257 @@
|
|||
<?php
|
||||
|
||||
/**
 * VSCO image scraper.
 *
 * Greps a bearer token out of the vsco.co feed page, queries the image
 * search API with it and converts the response into the result array
 * returned by image().
 */
class vsco{

	/**
	 * Proxy / next-page-token helper, created in the constructor.
	 * Declared explicitly because dynamic properties are deprecated since
	 * PHP 8.2; kept public so visibility is unchanged.
	 */
	public $backend;

	public function __construct(){

		// include_once: a second plain include of a class file is a fatal
		// "cannot redeclare class" error
		include_once "lib/backend.php";
		$this->backend = new backend("vsco");
	}

	/**
	 * This scraper exposes no filters.
	 *
	 * @param string $page Requested page type (unused).
	 * @return array Always an empty array.
	 */
	public function getfilters($page){

		return [];
	}

	/**
	 * Perform a GET request through the assigned proxy.
	 *
	 * @param mixed       $proxy  Proxy handed to backend::assign_proxy().
	 * @param string      $url    Target URL without query string.
	 * @param array       $get    Query parameters; must contain "query"
	 *                            when $bearer is given (used for Referer).
	 * @param string|null $bearer Bearer token; null sends document-style
	 *                            headers, anything else sends API headers.
	 * @return string|bool Whatever curl_exec() returned.
	 * @throws Exception With the curl error message when the transfer fails.
	 */
	private function get($proxy, $url, $get = [], $bearer = null){

		$curlproc = curl_init();

		if($get !== []){
			$get_tmp = http_build_query($get);
			$url .= "?" . $get_tmp;
		}

		curl_setopt($curlproc, CURLOPT_URL, $url);

		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding

		if($bearer === null){

			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
				"Accept-Language: en-US,en;q=0.5",
				"Accept-Encoding: gzip",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Upgrade-Insecure-Requests: 1",
				"Sec-Fetch-Dest: document",
				"Sec-Fetch-Mode: navigate",
				"Sec-Fetch-Site: same-origin",
				"Sec-Fetch-User: ?1",
				"Priority: u=0, i",
				"TE: trailers"]
			);
		}else{

			curl_setopt($curlproc, CURLOPT_HTTPHEADER,
				["User-Agent: " . config::USER_AGENT,
				"Accept: */*",
				"Accept-Language: en-US",
				"Accept-Encoding: gzip",
				"Referer: https://vsco.co/search/images/" . urlencode($get["query"]),
				"authorization: Bearer " . $bearer,
				"content-type: application/json",
				"x-client-build: 1",
				"x-client-platform: web",
				"DNT: 1",
				"Sec-GPC: 1",
				"Connection: keep-alive",
				"Sec-Fetch-Dest: empty",
				"Sec-Fetch-Mode: cors",
				"Sec-Fetch-Site: same-origin",
				"Priority: u=0",
				"TE: trailers"]
			);
		}

		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

		// http2 bypass
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

		$this->backend->assign_proxy($curlproc, $proxy);

		$data = curl_exec($curlproc);

		if(curl_errno($curlproc)){

			// read the message first, then release the handle before throwing
			$message = curl_error($curlproc);
			curl_close($curlproc);

			throw new Exception($message);
		}

		curl_close($curlproc);
		return $data;
	}

	/**
	 * Search VSCO for images.
	 *
	 * @param array $get Request parameters: "npt" (next page token, falsy
	 *                   for a first page) and "s" (search term).
	 * @return array ["status" => "ok", "npt" => token|null, "image" => [...]]
	 * @throws Exception On an empty search term, fetch failure, missing
	 *                   bearer token, undecodable JSON or missing results.
	 */
	public function image($get){

		if($get["npt"]){

			// continuation: pagination state and bearer were stored earlier
			[$data, $proxy] =
				$this->backend->get(
					$get["npt"], "images"
				);

			$data = json_decode($data, true);

		}else{

			$search = $get["s"];
			if(strlen($search) === 0){

				throw new Exception("Search term is empty!");
			}

			$proxy = $this->backend->get_ip();

			// get bearer token
			try{

				$html =
					$this->get(
						$proxy,
						"https://vsco.co/feed"
					);

			}catch(Exception $error){

				throw new Exception("Failed to fetch feed page");
			}

			// NOTE(review): [A-z] also matches [ \ ] ^ _ and the backtick;
			// left untouched because the real token alphabet is not visible
			// from here.
			preg_match(
				'/"tkn":"([A-z0-9]+)"/',
				$html,
				$bearer
			);

			if(!isset($bearer[1])){

				throw new Exception("Failed to grep bearer token");
			}

			$data = [
				"pagination" => [
					"query" => $search,
					"page" => 0,
					"size" => 100
				],
				"bearer" => $bearer[1]
			];
		}

		try{

			$json =
				$this->get(
					$proxy,
					"https://vsco.co/api/2.0/search/images",
					$data["pagination"],
					$data["bearer"]
				);

		}catch(Exception $error){

			throw new Exception("Failed to fetch JSON");
		}

		$json = json_decode($json, true);

		if($json === null){

			throw new Exception("Failed to decode JSON");
		}

		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];

		if(!isset($json["results"])){

			throw new Exception("Failed to access results object");
		}

		foreach($json["results"] as $image){

			// responsive_url carries no scheme; isolate the path after the host
			$image_domain = parse_url("https://" . $image["responsive_url"], PHP_URL_HOST);
			$thumbnail = explode($image_domain, $image["responsive_url"], 2)[1];

			// paths not starting with "/1/" lose their first segment
			if(substr($thumbnail, 0, 3) != "/1/"){

				$thumbnail =
					preg_replace(
						'/^\/[^\/]+/',
						"",
						$thumbnail
					);
			}

			$thumbnail = "https://img.vsco.co/cdn-cgi/image/width=480,height=360" . $thumbnail;
			$size =
				$this->image_ratio(
					(int)$image["dimensions"]["width"],
					(int)$image["dimensions"]["height"]
				);

			$out["image"][] = [
				"title" => $image["description"],
				"source" => [
					[
						"url" => "https://" . $image["responsive_url"],
						"width" => (int)$image["dimensions"]["width"],
						"height" => (int)$image["dimensions"]["height"]
					],
					[
						"url" => $thumbnail,
						"width" => $size[0],
						"height" => $size[1]
					]
				],
				"url" => "https://" . $image["grid"]["domain"] . "/media/" . $image["imageId"]
			];
		}

		// get NPT
		$max_page = ceil($json["total"] / 100);
		$data["pagination"]["page"]++;

		if($max_page > $data["pagination"]["page"]){

			$out["npt"] =
				$this->backend->store(
					json_encode($data),
					"images",
					$proxy
				);
		}

		return $out;
	}

	/**
	 * Scale a size so it fits inside 480x360 while keeping its proportions.
	 *
	 * @param int $width  Original width.
	 * @param int $height Original height.
	 * @return array [width, height] as produced by floor(), or [null, null]
	 *               when either dimension is not positive (a zero dimension
	 *               used to trigger a division by zero).
	 */
	private function image_ratio($width, $height){

		if($width <= 0 || $height <= 0){

			return [null, null];
		}

		// the smaller factor is the one that keeps both sides inside the box
		$ratio = min(480 / $width, 360 / $height);

		return [
			floor($width * $ratio),
			floor($height * $ratio)
		];
	}
}
|
||||
246
scraper/wiby.php
Normal file
246
scraper/wiby.php
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
<?php
|
||||
|
||||
/**
 * wiby.me web scraper.
 *
 * Fetches the HTML result page and extracts results and the "Find more..."
 * link with regular expressions.
 */
class wiby{

	/**
	 * Proxy / next-page-token helper, created in the constructor.
	 * Declared explicitly because dynamic properties are deprecated since
	 * PHP 8.2; kept public so visibility is unchanged.
	 */
	public $backend;

	public function __construct(){

		// include_once: a second plain include of a class file is a fatal
		// "cannot redeclare class" error
		include_once "lib/backend.php";
		$this->backend = new backend("wiby");
	}

	/**
	 * List the filters offered for a page type.
	 *
	 * @param string $page Page type; only "web" has filters.
	 * @return array Filter definitions keyed by filter name, or [].
	 */
	public function getfilters($page){

		if($page != "web"){

			return [];
		}

		return [
			"nsfw" => [
				"display" => "NSFW",
				"option" => [
					"yes" => "Yes",
					"no" => "No"
				]
			],
			"date" => [
				"display" => "Time posted",
				"option" => [
					"any" => "Any time",
					"day" => "Past day",
					"week" => "Past week",
					"month" => "Past month",
					"year" => "Past year",
				]
			]
		];
	}

	/**
	 * Perform a GET request through the assigned proxy.
	 *
	 * @param mixed  $proxy Proxy handed to backend::assign_proxy().
	 * @param string $url   Target URL without query string.
	 * @param array  $get   Query parameters.
	 * @param string $nsfw  Value sent as the "ws" cookie. It now defaults to
	 *                      "1", the value web() uses when NSFW results are
	 *                      not requested; a required parameter after an
	 *                      optional one is deprecated since PHP 8.0.
	 * @return string|bool Whatever curl_exec() returned.
	 * @throws Exception With the curl error message when the transfer fails.
	 */
	private function get($proxy, $url, $get = [], $nsfw = "1"){

		$curlproc = curl_init();

		if($get !== []){
			$get = http_build_query($get);
			$url .= "?" . $get;
		}

		curl_setopt($curlproc, CURLOPT_URL, $url);

		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
			["User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip",
			"Cookie: ws={$nsfw}",
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"]
		);

		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

		$this->backend->assign_proxy($curlproc, $proxy);

		$data = curl_exec($curlproc);

		if(curl_errno($curlproc)){

			// read the message first, then release the handle before throwing
			$message = curl_error($curlproc);
			curl_close($curlproc);

			throw new Exception($message);
		}

		curl_close($curlproc);
		return $data;
	}

	/**
	 * Search wiby.me.
	 *
	 * @param array $get Request parameters: "npt" (next page token, falsy
	 *                   for a first page), "s" (search term), "date" and
	 *                   "nsfw" (filter values from getfilters()).
	 * @return array Result array; only "web" and "npt" are ever populated.
	 * @throws Exception On an empty search term or a fetch failure.
	 */
	public function web($get){

		if($get["npt"]){

			[$q, $proxy] = $this->backend->get($get["npt"], "web");
			$q = json_decode($q, true);

			// nsfw travels with the token but is a cookie, not a query param
			$nsfw = $q["nsfw"];
			unset($q["nsfw"]);
		}else{

			$search = $get["s"];
			if(strlen($search) === 0){

				throw new Exception("Search term is empty!");
			}

			$proxy = $this->backend->get_ip();
			$date = $get["date"];
			$nsfw = $get["nsfw"] == "yes" ? "0" : "1";

			// drop user-typed command tokens; the date filter below is the
			// only one that is added back
			$search = $this->strip_bangs($search);

			switch($date){

				case "day": $search = "!td " . $search; break;
				case "week": $search = "!tw " . $search; break;
				case "month": $search = "!tm " . $search; break;
				case "year": $search = "!ty " . $search; break;
			}

			$q = [
				"q" => $search
			];
		}

		try{
			$html = $this->get(
				$proxy,
				"https://wiby.me/",
				$q,
				$nsfw
			);
		}catch(Exception $error){

			throw new Exception("Failed to fetch search page");
		}

		preg_match(
			'/<p class="pin"><blockquote>(?:<\/p>)?<br><a class="more" href="\/\?q=[^"]+&p=([0-9]+)">Find more\.\.\.<\/a><\/blockquote>/',
			$html,
			$nextpage
		);

		if(count($nextpage) === 0){

			$nextpage = null;
		}else{

			$nextpage =
				$this->backend->store(
					json_encode([
						"q" => $q["q"],
						"p" => (int)$nextpage[1],
						"nsfw" => $nsfw
					]),
					"web",
					$proxy
				);
		}

		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => $nextpage,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];

		preg_match_all(
			'/<blockquote>[\s]*<a .* href="(.*)">(.*)<\/a>.*<p>(.*)<\/p>[\s]*<\/blockquote>/Ui',
			$html,
			$links
		);

		for($i=0; $i<count($links[0]); $i++){

			$out["web"][] = [
				"title" => $this->unescapehtml(trim($links[2][$i])),
				"description" => $this->unescapehtml(trim(strip_tags($links[3][$i]), ".\n\r ")),
				"url" => trim($links[1][$i]),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];
		}

		return $out;
	}

	/**
	 * Remove the "!x" / "&x" command tokens from a search string.
	 *
	 * str_replace() applies its search list in order, so the three-character
	 * tokens must come before their two-character prefixes; otherwise "!gi"
	 * loses "!g" first and leaves a stray "i" behind.
	 *
	 * @param string $search Raw search term.
	 * @return string The term with every listed token removed.
	 */
	private function strip_bangs($search){

		return
			str_replace(
				[
					"!gi", "!gv", "!gm",
					"!bi", "!bv", "!bm",
					"!td", "!tw", "!tm", "!ty",
					"&gi", "&gv", "&gm",
					"&bi", "&bv", "&bm",
					"&td", "&tw", "&tm", "&ty",
					"!g", "!b",
					"&g", "&b",
				],
				"",
				$search
			);
	}

	/**
	 * Turn <br> variants into newlines and decode XML entities.
	 *
	 * @param string $str HTML fragment.
	 * @return string Decoded text.
	 */
	private function unescapehtml($str){

		return html_entity_decode(
			str_replace(
				[
					"<br>",
					"<br/>",
					"</br>",
					"<BR>",
					"<BR/>",
					"</BR>",
				],
				"\n",
				$str
			),
			ENT_QUOTES | ENT_XML1, 'UTF-8'
		);
	}
}
|
||||
1170
scraper/yandex.php
Normal file
1170
scraper/yandex.php
Normal file
File diff suppressed because it is too large
Load diff
741
scraper/yep.php
Normal file
741
scraper/yep.php
Normal file
|
|
@ -0,0 +1,741 @@
|
|||
<?php
|
||||
|
||||
class yep{
|
||||
|
||||
/**
 * Load the helper classes and create the instances this scraper uses.
 *
 * include_once replaces the plain include: including a class file a second
 * time in the same request is a fatal "cannot redeclare class" error, while
 * include_once simply skips a file that is already loaded.
 */
public function __construct(){

	include_once "lib/backend.php";
	$this->backend = new backend("yep");

	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();
}
|
||||
|
||||
/**
 * List the filters offered by this scraper.
 *
 * The same definitions are returned for every page type; $page is not read.
 *
 * @param string $page Requested page type (unused).
 * @return array "country" (lowercase two-letter code => display name, plus
 *               "all") and "nsfw" (yes / maybe / no) filter definitions.
 */
public function getfilters($page){

	return [
		"country" => [
			"display" => "Country",
			"option" => [
				"all" => "All regions",
				"af" => "Afghanistan",
				"al" => "Albania",
				"dz" => "Algeria",
				"as" => "American Samoa",
				"ad" => "Andorra",
				"ao" => "Angola",
				"ai" => "Anguilla",
				"ag" => "Antigua and Barbuda",
				"ar" => "Argentina",
				"am" => "Armenia",
				"aw" => "Aruba",
				"au" => "Australia",
				"at" => "Austria",
				"az" => "Azerbaijan",
				"bs" => "Bahamas",
				"bh" => "Bahrain",
				"bd" => "Bangladesh",
				"bb" => "Barbados",
				"by" => "Belarus",
				"be" => "Belgium",
				"bz" => "Belize",
				"bj" => "Benin",
				"bt" => "Bhutan",
				"bo" => "Bolivia",
				"ba" => "Bosnia and Herzegovina",
				"bw" => "Botswana",
				"br" => "Brazil",
				"bn" => "Brunei Darussalam",
				"bg" => "Bulgaria",
				"bf" => "Burkina Faso",
				"bi" => "Burundi",
				"cv" => "Cabo Verde",
				"kh" => "Cambodia",
				"cm" => "Cameroon",
				"ca" => "Canada",
				"ky" => "Cayman Islands",
				"cf" => "Central African Republic",
				"td" => "Chad",
				"cl" => "Chile",
				"cn" => "China",
				"co" => "Colombia",
				"cg" => "Congo",
				"cd" => "Congo, Democratic Republic",
				"ck" => "Cook Islands",
				"cr" => "Costa Rica",
				"hr" => "Croatia",
				"cu" => "Cuba",
				"cy" => "Cyprus",
				"cz" => "Czechia",
				"ci" => "Côte d'Ivoire",
				"dk" => "Denmark",
				"dj" => "Djibouti",
				"dm" => "Dominica",
				"do" => "Dominican Republic",
				"ec" => "Ecuador",
				"eg" => "Egypt",
				"sv" => "El Salvador",
				"gq" => "Equatorial Guinea",
				"ee" => "Estonia",
				"et" => "Ethiopia",
				"fo" => "Faroe Islands",
				"fj" => "Fiji",
				"fi" => "Finland",
				"fr" => "France",
				"gf" => "French Guiana",
				"pf" => "French Polynesia",
				"ga" => "Gabon",
				"gm" => "Gambia",
				"ge" => "Georgia",
				"de" => "Germany",
				"gh" => "Ghana",
				"gi" => "Gibraltar",
				"gr" => "Greece",
				"gl" => "Greenland",
				"gd" => "Grenada",
				"gp" => "Guadeloupe",
				"gu" => "Guam",
				"gt" => "Guatemala",
				"gg" => "Guernsey",
				"gn" => "Guinea",
				"gy" => "Guyana",
				"ht" => "Haiti",
				"hn" => "Honduras",
				"hk" => "Hong Kong",
				"hu" => "Hungary",
				"is" => "Iceland",
				"in" => "India",
				"id" => "Indonesia",
				"iq" => "Iraq",
				"ie" => "Ireland",
				"im" => "Isle of Man",
				"il" => "Israel",
				"it" => "Italy",
				"jm" => "Jamaica",
				"jp" => "Japan",
				"je" => "Jersey",
				"jo" => "Jordan",
				"kz" => "Kazakhstan",
				"ke" => "Kenya",
				"ki" => "Kiribati",
				"kw" => "Kuwait",
				"kg" => "Kyrgyzstan",
				"la" => "Lao People's Democratic Republic",
				"lv" => "Latvia",
				"lb" => "Lebanon",
				"ls" => "Lesotho",
				"ly" => "Libya",
				"li" => "Liechtenstein",
				"lt" => "Lithuania",
				"lu" => "Luxembourg",
				"mk" => "Macedonia",
				"mg" => "Madagascar",
				"mw" => "Malawi",
				"my" => "Malaysia",
				"mv" => "Maldives",
				"ml" => "Mali",
				"mt" => "Malta",
				"mq" => "Martinique",
				"mr" => "Mauritania",
				"mu" => "Mauritius",
				"yt" => "Mayotte",
				"mx" => "Mexico",
				"fm" => "Micronesia, Federated States of",
				"md" => "Moldova",
				"mc" => "Monaco",
				"mn" => "Mongolia",
				"me" => "Montenegro",
				"ms" => "Montserrat",
				"ma" => "Morocco",
				"mz" => "Mozambique",
				"mm" => "Myanmar",
				"na" => "Namibia",
				"nr" => "Nauru",
				"np" => "Nepal",
				"nl" => "Netherlands",
				"nc" => "New Caledonia",
				"nz" => "New Zealand",
				"ni" => "Nicaragua",
				"ne" => "Niger",
				"ng" => "Nigeria",
				"nu" => "Niue",
				"no" => "Norway",
				"om" => "Oman",
				"pk" => "Pakistan",
				"ps" => "Palestine, State of",
				"pa" => "Panama",
				"pg" => "Papua New Guinea",
				"py" => "Paraguay",
				"pe" => "Peru",
				"ph" => "Philippines",
				"pn" => "Pitcairn",
				"pl" => "Poland",
				"pt" => "Portugal",
				"pr" => "Puerto Rico",
				"qa" => "Qatar",
				"ro" => "Romania",
				"ru" => "Russian Federation",
				"rw" => "Rwanda",
				"re" => "Réunion",
				"sh" => "Saint Helena",
				"kn" => "Saint Kitts and Nevis",
				"lc" => "Saint Lucia",
				"vc" => "Saint Vincent and the Grenadines",
				"ws" => "Samoa",
				"sm" => "San Marino",
				"st" => "Sao Tome and Principe",
				"sa" => "Saudi Arabia",
				"sn" => "Senegal",
				"rs" => "Serbia",
				"sc" => "Seychelles",
				"sl" => "Sierra Leone",
				"sg" => "Singapore",
				"sk" => "Slovakia",
				"si" => "Slovenia",
				"sb" => "Solomon Islands",
				"so" => "Somalia",
				"kr" => "South Korea", // display name was misspelled "Sourth Korea"
				"za" => "South Africa",
				"es" => "Spain",
				"lk" => "Sri Lanka",
				"sr" => "Suriname",
				"se" => "Sweden",
				"ch" => "Switzerland",
				"tw" => "Taiwan",
				"tj" => "Tajikistan",
				"tz" => "Tanzania",
				"th" => "Thailand",
				"tl" => "Timor-Leste",
				"tg" => "Togo",
				"tk" => "Tokelau",
				"to" => "Tonga",
				"tt" => "Trinidad and Tobago",
				"tn" => "Tunisia",
				"tr" => "Turkey",
				"tm" => "Turkmenistan",
				"ug" => "Uganda",
				"ua" => "Ukraine",
				"ae" => "United Arab Emirates",
				"gb" => "United Kingdom",
				"us" => "United States",
				"uy" => "Uruguay",
				"uz" => "Uzbekistan",
				"vu" => "Vanuatu",
				"ve" => "Venezuela",
				"vn" => "Vietnam",
				"vg" => "Virgin Islands, British",
				"vi" => "Virgin Islands, U.S.",
				"ye" => "Yemen",
				"zm" => "Zambia",
				"zw" => "Zimbabwe"
			]
		],
		"nsfw" => [
			"display" => "NSFW",
			"option" => [
				"yes" => "Yes",
				"maybe" => "Maybe",
				"no" => "No"
			]
		]
	];
}
|
||||
|
||||
/**
 * Perform a GET request against the yep API through the assigned proxy.
 *
 * @param mixed  $proxy Proxy handed to backend::assign_proxy().
 * @param string $url   Target URL without query string.
 * @param array  $get   Query parameters; appended when not empty.
 * @return string|bool Whatever curl_exec() returned.
 * @throws Exception With the curl error message when the transfer fails.
 */
private function get($proxy, $url, $get = []){

	$curlproc = curl_init();

	if($get !== []){
		$get = http_build_query($get);
		$url .= "?" . $get;
	}

	curl_setopt($curlproc, CURLOPT_URL, $url);

	// use http2
	curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

	// set ciphers
	curl_setopt(
		$curlproc,
		CURLOPT_SSL_CIPHER_LIST,
		"aes_128_gcm_sha_256,chacha20_poly1305_sha_256,aes_256_gcm_sha_384,ecdhe_ecdsa_aes_128_gcm_sha_256,ecdhe_rsa_aes_128_gcm_sha_256,ecdhe_ecdsa_chacha20_poly1305_sha_256,ecdhe_rsa_chacha20_poly1305_sha_256,ecdhe_ecdsa_aes_256_gcm_sha_384,ecdhe_rsa_aes_256_gcm_sha_384,ecdhe_ecdsa_aes_256_sha,ecdhe_ecdsa_aes_128_sha,ecdhe_rsa_aes_128_sha,ecdhe_rsa_aes_256_sha,rsa_aes_128_gcm_sha_256,rsa_aes_256_gcm_sha_384,rsa_aes_128_sha,rsa_aes_256_sha"
	);

	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
	curl_setopt($curlproc, CURLOPT_HTTPHEADER,
		["User-Agent: " . config::USER_AGENT,
		"Accept: */*",
		"Accept-Language: en-US,en;q=0.5",
		"Accept-Encoding: gzip, deflate, br, zstd",
		"Referer: https://yep.com/",
		"Origin: https://yep.com",
		"DNT: 1",
		"Connection: keep-alive",
		"Sec-Fetch-Dest: empty",
		"Sec-Fetch-Mode: cors",
		"Sec-Fetch-Site: same-site",
		"Priority: u=4",
		"TE: trailers"]
	);

	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

	$this->backend->assign_proxy($curlproc, $proxy);

	$data = curl_exec($curlproc);

	if(curl_errno($curlproc)){

		// read the message first, then release the handle before throwing;
		// the handle used to stay open on this path
		$message = curl_error($curlproc);
		curl_close($curlproc);

		throw new Exception($message);
	}

	curl_close($curlproc);
	return $data;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Search yep.com for web results.
 *
 * The API answer is read from index 1 of the decoded JSON: "correction",
 * "results" (only entries of type "organic" are used), "featured_news"
 * and "featured_images".
 *
 * @param array $get Request parameters: "s" (search term), "country" and
 *                   "nsfw" (filter values from getfilters()).
 * @return array Result array with "spelling", "web", "news" and "image"
 *               filled in; "npt" is always null.
 * @throws Exception On an empty search term, fetch failure or undecodable
 *                   JSON (detect_cf() is also called on the raw response).
 */
public function web($get){

	$search = $get["s"];
	if(strlen($search) === 0){

		throw new Exception("Search term is empty!");
	}

	$country = $get["country"];
	$nsfw = $get["nsfw"];

	// translate the filter value into the API's safeSearch value
	switch($nsfw){

		case "yes": $nsfw = "off"; break;
		case "maybe": $nsfw = "moderate"; break;
		case "no": $nsfw = "strict"; break;
	}

	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];

	try{

		// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
		$json =
			$this->get(
				$this->backend->get_ip(),
				"https://api.yep.com/fs/2/search",
				[
					"client" => "web",
					"gl" => $country == "all" ? $country : strtoupper($country),
					"limit" => "99999",
					"no_correct" => "false",
					"q" => $search,
					"safeSearch" => $nsfw,
					"type" => "web"
				]
			);
	}catch(Exception $error){

		throw new Exception("Failed to fetch JSON");
	}

	$this->detect_cf($json);

	$json = json_decode($json, true);
	//$json = json_decode(file_get_contents("scraper/yep.json"), true);

	if($json === null){

		throw new Exception("Failed to decode JSON");
	}

	if(isset($json[1]["correction"])){

		$out["spelling"] = [
			"type" => "not_many",
			"using" => $search,
			"correction" => $json[1]["correction"][1]
		];
	}

	if(isset($json[1]["results"])){
		foreach($json[1]["results"] as $item){

			switch(strtolower($item["type"])){

				case "organic":
					$sublinks = [];

					if(isset($item["sitelinks"]["full"])){

						foreach($item["sitelinks"]["full"] as $link){

							$sublinks[] = [
								"title" => $link["title"],
								"date" => null,
								"description" =>
									$this->titledots(
										strip_tags(
											html_entity_decode(
												$link["snippet"]
											)
										)
									),
								"url" => $link["url"]
							];
						}
					}

					$out["web"][] = [
						"title" => $item["title"],
						"description" =>
							$this->titledots(
								strip_tags(
									html_entity_decode(
										$item["snippet"]
									)
								)
							),
						"url" => $item["url"],
						"date" => strtotime($item["first_seen"]),
						"type" => "web",
						"thumb" => [
							"url" => null,
							"ratio" => null
						],
						"sublink" => $sublinks,
						"table" => []
					];
					break;
			}
		}
	}

	if(isset($json[1]["featured_news"])){

		foreach($json[1]["featured_news"] as $news){

			$out["news"][] = [
				"title" => $news["title"],
				"description" =>
					$this->titledots(
						strip_tags(
							html_entity_decode(
								$news["snippet"]
							)
						)
					),
				"date" => strtotime($news["first_seen"]),
				"thumb" =>
					isset($news["img"]) ?
					[
						"url" => $this->unshiturl($news["img"]),
						"ratio" => "16:9"
					] :
					[
						"url" => null,
						"ratio" => null
					],
				"url" => $news["url"]
			];
		}
	}

	if(isset($json[1]["featured_images"])){

		foreach($json[1]["featured_images"] as $image){

			// Normalise before testing: the previous strict "!== 0" check let
			// missing, null or string "0" dimensions through to the division
			// below, which is a DivisionByZeroError on PHP 8.
			$width = (int)($image["width"] ?? 0);
			$height = (int)($image["height"] ?? 0);

			if(
				$width > 0 &&
				$height > 0
			){

				// thumbnails are capped at 260px wide, height scaled to match
				$thumb_width = $width >= 260 ? 260 : $width;
				$thumb_height = ceil($height * ($thumb_width / $width));
			}else{

				// unknown size: report every dimension as null
				$thumb_width = null;
				$thumb_height = null;
				$width = null;
				$height = null;
			}

			$out["image"][] = [
				"title" => $image["title"],
				"source" => [
					[
						"url" => $image["image_id"],
						"width" => $width,
						"height" => $height
					],
					[
						"url" => $image["src"],
						"width" => $thumb_width,
						"height" => $thumb_height
					]
				],
				"url" => $image["host_page"]
			];
		}
	}

	return $out;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Run an image search against the Yep API and normalise the results.
 *
 * @param array $get Request parameters; reads "s" (search term),
 *                   "country" and "nsfw" ("yes", "maybe" or "no").
 * @return array     ["status" => "ok", "npt" => null, "image" => [...]].
 *                   Each image carries a title, a two-entry "source" list
 *                   (full size first, thumbnail second) and the host page url.
 * @throws Exception When the search term is empty, the request fails,
 *                   Cloudflare blocks the request or the JSON is invalid.
 */
public function image($get){
	
	$search = $get["s"];
	if(strlen($search) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$country = $get["country"];
	$nsfw = $get["nsfw"];
	
	// translate our nsfw setting to the API's safeSearch value
	switch($nsfw){
		
		case "yes": $nsfw = "off"; break;
		case "maybe": $nsfw = "moderate"; break;
		case "no": $nsfw = "strict"; break;
	}
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];
	
	try{
		
		$json =
			$this->get(
				$this->backend->get_ip(), // no nextpage!
				"https://api.yep.com/fs/2/search",
				[
					"client" => "web",
					"gl" => $country == "all" ? $country : strtoupper($country),
					"no_correct" => "false",
					"q" => $search,
					"safeSearch" => $nsfw,
					"type" => "images"
				]
			);
	}catch(Exception $error){
		
		throw new Exception("Failed to fetch JSON");
	}
	
	$this->detect_cf($json);
	
	$json = json_decode($json, true);
	
	if($json === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if(isset($json[1]["results"])){
		foreach($json[1]["results"] as $item){
			
			$width = isset($item["width"]) ? $item["width"] : null;
			$height = isset($item["height"]) ? $item["height"] : null;
			
			// Only compute a thumbnail size when both dimensions are usable.
			// A missing, null, "0" or 0.0 width would otherwise reach the
			// division below and raise a fatal DivisionByZeroError.
			if(
				is_numeric($width) &&
				is_numeric($height) &&
				$width > 0 &&
				$height > 0
			){
				
				// cap the thumbnail at 260px wide, keeping the aspect ratio
				$thumb_width = $width >= 260 ? 260 : $width;
				$thumb_height = ceil($height * ($thumb_width / $width));
			}else{
				
				// dimensions unknown: report nothing rather than guessing
				$thumb_width = null;
				$thumb_height = null;
				$width = null;
				$height = null;
			}
			
			$out["image"][] = [
				"title" => $item["title"],
				"source" => [
					[
						"url" => $item["image_id"],
						"width" => $width,
						"height" => $height
					],
					[
						"url" => $item["src"],
						"width" => $thumb_width,
						"height" => $thumb_height
					]
				],
				"url" => $item["host_page"]
			];
		}
	}
	
	return $out;
}
|
||||
|
||||
|
||||
/**
 * Run a news search against the Yep API and normalise the results.
 *
 * @param array $get Request parameters; reads "s" (search term),
 *                   "country" and "nsfw" ("yes", "maybe" or "no").
 * @return array     ["status" => "ok", "npt" => null, "news" => [...]].
 * @throws Exception When the search term is empty, the request fails,
 *                   Cloudflare blocks the request or the JSON is invalid.
 */
public function news($get){
	
	$query = $get["s"];
	
	if(strlen($query) === 0){
		
		throw new Exception("Search term is empty!");
	}
	
	$region = $get["country"];
	$safesearch = $get["nsfw"];
	
	// map our nsfw setting to the API's safeSearch value;
	// anything unrecognised is passed through untouched
	if($safesearch == "yes"){
		
		$safesearch = "off";
	}elseif($safesearch == "maybe"){
		
		$safesearch = "moderate";
	}elseif($safesearch == "no"){
		
		$safesearch = "strict";
	}
	
	// "all" is sent as-is, every other country code is uppercased
	if($region == "all"){
		
		$gl = $region;
	}else{
		
		$gl = strtoupper($region);
	}
	
	// request shape, e.g.
	// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=...&safeSearch=off&type=web
	$params = [
		"client" => "web",
		"gl" => $gl,
		"limit" => "99999",
		"no_correct" => "false",
		"q" => $query,
		"safeSearch" => $safesearch,
		"type" => "news"
	];
	
	$out = [
		"status" => "ok",
		"npt" => null,
		"news" => []
	];
	
	try{
		
		$payload =
			$this->get(
				$this->backend->get_ip(),
				"https://api.yep.com/fs/2/search",
				$params
			);
	}catch(Exception $error){
		
		throw new Exception("Failed to fetch JSON");
	}
	
	$this->detect_cf($payload);
	
	$decoded = json_decode($payload, true);
	
	if($decoded === null){
		
		throw new Exception("Failed to decode JSON");
	}
	
	if(!isset($decoded[1]["results"])){
		
		// nothing to report
		return $out;
	}
	
	foreach($decoded[1]["results"] as $article){
		
		// snippet arrives as HTML: decode entities, drop tags, trim ellipsis
		$description =
			$this->titledots(
				strip_tags(
					html_entity_decode(
						$article["snippet"]
					)
				)
			);
		
		if(isset($article["img"])){
			
			$thumb = [
				"url" => $this->unshiturl($article["img"]),
				"ratio" => "16:9"
			];
		}else{
			
			$thumb = [
				"url" => null,
				"ratio" => null
			];
		}
		
		$out["news"][] = [
			"title" => $article["title"],
			"author" => null,
			"description" => $description,
			"date" => strtotime($article["first_seen"]),
			"thumb" => $thumb,
			"url" => $article["url"]
		];
	}
	
	return $out;
}
|
||||
|
||||
|
||||
/**
 * Abort when the response body is a Cloudflare block page.
 *
 * @param string $payload Raw response body to inspect.
 * @throws Exception      When a div with class "cf-wrapper" is present.
 */
private function detect_cf($payload){
	
	$this->fuckhtml->load($payload);
	
	// the Cloudflare page is recognised by its "cf-wrapper" div
	$wrappers =
		$this->fuckhtml
		->getElementsByClassName(
			"cf-wrapper",
			"div"
		);
	
	if(count($wrappers) === 0){
		
		// regular response, nothing to do
		return;
	}
	
	throw new Exception("Blocked by Cloudflare. Please follow curl-impersonate installation instructions");
}
|
||||
|
||||
|
||||
/**
 * Strip a trailing ellipsis ("..." or "…") from a piece of text.
 *
 * Only the ellipsis itself (and surrounding whitespace) is removed. The
 * previous implementation always cut a fixed 4 bytes, which also dropped
 * the last real character of inputs such as "Hello world..." and could
 * split a multibyte character sitting in front of "…".
 *
 * @param string $title Text that may end with an ellipsis.
 * @return string       Trimmed text without the trailing ellipsis.
 */
private function titledots($title){
	
	// ignore whitespace after the ellipsis ("foo... ")
	$text = rtrim($title);
	
	$ellipsis = "…";
	$ellipsis_length = strlen($ellipsis); // byte length of the single glyph
	
	if(substr($text, -3) === "..."){
		
		// drop the whole run of trailing dots ("foo...." included)
		$text = rtrim($text, ".");
	}elseif(substr($text, -$ellipsis_length) === $ellipsis){
		
		// cut exactly the bytes of the glyph, never part of a neighbour
		$text = substr($text, 0, -$ellipsis_length);
	}
	
	return trim($text);
}
|
||||
|
||||
/**
 * Unwrap a proxied image URL.
 *
 * When the query string carries a "url" parameter, that value is returned;
 * otherwise the input is returned unchanged.
 *
 * @param string $url Possibly wrapped URL.
 * @return string     The inner URL, or $url when nothing can be unwrapped.
 */
private function unshiturl($url){
	
	$query = parse_url($url, PHP_URL_QUERY);
	
	// parse_url() yields null without a query string and false for a
	// malformed URL; passing null on to parse_str() is deprecated
	if(!is_string($query)){
		
		return $url;
	}
	
	parse_str($query, $params);
	
	// "url[]=..." would parse to an array, which is not a usable URL
	if(
		isset($params["url"]) &&
		is_string($params["url"])
	){
		
		return $params["url"];
	}
	
	return $url;
}
|
||||
}
|
||||
1727
scraper/yt.php
Normal file
1727
scraper/yt.php
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue