From 944f6c731b8fc16827ac0f31b7e02682966bceb8 Mon Sep 17 00:00:00 2001 From: vdbhb59 Date: Wed, 25 Feb 2026 12:48:38 +0530 Subject: [PATCH] Fix Marginalia (Sync) https://git.lolcat.ca/lolcat/4get/commit/c42e62cb09c9fc16f627c96b8b5e1cbc9bac6937 https://git.lolcat.ca/lolcat/4get/commit/cdf9164113a3cef01b4f26f6e02887a3e3c91267 --- scraper/marginalia.php | 91 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 12 deletions(-) diff --git a/scraper/marginalia.php b/scraper/marginalia.php index 4753297..9a487a4 100644 --- a/scraper/marginalia.php +++ b/scraper/marginalia.php @@ -196,7 +196,7 @@ class marginalia{ return $data; } - public function web($get){ + public function web($get, $ss = ""){ $search = [$get["s"]]; if(strlen($get["s"]) === 0){ @@ -390,28 +390,35 @@ class marginalia{ } }else{ - $params = [ - "query" => $search - ]; - foreach(["adtech", "recent", "intitle"] as $v){ + if($ss == ""){ - if($get[$v] == "yes"){ + $params = [ + "query" => $search + ]; + + foreach(["adtech", "recent", "intitle"] as $v){ - switch($v){ + if($get[$v] == "yes"){ - case "adtech": $params["adtech"] = "reduce"; break; - case "recent": $params["recent"] = "recent"; break; - case "adtech": $params["searchTitle"] = "title"; break; + switch($v){ + + case "adtech": $params["adtech"] = "reduce"; break; + case "recent": $params["recent"] = "recent"; break; + case "adtech": $params["searchTitle"] = "title"; break; + } } } + }else{ + + $params = []; } try{ $html = $this->get( $proxy, - "https://old-search.marginalia.nu/search", + "https://old-search.marginalia.nu/search" . $ss, $params, //$anubis_key ); @@ -423,6 +430,66 @@ class marginalia{ $this->fuckhtml->load($html); + // detect meta redirect + $title = + $this->fuckhtml + ->getElementsByTagName( + "title" + ); + + if( + count($title) !== 0 && + $this->fuckhtml + ->getTextContent( + $title[0] + ) == "Error" + ){ + + // redirect detected + + // get timeout + $timeout = + $this->fuckhtml + ->getElementById( + "countdown", + "b" + ); + + if(count($timeout) === null){ + + throw new Exception("Failed to find timeout value"); + } + + $timeout = + $this->fuckhtml + ->getTextContent( + $timeout + ); + + preg_match( + '/location\.replace\(\'([^\']+)\'\)/', + $html, + $redirect + ); + + if(!isset($redirect[1])){ + + throw new Exception("Failed to grep redirect value"); + } + + $one = 1; + $redirect = + str_replace( + "/search", + "", + $redirect[1], + $one + ); + + sleep((int)$timeout); + return $this->web($get, $redirect); + } + $sections = $this->fuckhtml ->getElementsByClassName( @@ -576,4 +643,4 @@ class marginalia{ return $out; } -} \ No newline at end of file +}