diff --git a/lib/bingcache-todo-fix.php b/lib/bingcache-todo-fix.php
index c52fbfd..e69de29 100644
--- a/lib/bingcache-todo-fix.php
+++ b/lib/bingcache-todo-fix.php
@@ -1,144 +0,0 @@
-
-
-new bingcache();
-
-class bingcache{
-
- public function __construct(){
-
- if(
- !isset($_GET["s"]) ||
- $this->validate_url($_GET["s"]) === false
- ){
-
- var_dump($this->validate_url($_GET["s"]));
- $this->do404("Please provide a valid URL.");
- }
-
- $url = $_GET["s"];
-
- $curlproc = curl_init();
-
- curl_setopt(
- $curlproc,
- CURLOPT_URL,
- "https://www.bing.com/search?q=url%3A" .
- urlencode($url)
- );
-
- curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
- curl_setopt(
- $curlproc,
- CURLOPT_HTTPHEADER,
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
- "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
- "Accept-Language: en-US,en;q=0.5",
- "Accept-Encoding: gzip",
- "DNT: 1",
- "Connection: keep-alive",
- "Upgrade-Insecure-Requests: 1",
- "Sec-Fetch-Dest: document",
- "Sec-Fetch-Mode: navigate",
- "Sec-Fetch-Site: none",
- "Sec-Fetch-User: ?1"]
- );
-
- curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
- curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
- curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
- curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 5);
-
- $data = curl_exec($curlproc);
-
- if(curl_errno($curlproc)){
-
- $this->do404("Failed to connect to bing servers. Please try again later.");
- }
-
- curl_close($curlproc);
-
- preg_match(
- '/
/',
- $data,
- $keys
- );
-
- print_r($keys);
-
- if(count($keys) === 0){
-
- $this->do404("Bing has not archived this URL.");
- }
-
- $keys = explode("|", $keys[1]);
- $count = count($keys);
-
- //header("Location: https://cc.bingj.com/cache.aspx?d=" . $keys[$count - 2] . "&w=" . $keys[$count - 1]);
- echo("Location: https://cc.bingj.com/cache.aspx?d=" . $keys[$count - 2] . "&w=" . $keys[$count - 1]);
- }
-
- public function do404($text){
-
- include "lib/frontend.php";
- $frontend = new frontend();
-
- echo
- $frontend->load(
- "error.html",
- [
- "title" => "Shit",
- "text" => $text
- ]
- );
-
- die();
- }
-
- public function validate_url($url){
-
- $url_parts = parse_url($url);
-
- // check if required parts are there
- if(
- !isset($url_parts["scheme"]) ||
- !(
- $url_parts["scheme"] == "http" ||
- $url_parts["scheme"] == "https"
- ) ||
- !isset($url_parts["host"])
- ){
- return false;
- }
-
- if(
- // if its not an RFC-valid URL
- !filter_var($url, FILTER_VALIDATE_URL)
- ){
- return false;
- }
-
- $ip =
- str_replace(
- ["[", "]"], // handle ipv6
- "",
- $url_parts["host"]
- );
-
- // if its not an IP
- if(!filter_var($ip, FILTER_VALIDATE_IP)){
-
- // resolve domain's IP
- $ip = gethostbyname($url_parts["host"] . ".");
- }
-
- // check if its localhost
- return filter_var(
- $ip,
- FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
- );
- }
-}
diff --git a/scraper/coccoc.php b/scraper/coccoc.php
index 8baf371..5a045d1 100644
--- a/scraper/coccoc.php
+++ b/scraper/coccoc.php
@@ -164,6 +164,13 @@ class coccoc{
throw new Exception("Failed to decode JSON");
}
+ if(
+ isset($html["captcha"]) &&
+ (int)$html["captcha"] === 1
+ ){
+
+ throw new Exception("Coc Coc returned a Captcha");
+ }
if(!isset($html["search"]["search_results"])){
diff --git a/scraper/google.php b/scraper/google.php
index 4742971..83c4d01 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -705,7 +705,7 @@ class google{
}
- private function unshit_thumb($url){
+ private function unshit_thumb($url, $get_bigger_res = false){
// https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQINE2vbnNLHXqoZr3RVsaEJFyOsj1_BiBnJch-e1nyz3oia7Aj5xVj
// https://i.ytimg.com/vi/PZVIyA5ER3Y/mqdefault.jpg?sqp=-oaymwEFCJQBEFM&rs=AMzJL3nXeaCpdIar-ltNwl82Y82cIJfphA
@@ -714,7 +714,7 @@ class google{
if(
isset($parts["host"]) &&
preg_match(
- '/tbn.*\.gstatic\.com/',
+ '/(?:encrypted-)?tbn.*\.gstatic\.com/',
$parts["host"]
)
){
@@ -723,7 +723,26 @@ class google{
if(isset($params["q"])){
- return "https://" . $parts["host"] . "/images?q=" . $params["q"];
+ if($get_bigger_res){
+
+ // this method doesn't always work, but does work for wiki thumbnails
+ return
+ "https://" . $parts["host"] . "/images?q=tbn:" .
+ $this->base64url_encode(
+ substr(
+ $this->base64url_decode(
+ explode(
+ ":",
+ $params["q"])[1]
+ ),
+ 0,
+ 29
+ )
+ );
+ }else{
+
+ return "https://" . $parts["host"] . "/images?q=" . $params["q"];
+ }
}
}
@@ -1591,9 +1610,12 @@ class google{
if(count($img) !== 0){
$thumb =
- $this->fuckhtml
- ->getTextContent(
- $img[0]["attributes"]["src"]
+ $this->unshit_thumb(
+ $this->fuckhtml
+ ->getTextContent(
+ $img[0]["attributes"]["src"]
+ ),
+ true
);
}
@@ -2976,6 +2998,20 @@ class google{
return $time;
}
+	// Decode a URL-safe base64 string ("-" and "_" alphabet, padding optional)
+	// and return whatever base64_decode() yields for the normalized input.
+	function base64url_decode($data){
+
+		// translate the URL-safe characters back to the standard alphabet
+		$standard = strtr($data, ["-" => "+", "_" => "/"]);
+
+		// re-append the "=" padding so the length is a multiple of 4
+		$remainder = strlen($standard) % 4;
+
+		if($remainder !== 0){
+
+			$standard =
+				str_pad(
+					$standard,
+					strlen($standard) + (4 - $remainder),
+					"=",
+					STR_PAD_RIGHT
+				);
+		}
+
+		return base64_decode($standard);
+	}
+
+	// Encode data as URL-safe base64: "+" becomes "-", "/" becomes "_",
+	// and the trailing "=" padding is removed.
+	function base64url_encode($data){
+
+		$encoded = base64_encode($data);
+
+		// swap the two characters that are not URL-safe
+		$urlsafe = strtr($encoded, ["+" => "-", "/" => "_"]);
+
+		// strip the padding from the end of the string
+		return rtrim($urlsafe, "=");
+	}
+
private function detect_sorry(){
$captcha_form =