From 1934057895b417fe827c2c290e0ecc4c5a28977b Mon Sep 17 00:00:00 2001 From: vdbhb59 Date: Sun, 3 Aug 2025 12:11:46 +0530 Subject: [PATCH] Synced (https://git.lolcat.ca/lolcat/4get/commit/336cb49d9824c07cf01d3f9b919ffa9c4f40ba92) Added Sepia search (https://git.lolcat.ca/lolcat/4get/commit/73b792289803be7fa9b78cfa4f42802fd6cc9146) (https://git.lolcat.ca/lolcat/4get/commit/7cf403e1257d862ba61f330607ae22b90066551b) (https://git.lolcat.ca/lolcat/4get/commit/5a0f5b868ac2f007c39e085f8c6f720ac70e8957) (https://git.lolcat.ca/lolcat/4get/commit/2c4dc7da84dbed4dbea21e8093af4e77b66b03d6) --- README.md | 44 ++-- data/config.php | 19 +- lib/frontend.php | 4 +- scraper/sepiasearch.php | 541 ++++++++++++++++++++++++++++++++++++++++ settings.php | 4 + 5 files changed, 583 insertions(+), 29 deletions(-) create mode 100644 scraper/sepiasearch.php diff --git a/README.md b/README.md index d714641..b69cb3c 100644 --- a/README.md +++ b/README.md @@ -24,31 +24,25 @@ tl;dr 4Get is the best way to browse for shit. 
# Supported websites -| Web | Images | Videos | News | Music | Autocompleter | -|------------|--------------|------------|------------|------------|---------------| -| DuckDuckGo | DuckDuckGo | YouTube | DuckDuckGo | Soundcloud | Brave | -| Brave | Brave | DuckDuckGo | Brave | | DuckDuckGo | -| Yandex | Yandex | Brave | Google | | Yandex | -| Google | Google | Yandex | Startpage | | Google | -| Startpage | Startpage | Google | Qwant | | Startpage | -| Qwant | Qwant | Startpage | Mojeek | | Kagi | -| Ghostery | Yep | Qwant | | | Qwant | -| Yep | Solofield | Solofield | | | Ghostery | -| Greppr | Pinterest | | | | Yep | -| Crowdview | 500px | | | | Marginalia | -| Mwmbl | VSCO | | | | YouTube | -| Mojeek | Imgur | | | | Soundcloud | -| Ghostery | Yep | Qwant | Baidu | | Qwant | -| Yep | Baidu | Baidu | | | Ghostery | -| Greppr | Pinterest | Coc Coc | | | Yep | -| Crowdview | 500px | | | | Marginalia | -| Mwmbl | VSCO | | | | YouTube | -| Mojeek | Imgur | | | | Soundcloud | -| Baidu | FindThatMeme | | | | | -| Coc Coc | | | | | | -| Marginalia | | | | | | -| wiby | | | | | | -| Curlie | | | | | | +| Web | Images | Videos | News | Music | Autocompleter | +|------------|--------------|--------------|------------|------------|---------------| +| DuckDuckGo | DuckDuckGo | YouTube | DuckDuckGo | Soundcloud | Brave | +| Brave | Brave | Sepia Search | Brave | | DuckDuckGo | +| Yandex | Yandex | DuckDuckGo | Google | | Yandex | +| Google | Google | Brave | Startpage | | Google | +| Startpage | Startpage | Yandex | Qwant | | Startpage | +| Qwant | Qwant | Google | Mojeek | | Kagi | +| Ghostery | Yep | Startpage | Baidu | | Qwant | +| Yep | Baidu | Qwant | | | Ghostery | +| Greppr | Pinterest | Baidu | | | Yep | +| Crowdview | 500px | Coc Coc | | | Marginalia | +| Mwmbl | VSCO | | | | YouTube | +| Mojeek | Imgur | | | | Soundcloud | +| Baidu | FindThatMeme | | | | | +| Coc Coc | | | | | | +| Marginalia | | | | | | +| wiby | | | | | | +| Curlie | | | | | | # Installation Refer 
to the documentation index. I recommend following the apache2 guide. diff --git a/data/config.php b/data/config.php index 8b5aa51..e142704 100644 --- a/data/config.php +++ b/data/config.php @@ -89,15 +89,27 @@ class config{ // To appear in the list of an instance, contact the host and if everyone added // eachother your serber should appear everywhere. const INSTANCES = [ - "https://4g.flossboxin.org.in", "https://4get.ca", - "https://4get.nadeko.net", + "https://4get.zzls.xyz", + "https://4getus.zzls.xyz", "https://4get.silly.computer", + "https://4get.konakona.moe", + "https://4get.lvkaszus.pl", + "https://4g.ggtyler.dev", + "https://4get.perennialte.ch", "https://4get.sijh.net", "https://4get.hbubli.cc", + "https://4get.plunked.party", + "https://4get.etenie.pl", "https://4get.lunar.icu", + "https://4get.dcs0.hu", + "https://4get.kizuki.lol", + "https://4get.psily.garden", + "https://search.milivojevic.in.rs", + "https://4get.snine.nl", + "https://4get.datura.network", "https://4get.neco.lol", - "https://4get.plunked.party/", + "https://4get.lol", "https://4get.ch", "https://4get.edmateo.site", "https://4get.sudovanilla.org", @@ -130,6 +142,7 @@ class config{ const PROXY_WIBY = false; const PROXY_CURLIE = false; const PROXY_YT = false; // youtube + const PROXY_SEPIASEARCH = false; const PROXY_YEP = false; const PROXY_PINTEREST = false; const PROXY_FIVEHPX = false; diff --git a/lib/frontend.php b/lib/frontend.php index 3f94861..9b272ae 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -951,6 +951,7 @@ class frontend{ "mojeek" => "Mojeek", "baidu" => "Baidu", "coccoc" => "Cốc Cốc", + //"sepiasearch" => "Sepia Search", //"solofield" => "Solofield", "marginalia" => "Marginalia", "wiby" => "wiby", @@ -989,6 +990,7 @@ class frontend{ "display" => "Scraper", "option" => [ "yt" => "YouTube", + "sepiasearch" => "Sepia Search", //"fb" => "Facebook videos", "ddg" => "DuckDuckGo", "brave" => "Brave", @@ -1339,7 +1341,7 @@ class frontend{ return 
htmlspecialchars($image);
 		}
 
-		return "https://4get.ca/proxy?i=" . urlencode($image) . "&s=" . $format;
+		return "/proxy?i=" . urlencode($image) . "&s=" . $format;
 	}
 
 	public function htmlnextpage($gets, $npt, $page){
diff --git a/scraper/sepiasearch.php b/scraper/sepiasearch.php
new file mode 100644
index 0000000..650116b
--- /dev/null
+++ b/scraper/sepiasearch.php
@@ -0,0 +1,595 @@
+<?php
+
+// NOTE(review): the start of this file (from "<?php" up to and including
+// "$this->") was lost when the patch text was extracted. The class and
+// constructor header below are reconstructed; confirm them, and the
+// lib/backend.php include in particular, against the upstream commit.
+class sepiasearch{
+
+	public function __construct(){
+
+		include "lib/backend.php";
+		$this->backend = new backend("sepiasearch");
+	}
+
+	/**
+	 * Lists the filters offered for this scraper.
+	 *
+	 * Trailing comments name the API parameter(s) video() sets for a filter.
+	 *
+	 * @param mixed $page Unused.
+	 * @return array Filter name => ["display" => label, "option" => choices].
+	 */
+	public function getfilters($page){
+
+		return [
+			"nsfw" => [
+				"display" => "NSFW",
+				"option" => [
+					"yes" => "Yes", // nsfw=both
+					"no" => "No" // nsfw=false
+				]
+			],
+			"language" => [
+				"display" => "Language", // languageOneOf[]=
+				"option" => [
+					"any" => "Any language",
+					"en" => "English",
+					"fr" => "Français",
+					"ar" => "العربية",
+					"ca" => "Català",
+					"cs" => "Čeština",
+					"de" => "Deutsch",
+					"el" => "ελληνικά",
+					"eo" => "Esperanto",
+					"es" => "Español",
+					"eu" => "Euskara",
+					"fa" => "فارسی",
+					"fi" => "Suomi",
+					"gd" => "Gàidhlig",
+					"gl" => "Galego",
+					"hr" => "Hrvatski",
+					"hu" => "Magyar",
+					"is" => "Íslenska",
+					"it" => "Italiano",
+					"ja" => "日本語",
+					"kab" => "Taqbaylit",
+					"nl" => "Nederlands",
+					"no" => "Norsk",
+					"oc" => "Occitan",
+					"pl" => "Polski",
+					"pt" => "Português (Brasil)",
+					"pt-PT" => "Português (Portugal)",
+					"ru" => "Pусский",
+					"sk" => "Slovenčina",
+					"sq" => "Shqip",
+					"sv" => "Svenska",
+					"th" => "ไทย",
+					"tok" => "Toki Pona",
+					"tr" => "Türkçe",
+					"uk" => "украї́нська мо́ва",
+					"vi" => "Tiếng Việt",
+					"zh-Hans" => "简体中文(中国)",
+					"zh-Hant" => "繁體中文(台灣)"
+				]
+			],
+			"type" => [
+				"display" => "Result type", // selects the search endpoint
+				"option" => [
+					"videos" => "Videos",
+					"playlists" => "Playlists",
+					"channels" => "Channels"
+				]
+			],
+			"sort" => [
+				"display" => "Sort by",
+				"option" => [
+					"best" => "Best match", // no sort parameter
+					"-publishedAt" => "Newest", // sort=-publishedAt
+					"publishedAt" => "Oldest" // sort=publishedAt
+				]
+			],
+			"newer" => [ // startDate=2025-07-26T04:00:00.000Z
+				"display" => "Newer than",
+				"option" => "_DATE"
+			],
+			"duration" => [
+				"display" => "Duration",
+				"option" => [
+					"any" => "Any duration",
+					"short" => "Short (0-4mins)", // durationMax=240
+					"medium" => "Medium (4-10 mins)", // durationMin=240, durationMax=600
+					"long" => "Long (10+ mins)", // durationMin=600
+				]
+			],
+			"category" => [
+				"display" => "Category", // categoryOneOf[]=
+				"option" => [
+					"any" => "Any category",
+					"1" => "Music",
+					"2" => "Films",
+					"3" => "Vehicles",
+					"4" => "Art",
+					"5" => "Sports",
+					"6" => "Travels",
+					"7" => "Gaming",
+					"8" => "People",
+					"9" => "Comedy",
+					"10" => "Entertainment",
+					"11" => "News & Politics",
+					"12" => "How To",
+					"13" => "Education",
+					"14" => "Activism",
+					"15" => "Science & Technology",
+					"16" => "Animals",
+					"17" => "Kids",
+					"18" => "Food"
+				]
+			],
+			"display" => [
+				"display" => "Display",
+				"option" => [
+					"any" => "Everything",
+					"true" => "Live videos", // isLive=true
+					"false" => "VODs" // isLive=false
+				]
+			],
+			"license" => [
+				"display" => "License", // licenceOneOf[]=
+				"option" => [
+					"any" => "Any license",
+					"1" => "Attribution",
+					"2" => "Attribution - Share Alike",
+					"3" => "Attribution - No Derivatives",
+					"4" => "Attribution - Non Commercial",
+					"5" => "Attribution - Non Commercial - Share Alike",
+					"6" => "Attribution - Non Commercial - No Derivatives",
+					"7" => "Public Domain Dedication"
+				]
+			]
+		];
+	}
+
+	/**
+	 * Sends a GET request and returns the raw response body.
+	 *
+	 * @param mixed $proxy Passed to backend::assign_proxy() together with the handle.
+	 * @param string $url Endpoint URL without a query string.
+	 * @param array $get Query parameters; encoded with http_build_query().
+	 * @return string Response body as returned by curl_exec().
+	 * @throws Exception Carrying the curl error message when the transfer fails.
+	 */
+	private function get($proxy, $url, $get = []){
+
+		$curlproc = curl_init();
+
+		if($get !== []){
+			$get = http_build_query($get);
+			$url .= "?" . $get;
+		}
+
+		curl_setopt($curlproc, CURLOPT_URL, $url);
+		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+
+		// NOTE(review): the explicit Accept-Encoding below advertises br and
+		// zstd whether or not this libcurl build can decode them -- confirm
+		curl_setopt(
+			$curlproc,
+			CURLOPT_HTTPHEADER,
+			["User-Agent: " . config::USER_AGENT,
+			"Accept: application/json, text/plain, */*",
+			"Accept-Language: en-US,en;q=0.5",
+			"Accept-Encoding: gzip, deflate, br, zstd",
+			"DNT: 1",
+			"Sec-GPC: 1",
+			"Connection: keep-alive",
+			"Referer: https://sepiasearch.org/search",
+			"Sec-Fetch-Dest: empty",
+			"Sec-Fetch-Mode: cors",
+			"Sec-Fetch-Site: same-origin",
+			"Priority: u=0",
+			"TE: trailers"]
+		);
+
+		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+		$this->backend->assign_proxy($curlproc, $proxy);
+
+		$data = curl_exec($curlproc);
+
+		if(curl_errno($curlproc)){
+
+			// read the message first, then release the handle before throwing
+			$error = curl_error($curlproc);
+			curl_close($curlproc);
+
+			throw new Exception($error);
+		}
+
+		curl_close($curlproc);
+		return $data;
+	}
+
+	/**
+	 * Searches Sepia Search for videos, playlists or channels.
+	 *
+	 * @param array $get Request parameters: "s" (search terms), "npt" (next page
+	 *                   token, falsy on the first page) and the getfilters() keys.
+	 * @return array "status", "npt" (token for the next page or null) and the
+	 *               "video", "author", "livestream", "playlist", "reel" lists.
+	 * @throws Exception When the request fails, the response is not JSON, the
+	 *                   API reports errors or the result type is unknown.
+	 */
+	public function video($get){
+
+		if($get["npt"]){
+
+			// continuing a search: the stored token carries the API parameters
+			// and the result type the first page was requested with
+			[$npt, $proxy] =
+				$this->backend
+				->get(
+					$get["npt"],
+					"videos"
+				);
+
+			$npt = json_decode($npt, true);
+			$type = $npt["type"];
+			$npt = $npt["npt"];
+		}else{
+
+			$proxy = $this->backend->get_ip();
+
+			$npt = [
+				"search" => $get["s"],
+				"start" => 0,
+				"count" => 20
+			];
+
+			if($get["type"] == "videos"){
+
+				//
+				// Parse video filters
+				//
+				switch($get["nsfw"]){
+
+					case "yes": $npt["nsfw"] = "both"; break;
+					case "no": $npt["nsfw"] = "false"; break;
+				}
+
+				$npt["boostLanguages[]"] = "en";
+				if($get["language"] != "any"){
+
+					$npt["languageOneOf[]"] = $get["language"];
+				}
+
+				if($get["sort"] != "best"){
+
+					$npt["sort"] = $get["sort"];
+				}
+
+				if($get["newer"] !== false){
+
+					$date = new DateTime("@{$get["newer"]}");
+					$date->setTimezone(new DateTimeZone("UTC"));
+					$formatted = $date->format("Y-m-d\TH:i:s.000\Z");
+
+					$npt["startDate"] = $formatted;
+				}
+
+				switch($get["duration"]){
+
+					case "short":
+						$npt["durationMax"] = 240;
+						break;
+
+					case "medium":
+						$npt["durationMin"] = 240;
+						$npt["durationMax"] = 600;
+						break;
+
+					case "long":
+						$npt["durationMin"] = 600;
+						break;
+				}
+
+				if($get["category"] != "any"){
+
+					$npt["categoryOneOf[]"] = $get["category"];
+				}
+
+				if($get["display"] != "any"){
+
+					$npt["isLive"] = $get["display"];
+				}
+
+				if($get["license"] != "any"){
+
+					// the API spells this parameter "licence"
+					$npt["licenceOneOf[]"] = $get["license"];
+				}
+			}
+
+			$type = $get["type"];
+		}
+
+		switch($type){
+
+			case "videos":
+				$url = "https://sepiasearch.org/api/v1/search/videos";
+				break;
+
+			case "channels":
+				$url = "https://sepiasearch.org/api/v1/search/video-channels";
+				break;
+
+			case "playlists":
+				$url = "https://sepiasearch.org/api/v1/search/video-playlists";
+				break;
+
+			default:
+				// without this, $url would be undefined further down
+				throw new Exception("Unknown result type");
+		}
+
+		try{
+
+			$json =
+				$this->get(
+					$proxy,
+					$url,
+					$npt
+				);
+		}catch(Exception $error){
+
+			// keep the curl error reachable through getPrevious()
+			throw new Exception("Failed to fetch JSON", 0, $error);
+		}
+
+		$json = json_decode($json, true);
+
+		if($json === null){
+
+			throw new Exception("Failed to parse JSON");
+		}
+
+		if(isset($json["errors"])){
+
+			$msg = [];
+			foreach($json["errors"] as $error){
+
+				if(isset($error["msg"])){
+
+					$msg[] = $error["msg"];
+				}
+			}
+
+			throw new Exception("Sepia Search returned error(s): " . implode(", ", $msg));
+		}
+
+		if(!isset($json["data"])){
+
+			throw new Exception("Sepia Search did not return a data object");
+		}
+
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"video" => [],
+			"author" => [],
+			"livestream" => [],
+			"playlist" => [],
+			"reel" => []
+		];
+
+		// $type, not $get["type"]: on follow-up pages the result type comes
+		// from the stored token and must match the endpoint queried above
+		switch($type){
+
+			case "videos":
+				foreach($json["data"] as $video){
+
+					// live videos are listed separately from regular ones
+					$append = $video["isLive"] ? "livestream" : "video";
+
+					$out[$append][] = [
+						"title" => $video["name"],
+						"description" =>
+							$this->limitstrlen(
+								$this->titledots(
+									$video["description"]
+								)
+							),
+						"author" => [
+							"name" => $video["account"]["displayName"] . " ({$video["account"]["name"]})",
+							"url" => $video["account"]["url"],
+							"avatar" => $this->lastavatar($video["account"]["avatars"] ?? null)
+						],
+						"date" => strtotime($video["publishedAt"]),
+						"duration" => $video["isLive"] ? "_LIVE" : $video["duration"],
+						"views" => $video["views"],
+						"thumb" => $this->thumb($video["thumbnailUrl"], "16:9"),
+						"url" => $video["url"]
+					];
+				}
+				break;
+
+			case "playlists":
+				foreach($json["data"] as $playlist){
+
+					$out["playlist"][] = [
+						"title" => $playlist["displayName"],
+						"description" =>
+							$this->limitstrlen(
+								$this->titledots(
+									$playlist["description"]
+								)
+							),
+						"author" => [
+							"name" => $playlist["ownerAccount"]["displayName"] . " ({$playlist["ownerAccount"]["name"]})",
+							"url" => $playlist["ownerAccount"]["url"],
+							"avatar" => $this->lastavatar($playlist["ownerAccount"]["avatars"] ?? null)
+						],
+						"date" => strtotime($playlist["createdAt"]),
+						"duration" => $playlist["videosLength"],
+						"views" => null,
+						"thumb" => $this->thumb($playlist["thumbnailUrl"], "16:9"),
+						"url" => $playlist["url"]
+					];
+				}
+				break;
+
+			case "channels":
+				foreach($json["data"] as $channel){
+
+					$out["author"][] = [
+						"title" => $channel["displayName"] . " ({$channel["name"]})",
+						"followers" => $channel["followersCount"],
+						"description" =>
+							$channel["videosCount"] . " videos. " .
+							$this->limitstrlen(
+								$this->titledots(
+									$channel["description"]
+								)
+							),
+						"thumb" => $this->thumb($this->lastavatar($channel["avatars"] ?? null), "1:1"),
+						"url" => $channel["url"]
+					];
+				}
+				break;
+		}
+
+		// get next page
+		if(
+			isset($json["total"]) &&
+			$json["total"] - $npt["count"] > $npt["start"]
+		){
+
+			$npt["start"] += $npt["count"];
+
+			$out["npt"] =
+				$this->backend
+				->store(
+					json_encode([
+						"type" => $type,
+						"npt" => $npt
+					]),
+					"videos",
+					$proxy
+				);
+		}
+
+		return $out;
+	}
+
+	/**
+	 * Returns the URL of the last entry of an avatar list.
+	 *
+	 * @param mixed $avatars Avatar entries as returned by the API, or null.
+	 * @return string|null The entry's "url", or null when the list is missing or empty.
+	 */
+	private function lastavatar($avatars){
+
+		if(
+			!is_array($avatars) ||
+			count($avatars) === 0
+		){
+
+			return null;
+		}
+
+		return $avatars[count($avatars) - 1]["url"];
+	}
+
+	/**
+	 * Builds the "thumb" entry of a result.
+	 *
+	 * @param string|null $url Image URL.
+	 * @param string $ratio Aspect ratio reported when a URL is present.
+	 * @return array ["ratio" => ..., "url" => ...], both null without a URL.
+	 */
+	private function thumb($url, $ratio){
+
+		if($url === null){
+
+			return [
+				"ratio" => null,
+				"url" => null
+			];
+		}
+
+		return [
+			"ratio" => $ratio,
+			"url" => $url
+		];
+	}
+
+	/**
+	 * Removes a trailing ellipsis ("..." or "…") and surrounding whitespace.
+	 *
+	 * @param string|null $title Text to clean; null is treated as "".
+	 * @return string
+	 */
+	private function titledots($title){
+
+		// substr(null) is deprecated since PHP 8.1
+		$title = (string)$title;
+
+		$substr = substr($title, -3);
+
+		if(
+			$substr === "..." ||
+			$substr === "…"
+		){
+
+			$title = substr($title, 0, -3);
+		}
+
+		// trim() with "\xc2\xa0" in its mask strips single bytes and would cut
+		// the last byte off characters such as "à" (\xc3\xa0); match U+00A0 as
+		// a whole character instead
+		$trimmed =
+			preg_replace(
+				'/^[ \n\r\t\x0B\x00\x{A0}]+|[ \n\r\t\x0B\x00\x{A0}]+\z/u',
+				"",
+				$title
+			);
+
+		if($trimmed === null){
+
+			// not valid UTF-8: trim ASCII whitespace only
+			return trim($title, " \n\r\t\v\x00");
+		}
+
+		return $trimmed;
+	}
+
+	/**
+	 * Replaces line breaks with spaces and returns the first line that
+	 * wordwrap() produces at a width of 300.
+	 *
+	 * @param string $text
+	 * @return string
+	 */
+	private function limitstrlen($text){
+
+		return
+			explode(
+				"\n",
+				wordwrap(
+					str_replace(
+						["\n\r", "\r\n", "\n", "\r"],
+						" ",
+						$text
+					),
+					300,
+					"\n"
+				),
+				2
+			)[0];
+	}
+}
\ No newline at end of file
diff --git a/settings.php b/settings.php
index 7bc39b7..96a9400 100644
--- a/settings.php
+++ b/settings.php
@@ -265,6 +265,10 @@ $settings = [
 				"value" => "yt",
 				"text" => "YouTube"
 			],
+			[
+				"value" => "sepiasearch",
+				"text" => "Sepia Search"
+			],
 			[
 				"value" => "ddg",
 				"text" => "DuckDuckGo"